Diffstat (limited to 'contrib')
-rw-r--r--contrib/llvm/include/llvm-c/Core.h21
-rw-r--r--contrib/llvm/include/llvm-c/lto.h3
-rw-r--r--contrib/llvm/include/llvm/ADT/APInt.h8
-rw-r--r--contrib/llvm/include/llvm/ADT/ArrayRef.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/BitVector.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/DenseMap.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/DenseSet.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/DepthFirstIterator.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/EquivalenceClasses.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/GraphTraits.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/Hashing.h1
-rw-r--r--contrib/llvm/include/llvm/ADT/IndexedMap.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/IntEqClasses.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/Optional.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/PointerUnion.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/PostOrderIterator.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/PriorityQueue.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/SCCIterator.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/STLExtras.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/SetOperations.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/SetVector.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/SmallBitVector.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/SmallPtrSet.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/SmallString.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/SmallVector.h4
-rw-r--r--contrib/llvm/include/llvm/ADT/Statistic.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/StringExtras.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/StringMap.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/StringRef.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/StringSet.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/Triple.h5
-rw-r--r--contrib/llvm/include/llvm/ADT/Twine.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/edit_distance.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/ilist.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/ilist_node.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/iterator.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/iterator_range.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/AliasAnalysis.h115
-rw-r--r--contrib/llvm/include/llvm/Analysis/AliasSetTracker.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h59
-rw-r--r--contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/CFG.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/CFGPrinter.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/CallGraph.h6
-rw-r--r--contrib/llvm/include/llvm/Analysis/CallGraphSCCPass.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/CodeMetrics.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/ConstantFolding.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/DomPrinter.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/DominanceFrontier.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/DominanceFrontierImpl.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/IVUsers.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/InlineCost.h4
-rw-r--r--contrib/llvm/include/llvm/Analysis/Interval.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/IntervalIterator.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/IntervalPartition.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/JumpInstrTableInfo.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/LazyCallGraph.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/LibCallAliasAnalysis.h8
-rw-r--r--contrib/llvm/include/llvm/Analysis/LibCallSemantics.h5
-rw-r--r--contrib/llvm/include/llvm/Analysis/Lint.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/Loads.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopInfo.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopPass.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h18
-rw-r--r--contrib/llvm/include/llvm/Analysis/MemoryLocation.h7
-rw-r--r--contrib/llvm/include/llvm/Analysis/Passes.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/PostDominators.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/PtrUseVisitor.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/RegionInfo.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/RegionPass.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/ScalarEvolution.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/TargetFolder.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h2
-rw-r--r--contrib/llvm/include/llvm/AsmParser/Parser.h2
-rw-r--r--contrib/llvm/include/llvm/Bitcode/BitCodes.h4
-rw-r--r--contrib/llvm/include/llvm/Bitcode/BitcodeWriterPass.h2
-rw-r--r--contrib/llvm/include/llvm/Bitcode/BitstreamReader.h2
-rw-r--r--contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h17
-rw-r--r--contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h10
-rw-r--r--contrib/llvm/include/llvm/Bitcode/ReaderWriter.h20
-rw-r--r--contrib/llvm/include/llvm/CodeGen/Analysis.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/AsmPrinter.h13
-rw-r--r--contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/CalcSpillWeights.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/CommandFlags.h9
-rw-r--r--contrib/llvm/include/llvm/CodeGen/DFAPacketizer.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/DIE.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/FaultMaps.h73
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GCMetadata.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GCMetadataPrinter.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GCStrategy.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GCs.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h4
-rw-r--r--contrib/llvm/include/llvm/CodeGen/IntrinsicLowering.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LatencyPriorityQueue.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LexicalScopes.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LiveInterval.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LiveStackAnalysis.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LiveVariables.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h51
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h37
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineConstantPool.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineDominators.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h227
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineFunction.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h8
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineFunctionInitializer.h38
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineFunctionPass.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineInstr.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h4
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineLoopInfo.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h18
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineOperand.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineRegionInfo.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineSSAUpdater.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineValueType.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/PBQPRAConstraint.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/Passes.h6
-rw-r--r--contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/RuntimeLibcalls.h4
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ScoreboardHazardRecognizer.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SelectionDAG.h9
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h15
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SlotIndexes.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/StackMaps.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ValueTypes.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/VirtRegMap.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h15
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DIContext.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBLineNumber.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSourceFile.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBContext.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymDumper.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h2
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h11
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/GenericValue.h2
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/MCJIT.h2
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/ObjectCache.h2
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h554
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/LogicalDylib.h115
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h2
-rw-r--r--contrib/llvm/include/llvm/IR/Argument.h2
-rw-r--r--contrib/llvm/include/llvm/IR/AssemblyAnnotationWriter.h2
-rw-r--r--contrib/llvm/include/llvm/IR/Attributes.h3
-rw-r--r--contrib/llvm/include/llvm/IR/AutoUpgrade.h2
-rw-r--r--contrib/llvm/include/llvm/IR/BasicBlock.h2
-rw-r--r--contrib/llvm/include/llvm/IR/CFG.h2
-rw-r--r--contrib/llvm/include/llvm/IR/CallSite.h6
-rw-r--r--contrib/llvm/include/llvm/IR/CallingConv.h4
-rw-r--r--contrib/llvm/include/llvm/IR/Comdat.h2
-rw-r--r--contrib/llvm/include/llvm/IR/Constant.h2
-rw-r--r--contrib/llvm/include/llvm/IR/ConstantFolder.h2
-rw-r--r--contrib/llvm/include/llvm/IR/ConstantRange.h2
-rw-r--r--contrib/llvm/include/llvm/IR/Constants.h2
-rw-r--r--contrib/llvm/include/llvm/IR/DataLayout.h2
-rw-r--r--contrib/llvm/include/llvm/IR/DebugInfoMetadata.h11
-rw-r--r--contrib/llvm/include/llvm/IR/DerivedTypes.h5
-rw-r--r--contrib/llvm/include/llvm/IR/DiagnosticInfo.h20
-rw-r--r--contrib/llvm/include/llvm/IR/DiagnosticPrinter.h7
-rw-r--r--contrib/llvm/include/llvm/IR/Dominators.h2
-rw-r--r--contrib/llvm/include/llvm/IR/Function.h21
-rw-r--r--contrib/llvm/include/llvm/IR/GVMaterializer.h2
-rw-r--r--contrib/llvm/include/llvm/IR/GlobalAlias.h2
-rw-r--r--contrib/llvm/include/llvm/IR/GlobalObject.h2
-rw-r--r--contrib/llvm/include/llvm/IR/GlobalValue.h8
-rw-r--r--contrib/llvm/include/llvm/IR/GlobalVariable.h5
-rw-r--r--contrib/llvm/include/llvm/IR/IRBuilder.h14
-rw-r--r--contrib/llvm/include/llvm/IR/IRPrintingPasses.h2
-rw-r--r--contrib/llvm/include/llvm/IR/InlineAsm.h2
-rw-r--r--contrib/llvm/include/llvm/IR/InstIterator.h2
-rw-r--r--contrib/llvm/include/llvm/IR/InstVisitor.h2
-rw-r--r--contrib/llvm/include/llvm/IR/InstrTypes.h2
-rw-r--r--contrib/llvm/include/llvm/IR/Instruction.h2
-rw-r--r--contrib/llvm/include/llvm/IR/Instructions.h64
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicInst.h2
-rw-r--r--contrib/llvm/include/llvm/IR/Intrinsics.h9
-rw-r--r--contrib/llvm/include/llvm/IR/Intrinsics.td5
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td (renamed from contrib/llvm/include/llvm/IR/IntrinsicsR600.td)2
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td5
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsX86.td170
-rw-r--r--contrib/llvm/include/llvm/IR/LLVMContext.h2
-rw-r--r--contrib/llvm/include/llvm/IR/LegacyPassManager.h4
-rw-r--r--contrib/llvm/include/llvm/IR/LegacyPassManagers.h2
-rw-r--r--contrib/llvm/include/llvm/IR/LegacyPassNameParser.h2
-rw-r--r--contrib/llvm/include/llvm/IR/Mangler.h2
-rw-r--r--contrib/llvm/include/llvm/IR/Metadata.h2
-rw-r--r--contrib/llvm/include/llvm/IR/Module.h10
-rw-r--r--contrib/llvm/include/llvm/IR/NoFolder.h2
-rw-r--r--contrib/llvm/include/llvm/IR/OperandTraits.h6
-rw-r--r--contrib/llvm/include/llvm/IR/Operator.h2
-rw-r--r--contrib/llvm/include/llvm/IR/PassManager.h2
-rw-r--r--contrib/llvm/include/llvm/IR/PassManagerInternal.h2
-rw-r--r--contrib/llvm/include/llvm/IR/Statepoint.h13
-rw-r--r--contrib/llvm/include/llvm/IR/SymbolTableListTraits.h2
-rw-r--r--contrib/llvm/include/llvm/IR/Type.h2
-rw-r--r--contrib/llvm/include/llvm/IR/TypeFinder.h2
-rw-r--r--contrib/llvm/include/llvm/IR/Use.h2
-rw-r--r--contrib/llvm/include/llvm/IR/User.h149
-rw-r--r--contrib/llvm/include/llvm/IR/Value.h9
-rw-r--r--contrib/llvm/include/llvm/IR/ValueHandle.h2
-rw-r--r--contrib/llvm/include/llvm/IR/ValueSymbolTable.h2
-rw-r--r--contrib/llvm/include/llvm/IR/Verifier.h2
-rw-r--r--contrib/llvm/include/llvm/IRReader/IRReader.h2
-rw-r--r--contrib/llvm/include/llvm/InitializePasses.h4
-rw-r--r--contrib/llvm/include/llvm/LTO/LTOCodeGenerator.h2
-rw-r--r--contrib/llvm/include/llvm/LTO/LTOModule.h8
-rw-r--r--contrib/llvm/include/llvm/LibDriver/LibDriver.h24
-rw-r--r--contrib/llvm/include/llvm/LineEditor/LineEditor.h2
-rw-r--r--contrib/llvm/include/llvm/LinkAllPasses.h3
-rw-r--r--contrib/llvm/include/llvm/Linker/Linker.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCAsmBackend.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCAsmInfo.h4
-rw-r--r--contrib/llvm/include/llvm/MC/MCAsmInfoCOFF.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCAssembler.h237
-rw-r--r--contrib/llvm/include/llvm/MC/MCCodeEmitter.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCContext.h7
-rw-r--r--contrib/llvm/include/llvm/MC/MCELFObjectWriter.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCELFStreamer.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCExternalSymbolizer.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCFixedLenDisassembler.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCFixup.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCFixupKindInfo.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCInstrAnalysis.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCInstrDesc.h4
-rw-r--r--contrib/llvm/include/llvm/MC/MCInstrInfo.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCInstrItineraries.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCMachObjectWriter.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCObjectFileInfo.h13
-rw-r--r--contrib/llvm/include/llvm/MC/MCObjectStreamer.h3
-rw-r--r--contrib/llvm/include/llvm/MC/MCObjectWriter.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCRegisterInfo.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCRelocationInfo.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCSchedule.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCSection.h12
-rw-r--r--contrib/llvm/include/llvm/MC/MCStreamer.h12
-rw-r--r--contrib/llvm/include/llvm/MC/MCSubtargetInfo.h13
-rw-r--r--contrib/llvm/include/llvm/MC/MCSymbol.h69
-rw-r--r--contrib/llvm/include/llvm/MC/MCSymbolCOFF.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCSymbolELF.h5
-rw-r--r--contrib/llvm/include/llvm/MC/MCSymbolMachO.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCSymbolizer.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCTargetAsmParser.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCWin64EH.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCWinCOFFStreamer.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCWinEH.h4
-rw-r--r--contrib/llvm/include/llvm/MC/MachineLocation.h2
-rw-r--r--contrib/llvm/include/llvm/MC/StringTableBuilder.h2
-rw-r--r--contrib/llvm/include/llvm/MC/YAML.h4
-rw-r--r--contrib/llvm/include/llvm/Object/Archive.h4
-rw-r--r--contrib/llvm/include/llvm/Object/ArchiveWriter.h2
-rw-r--r--contrib/llvm/include/llvm/Object/Binary.h4
-rw-r--r--contrib/llvm/include/llvm/Object/COFF.h1
-rw-r--r--contrib/llvm/include/llvm/Object/COFFYAML.h6
-rw-r--r--contrib/llvm/include/llvm/Object/ELFObjectFile.h90
-rw-r--r--contrib/llvm/include/llvm/Object/ELFTypes.h11
-rw-r--r--contrib/llvm/include/llvm/Object/IRObjectFile.h4
-rw-r--r--contrib/llvm/include/llvm/Object/MachO.h6
-rw-r--r--contrib/llvm/include/llvm/Object/MachOUniversal.h4
-rw-r--r--contrib/llvm/include/llvm/Object/ObjectFile.h6
-rw-r--r--contrib/llvm/include/llvm/Object/RelocVisitor.h88
-rw-r--r--contrib/llvm/include/llvm/Object/SymbolicFile.h4
-rw-r--r--contrib/llvm/include/llvm/Option/OptSpecifier.h4
-rw-r--r--contrib/llvm/include/llvm/Pass.h2
-rw-r--r--contrib/llvm/include/llvm/PassAnalysisSupport.h2
-rw-r--r--contrib/llvm/include/llvm/PassInfo.h2
-rw-r--r--contrib/llvm/include/llvm/PassRegistry.h2
-rw-r--r--contrib/llvm/include/llvm/PassSupport.h2
-rw-r--r--contrib/llvm/include/llvm/Passes/PassBuilder.h2
-rw-r--r--contrib/llvm/include/llvm/Support/ARMEHABI.h6
-rw-r--r--contrib/llvm/include/llvm/Support/ARMWinEH.h6
-rw-r--r--contrib/llvm/include/llvm/Support/AlignOf.h15
-rw-r--r--contrib/llvm/include/llvm/Support/ArrayRecycler.h2
-rw-r--r--contrib/llvm/include/llvm/Support/Atomic.h4
-rw-r--r--contrib/llvm/include/llvm/Support/BlockFrequency.h2
-rw-r--r--contrib/llvm/include/llvm/Support/BranchProbability.h2
-rw-r--r--contrib/llvm/include/llvm/Support/COM.h4
-rw-r--r--contrib/llvm/include/llvm/Support/Casting.h2
-rw-r--r--contrib/llvm/include/llvm/Support/CodeGen.h2
-rw-r--r--contrib/llvm/include/llvm/Support/CommandLine.h13
-rw-r--r--contrib/llvm/include/llvm/Support/CrashRecoveryContext.h2
-rw-r--r--contrib/llvm/include/llvm/Support/DOTGraphTraits.h2
-rw-r--r--contrib/llvm/include/llvm/Support/DataStream.h8
-rw-r--r--contrib/llvm/include/llvm/Support/Debug.h2
-rw-r--r--contrib/llvm/include/llvm/Support/DynamicLibrary.h4
-rw-r--r--contrib/llvm/include/llvm/Support/ELF.h2
-rw-r--r--contrib/llvm/include/llvm/Support/ELFRelocs/Sparc.def4
-rw-r--r--contrib/llvm/include/llvm/Support/Endian.h10
-rw-r--r--contrib/llvm/include/llvm/Support/Errc.h2
-rw-r--r--contrib/llvm/include/llvm/Support/ErrorHandling.h2
-rw-r--r--contrib/llvm/include/llvm/Support/FileSystem.h4
-rw-r--r--contrib/llvm/include/llvm/Support/FileUtilities.h2
-rw-r--r--contrib/llvm/include/llvm/Support/FormattedStream.h2
-rw-r--r--contrib/llvm/include/llvm/Support/GCOV.h2
-rw-r--r--contrib/llvm/include/llvm/Support/GenericDomTree.h2
-rw-r--r--contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h2
-rw-r--r--contrib/llvm/include/llvm/Support/GraphWriter.h2
-rw-r--r--contrib/llvm/include/llvm/Support/Host.h4
-rw-r--r--contrib/llvm/include/llvm/Support/LineIterator.h2
-rw-r--r--contrib/llvm/include/llvm/Support/MD5.h2
-rw-r--r--contrib/llvm/include/llvm/Support/ManagedStatic.h2
-rw-r--r--contrib/llvm/include/llvm/Support/MathExtras.h2
-rw-r--r--contrib/llvm/include/llvm/Support/Memory.h4
-rw-r--r--contrib/llvm/include/llvm/Support/MemoryBuffer.h2
-rw-r--r--contrib/llvm/include/llvm/Support/MemoryObject.h2
-rw-r--r--contrib/llvm/include/llvm/Support/MipsABIFlags.h4
-rw-r--r--contrib/llvm/include/llvm/Support/Mutex.h4
-rw-r--r--contrib/llvm/include/llvm/Support/MutexGuard.h2
-rw-r--r--contrib/llvm/include/llvm/Support/PluginLoader.h2
-rw-r--r--contrib/llvm/include/llvm/Support/Process.h4
-rw-r--r--contrib/llvm/include/llvm/Support/Program.h4
-rw-r--r--contrib/llvm/include/llvm/Support/RWMutex.h4
-rw-r--r--contrib/llvm/include/llvm/Support/RandomNumberGenerator.h2
-rw-r--r--contrib/llvm/include/llvm/Support/Recycler.h2
-rw-r--r--contrib/llvm/include/llvm/Support/RecyclingAllocator.h2
-rw-r--r--contrib/llvm/include/llvm/Support/Regex.h2
-rw-r--r--contrib/llvm/include/llvm/Support/Registry.h2
-rw-r--r--contrib/llvm/include/llvm/Support/Signals.h4
-rw-r--r--contrib/llvm/include/llvm/Support/SourceMgr.h6
-rw-r--r--contrib/llvm/include/llvm/Support/StreamingMemoryObject.h4
-rw-r--r--contrib/llvm/include/llvm/Support/StringPool.h2
-rw-r--r--contrib/llvm/include/llvm/Support/StringSaver.h42
-rw-r--r--contrib/llvm/include/llvm/Support/SystemUtils.h2
-rw-r--r--contrib/llvm/include/llvm/Support/TargetRegistry.h34
-rw-r--r--contrib/llvm/include/llvm/Support/TargetSelect.h2
-rw-r--r--contrib/llvm/include/llvm/Support/ThreadLocal.h4
-rw-r--r--contrib/llvm/include/llvm/Support/Threading.h2
-rw-r--r--contrib/llvm/include/llvm/Support/TimeValue.h4
-rw-r--r--contrib/llvm/include/llvm/Support/Timer.h2
-rw-r--r--contrib/llvm/include/llvm/Support/ToolOutputFile.h2
-rw-r--r--contrib/llvm/include/llvm/Support/UniqueLock.h2
-rw-r--r--contrib/llvm/include/llvm/Support/Valgrind.h4
-rw-r--r--contrib/llvm/include/llvm/Support/Watchdog.h4
-rw-r--r--contrib/llvm/include/llvm/Support/circular_raw_ostream.h2
-rw-r--r--contrib/llvm/include/llvm/Support/raw_os_ostream.h2
-rw-r--r--contrib/llvm/include/llvm/Support/raw_ostream.h2
-rw-r--r--contrib/llvm/include/llvm/Support/type_traits.h2
-rw-r--r--contrib/llvm/include/llvm/TableGen/Error.h2
-rw-r--r--contrib/llvm/include/llvm/TableGen/Record.h3
-rw-r--r--contrib/llvm/include/llvm/TableGen/StringMatcher.h2
-rw-r--r--contrib/llvm/include/llvm/Target/Target.td6
-rw-r--r--contrib/llvm/include/llvm/Target/TargetCallingConv.h4
-rw-r--r--contrib/llvm/include/llvm/Target/TargetFrameLowering.h2
-rw-r--r--contrib/llvm/include/llvm/Target/TargetInstrInfo.h88
-rw-r--r--contrib/llvm/include/llvm/Target/TargetIntrinsicInfo.h2
-rw-r--r--contrib/llvm/include/llvm/Target/TargetLowering.h4
-rw-r--r--contrib/llvm/include/llvm/Target/TargetMachine.h31
-rw-r--r--contrib/llvm/include/llvm/Target/TargetOpcodes.h6
-rw-r--r--contrib/llvm/include/llvm/Target/TargetOptions.h9
-rw-r--r--contrib/llvm/include/llvm/Target/TargetRecip.h2
-rw-r--r--contrib/llvm/include/llvm/Target/TargetRegisterInfo.h15
-rw-r--r--contrib/llvm/include/llvm/Target/TargetSelectionDAGInfo.h2
-rw-r--r--contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h11
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/InstCombine/InstCombine.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Instrumentation.h10
-rw-r--r--contrib/llvm/include/llvm/Transforms/ObjCARC.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/Cloning.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/CtorUtils.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/GlobalStatus.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/IntegerDivision.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/Local.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h154
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/PromoteMemToReg.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/SSAUpdater.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h4
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/ValueMapper.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/VectorUtils.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Vectorize.h2
-rw-r--r--contrib/llvm/include/llvm/module.modulemap3
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysis.cpp80
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp15
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp128
-rw-r--r--contrib/llvm/lib/Analysis/AliasDebugger.cpp10
-rw-r--r--contrib/llvm/lib/Analysis/AliasSetTracker.cpp46
-rw-r--r--contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp155
-rw-r--r--contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp81
-rw-r--r--contrib/llvm/lib/Analysis/CFGPrinter.cpp8
-rw-r--r--contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp68
-rw-r--r--contrib/llvm/lib/Analysis/CaptureTracking.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/DomPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/IPA/CallGraph.cpp40
-rw-r--r--contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp6
-rw-r--r--contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp15
-rw-r--r--contrib/llvm/lib/Analysis/InstCount.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/InstructionSimplify.cpp19
-rw-r--r--contrib/llvm/lib/Analysis/LazyValueInfo.cpp40
-rw-r--r--contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/LibCallSemantics.cpp5
-rw-r--r--contrib/llvm/lib/Analysis/Lint.cpp61
-rw-r--r--contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp12
-rw-r--r--contrib/llvm/lib/Analysis/LoopPass.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/MemDepPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/MemDerefPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp72
-rw-r--r--contrib/llvm/lib/Analysis/MemoryLocation.cpp84
-rw-r--r--contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp17
-rw-r--r--contrib/llvm/lib/Analysis/PHITransAddr.cpp15
-rw-r--r--contrib/llvm/lib/Analysis/RegionPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolution.cpp18
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp19
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp6
-rw-r--r--contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp16
-rw-r--r--contrib/llvm/lib/Analysis/StratifiedSets.h2
-rw-r--r--contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp18
-rw-r--r--contrib/llvm/lib/Analysis/ValueTracking.cpp306
-rw-r--r--contrib/llvm/lib/AsmParser/LLLexer.cpp1
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.cpp21
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.h2
-rw-r--r--contrib/llvm/lib/AsmParser/LLToken.h1
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitReader.cpp8
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp1373
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp12
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp2
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp50
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AntiDepBreaker.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp84
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp32
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp37
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h5
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp8
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp109
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h9
-rw-r--r--contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/BranchFolding.cpp21
-rw-r--r--contrib/llvm/lib/CodeGen/BranchFolding.h2
-rw-r--r--contrib/llvm/lib/CodeGen/CallingConvLower.cpp28
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGen.cpp1
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp51
-rw-r--r--contrib/llvm/lib/CodeGen/CoreCLRGC.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h2
-rw-r--r--contrib/llvm/lib/CodeGen/DFAPacketizer.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/EdgeBundles.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/FaultMaps.cpp114
-rw-r--r--contrib/llvm/lib/CodeGen/GCMetadata.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/GCRootLowering.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/IfConversion.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp261
-rw-r--r--contrib/llvm/lib/CodeGen/InlineSpiller.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp50
-rw-r--r--contrib/llvm/lib/CodeGen/LiveVariables.cpp39
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/CMakeLists.txt5
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/LLVMBuild.txt22
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp175
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/Makefile13
-rw-r--r--contrib/llvm/lib/CodeGen/MIRPrinter.cpp96
-rw-r--r--contrib/llvm/lib/CodeGen/MIRPrinter.h33
-rw-r--r--contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp44
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCombiner.cpp34
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunction.cpp103
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp8
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstr.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLICM.cpp21
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp19
-rw-r--r--contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineScheduler.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/MachineSink.cpp125
-rw-r--r--contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp10
-rw-r--r--contrib/llvm/lib/CodeGen/MachineVerifier.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/OptimizePHIs.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/PHIElimination.cpp25
-rw-r--r--contrib/llvm/lib/CodeGen/Passes.cpp11
-rw-r--r--contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocFast.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterCoalescer.h2
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterScavenging.cpp10
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp198
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp27
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp95
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp156
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h5
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp10
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp187
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp14
-rw-r--r--contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/Spiller.h2
-rw-r--r--contrib/llvm/lib/CodeGen/SplitKit.h2
-rw-r--r--contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/TailDuplication.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegMap.cpp81
-rw-r--r--contrib/llvm/lib/CodeGen/WinEHPrepare.cpp43
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp9
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.h6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp10
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp3
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp49
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp14
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h8
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp28
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h18
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp20
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp25
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h4
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h2
-rw-r--r--contrib/llvm/lib/IR/AsmWriter.cpp75
-rw-r--r--contrib/llvm/lib/IR/AttributeImpl.h16
-rw-r--r--contrib/llvm/lib/IR/Attributes.cpp3
-rw-r--r--contrib/llvm/lib/IR/BasicBlock.cpp5
-rw-r--r--contrib/llvm/lib/IR/ConstantFold.cpp8
-rw-r--r--contrib/llvm/lib/IR/ConstantFold.h2
-rw-r--r--contrib/llvm/lib/IR/Constants.cpp5
-rw-r--r--contrib/llvm/lib/IR/Core.cpp7
-rw-r--r--contrib/llvm/lib/IR/DIBuilder.cpp5
-rw-r--r--contrib/llvm/lib/IR/DiagnosticInfo.cpp12
-rw-r--r--contrib/llvm/lib/IR/DiagnosticPrinter.cpp10
-rw-r--r--contrib/llvm/lib/IR/Function.cpp48
-rw-r--r--contrib/llvm/lib/IR/GCOV.cpp2
-rw-r--r--contrib/llvm/lib/IR/Globals.cpp10
-rw-r--r--contrib/llvm/lib/IR/IRBuilder.cpp8
-rw-r--r--contrib/llvm/lib/IR/IRPrintingPasses.cpp2
-rw-r--r--contrib/llvm/lib/IR/Instruction.cpp6
-rw-r--r--contrib/llvm/lib/IR/Instructions.cpp181
-rw-r--r--contrib/llvm/lib/IR/LLVMContext.cpp35
-rw-r--r--contrib/llvm/lib/IR/LLVMContextImpl.cpp4
-rw-r--r--contrib/llvm/lib/IR/LLVMContextImpl.h2
-rw-r--r--contrib/llvm/lib/IR/LegacyPassManager.cpp10
-rw-r--r--contrib/llvm/lib/IR/Metadata.cpp25
-rw-r--r--contrib/llvm/lib/IR/Operator.cpp2
-rw-r--r--contrib/llvm/lib/IR/Pass.cpp2
-rw-r--r--contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h2
-rw-r--r--contrib/llvm/lib/IR/TypeFinder.cpp3
-rw-r--r--contrib/llvm/lib/IR/Use.cpp2
-rw-r--r--contrib/llvm/lib/IR/User.cpp84
-rw-r--r--contrib/llvm/lib/IR/Value.cpp3
-rw-r--r--contrib/llvm/lib/IR/Verifier.cpp31
-rw-r--r--contrib/llvm/lib/IRReader/IRReader.cpp9
-rw-r--r--contrib/llvm/lib/LTO/LTOModule.cpp26
-rw-r--r--contrib/llvm/lib/LibDriver/LibDriver.cpp157
-rw-r--r--contrib/llvm/lib/LibDriver/Options.td23
-rw-r--r--contrib/llvm/lib/Linker/LinkModules.cpp45
-rw-r--r--contrib/llvm/lib/MC/ELFObjectWriter.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCAsmStreamer.cpp5
-rw-r--r--contrib/llvm/lib/MC/MCAssembler.cpp142
-rw-r--r--contrib/llvm/lib/MC/MCContext.cpp30
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCDwarf.cpp21
-rw-r--r--contrib/llvm/lib/MC/MCELFStreamer.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCNullStreamer.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCObjectFileInfo.cpp17
-rw-r--r--contrib/llvm/lib/MC/MCObjectStreamer.cpp15
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmParser.cpp10
-rw-r--r--contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCStreamer.cpp25
-rw-r--r--contrib/llvm/lib/MC/MCSubtargetInfo.cpp17
-rw-r--r--contrib/llvm/lib/MC/MCSymbol.cpp19
-rw-r--r--contrib/llvm/lib/MC/MCSymbolELF.cpp18
-rw-r--r--contrib/llvm/lib/MC/MCWin64EH.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCWinEH.cpp4
-rw-r--r--contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp11
-rw-r--r--contrib/llvm/lib/MC/WinCOFFStreamer.cpp12
-rw-r--r--contrib/llvm/lib/Object/ArchiveWriter.cpp16
-rw-r--r--contrib/llvm/lib/Object/COFFObjectFile.cpp13
-rw-r--r--contrib/llvm/lib/Object/COFFYAML.cpp6
-rw-r--r--contrib/llvm/lib/Object/ELFYAML.cpp6
-rw-r--r--contrib/llvm/lib/Object/IRObjectFile.cpp19
-rw-r--r--contrib/llvm/lib/Object/MachOObjectFile.cpp7
-rw-r--r--contrib/llvm/lib/Object/RecordStreamer.h2
-rw-r--r--contrib/llvm/lib/Option/OptTable.cpp4
-rw-r--r--contrib/llvm/lib/ProfileData/CoverageMapping.cpp9
-rw-r--r--contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp2
-rw-r--r--contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp2
-rw-r--r--contrib/llvm/lib/ProfileData/InstrProf.cpp2
-rw-r--r--contrib/llvm/lib/ProfileData/InstrProfIndexed.h2
-rw-r--r--contrib/llvm/lib/ProfileData/InstrProfWriter.cpp2
-rw-r--r--contrib/llvm/lib/ProfileData/SampleProf.cpp2
-rw-r--r--contrib/llvm/lib/Support/APFloat.cpp4
-rw-r--r--contrib/llvm/lib/Support/APInt.cpp2
-rw-r--r--contrib/llvm/lib/Support/ARMBuildAttrs.cpp6
-rw-r--r--contrib/llvm/lib/Support/ARMWinEH.cpp6
-rw-r--r--contrib/llvm/lib/Support/Allocator.cpp2
-rw-r--r--contrib/llvm/lib/Support/CommandLine.cpp42
-rw-r--r--contrib/llvm/lib/Support/CrashRecoveryContext.cpp2
-rw-r--r--contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp2
-rw-r--r--contrib/llvm/lib/Support/DataStream.cpp14
-rw-r--r--contrib/llvm/lib/Support/Debug.cpp2
-rw-r--r--contrib/llvm/lib/Support/FileOutputBuffer.cpp2
-rw-r--r--contrib/llvm/lib/Support/Locale.cpp2
-rw-r--r--contrib/llvm/lib/Support/MD5.cpp2
-rw-r--r--contrib/llvm/lib/Support/MathExtras.cpp2
-rw-r--r--contrib/llvm/lib/Support/MemoryBuffer.cpp4
-rw-r--r--contrib/llvm/lib/Support/Mutex.cpp2
-rw-r--r--contrib/llvm/lib/Support/RWMutex.cpp2
-rw-r--r--contrib/llvm/lib/Support/SourceMgr.cpp46
-rw-r--r--contrib/llvm/lib/Support/Statistic.cpp2
-rw-r--r--contrib/llvm/lib/Support/StreamingMemoryObject.cpp11
-rw-r--r--contrib/llvm/lib/Support/StringSaver.cpp19
-rw-r--r--contrib/llvm/lib/Support/TargetParser.cpp6
-rw-r--r--contrib/llvm/lib/Support/TimeValue.cpp2
-rw-r--r--contrib/llvm/lib/Support/Timer.cpp4
-rw-r--r--contrib/llvm/lib/Support/Triple.cpp7
-rw-r--r--contrib/llvm/lib/Support/Unix/Process.inc2
-rw-r--r--contrib/llvm/lib/Support/Unix/Program.inc9
-rw-r--r--contrib/llvm/lib/Support/Unix/ThreadLocal.inc2
-rw-r--r--contrib/llvm/lib/Support/Unix/TimeValue.inc2
-rw-r--r--contrib/llvm/lib/Support/Unix/Watchdog.inc4
-rw-r--r--contrib/llvm/lib/Support/Windows/Memory.inc10
-rw-r--r--contrib/llvm/lib/Support/Windows/Program.inc3
-rw-r--r--contrib/llvm/lib/Support/YAMLParser.cpp8
-rw-r--r--contrib/llvm/lib/TableGen/TGLexer.h2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp6
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp259
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h7
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td77
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp56
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h41
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td29
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp5
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.h2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.h2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp8
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h7
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp38
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h6
-rw-r--r--contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp20
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h10
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h22
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPU.h148
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPU.td266
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp67
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp602
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h113
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td82
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp112
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h45
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp1371
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp2866
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h307
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp369
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h206
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td245
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td682
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp77
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h48
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td90
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp154
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h35
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp25
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h45
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp407
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp63
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h64
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td26
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp133
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h282
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp292
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h89
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp82
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h78
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp1912
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h704
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp1380
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/CIInstructions.td149
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td226
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td670
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp642
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h88
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp144
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp39
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h34
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp43
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h32
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp21
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h50
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp90
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h61
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp181
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp289
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Processors.td137
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp206
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp679
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600Defines.h171
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp336
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp349
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp2286
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.h80
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrFormats.td495
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp1435
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h303
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600Instructions.td1744
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600Intrinsics.td75
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h34
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp469
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.h103
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp382
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp408
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp91
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.h49
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.td252
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600Schedule.td49
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp303
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R700Instructions.td21
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp365
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIDefines.h172
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp96
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp338
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp192
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp288
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp2241
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h125
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp480
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td673
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp2723
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h391
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td2647
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstructions.td3327
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td199
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp421
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp605
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp151
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp77
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h66
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp194
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp543
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h131
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td284
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SISchedule.td91
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp272
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp161
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp30
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VIInstrFormats.td166
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VIInstructions.td106
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp6
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h15
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFeatures.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp110
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp7
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h8
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp36
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.h34
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp23
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h5
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h5
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h5
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp24
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h28
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFInstrInfo.h5
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFMCInstLower.h2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFSubtarget.h6
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp15
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFTargetMachine.h6
-rw-r--r--contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp10
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h2
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp2
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp4
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h9
-rw-r--r--contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp4
-rw-r--r--contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h10
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp120
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.h67
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp1
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp1
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp15
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp13
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h18
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp1
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonOperands.td87
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp20
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h31
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp29
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp504
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp41
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp76
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp152
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h45
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp17
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h19
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp118
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h2
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp4
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.h4
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h7
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h4
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp380
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp27
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp60
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp10
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h21
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td19
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td8
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16HardFloatInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16HardFloatInfo.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp8
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCCState.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.h7
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.td35
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCInstLower.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsOs16.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.h9
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp36
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.h14
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h4
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/ManagedStringPool.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTX.h11
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp7
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp116
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp115
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h4
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp31
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h11
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp7
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h5
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td79
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp7
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp44
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp17
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h24
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h14
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp37
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h14
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp13
-rw-r--r--contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp6
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp27
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h4
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp7
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h5
-rw-r--r--contrib/llvm/lib/Target/Sparc/Sparc.h4
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h5
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.h2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp21
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h17
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp5
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h6
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp9
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp11
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h2
-rw-r--r--contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp4
-rw-r--r--contrib/llvm/lib/Target/TargetMachine.cpp3
-rw-r--r--contrib/llvm/lib/Target/TargetMachineC.cpp2
-rw-r--r--contrib/llvm/lib/Target/TargetSubtargetInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h2
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp51
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp10
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h8
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp3
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h4
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp18
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h22
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h2
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h2
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp11
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.h6
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.h2
-rw-r--r--contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp14
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp495
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.h64
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp349
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h16
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrAVX512.td318
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrBuilder.h2
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td10
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.cpp338
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.h62
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td15
-rw-r--r--contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h61
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.cpp102
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h2
-rw-r--r--contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.h4
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h2
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.cpp5
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.h7
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.cpp21
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.h6
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86WinEHState.cpp99
-rw-r--r--contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp6
-rw-r--r--contrib/llvm/lib/Target/XCore/XCore.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.h4
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h5
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMCInstLower.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSubtarget.h6
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h4
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetStreamer.h2
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp22
-rw-r--r--contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp15
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp5
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Inliner.cpp31
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp30
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PruneEH.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp18
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp123
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp608
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/BlotMapVector.h2
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp23
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h8
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ADCE.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/BDCE.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DCE.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp60
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVN.cpp55
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp15
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp70
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LICM.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp40
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp18
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp18
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp19
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp24
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp19
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SROA.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Sink.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp13
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp40
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LCSSA.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp92
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp19
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp129
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp29
-rw-r--r--contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp34
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp16
-rw-r--r--contrib/llvm/tools/clang/include/clang-c/Index.h11
-rw-r--r--contrib/llvm/tools/clang/include/clang/ARCMigrate/ARCMT.h36
-rw-r--r--contrib/llvm/tools/clang/include/clang/AST/ASTContext.h30
-rw-r--r--contrib/llvm/tools/clang/include/clang/AST/CommentParser.h6
-rw-r--r--contrib/llvm/tools/clang/include/clang/AST/DataRecursiveASTVisitor.h10
-rw-r--r--contrib/llvm/tools/clang/include/clang/AST/DeclBase.h7
-rw-r--r--contrib/llvm/tools/clang/include/clang/AST/DeclCXX.h2
-rw-r--r--contrib/llvm/tools/clang/include/clang/AST/DeclObjC.h34
-rw-r--r--contrib/llvm/tools/clang/include/clang/AST/DeclTemplate.h193
-rw-r--r--contrib/llvm/tools/clang/include/clang/AST/EvaluatedExprVisitor.h71
-rw-r--r--contrib/llvm/tools/clang/include/clang/AST/Expr.h78
-rw-r--r--contrib/llvm/tools/clang/include/clang/AST/OpenMPClause.h4
-rw-r--r--contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h5
-rw-r--r--contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h86
-rw-r--r--contrib/llvm/tools/clang/include/clang/AST/Type.h47
-rw-r--r--contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchersMacros.h183
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/Attr.td33
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/Builtins.def4
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/BuiltinsAArch64.def8
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/BuiltinsAMDGPU.def (renamed from contrib/llvm/tools/clang/include/clang/Basic/BuiltinsR600.def)2
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/BuiltinsARM.def8
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/BuiltinsPPC.def3
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def3
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/DiagnosticCommonKinds.td31
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td5
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/DiagnosticLexKinds.td17
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.def5
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.h33
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/DiagnosticParseKinds.td12
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td78
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/FileManager.h8
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/LangOptions.def3
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def1
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/Sanitizers.def21
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/Sanitizers.h3
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h19
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/StmtNodes.td3
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/TargetBuiltins.h6
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/TargetInfo.h4
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/TargetOptions.h2
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/TokenKinds.def5
-rw-r--r--contrib/llvm/tools/clang/include/clang/Basic/arm_neon.td13
-rw-r--r--contrib/llvm/tools/clang/include/clang/Driver/CC1Options.td4
-rw-r--r--contrib/llvm/tools/clang/include/clang/Driver/CLCompatOptions.td4
-rw-r--r--contrib/llvm/tools/clang/include/clang/Driver/Driver.h4
-rw-r--r--contrib/llvm/tools/clang/include/clang/Driver/Options.td25
-rw-r--r--contrib/llvm/tools/clang/include/clang/Driver/SanitizerArgs.h6
-rw-r--r--contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h9
-rw-r--r--contrib/llvm/tools/clang/include/clang/Format/Format.h9
-rw-r--r--contrib/llvm/tools/clang/include/clang/Frontend/ASTUnit.h52
-rw-r--r--contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def2
-rw-r--r--contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.h3
-rw-r--r--contrib/llvm/tools/clang/include/clang/Frontend/CompilerInstance.h14
-rw-r--r--contrib/llvm/tools/clang/include/clang/Frontend/FrontendActions.h15
-rw-r--r--contrib/llvm/tools/clang/include/clang/Frontend/PCHContainerOperations.h76
-rw-r--r--contrib/llvm/tools/clang/include/clang/Frontend/Utils.h5
-rw-r--r--contrib/llvm/tools/clang/include/clang/Lex/HeaderSearchOptions.h7
-rw-r--r--contrib/llvm/tools/clang/include/clang/Lex/ModuleMap.h7
-rw-r--r--contrib/llvm/tools/clang/include/clang/Lex/Preprocessor.h23
-rw-r--r--contrib/llvm/tools/clang/include/clang/Lex/Token.h7
-rw-r--r--contrib/llvm/tools/clang/include/clang/Parse/Parser.h22
-rw-r--r--contrib/llvm/tools/clang/include/clang/Rewrite/Frontend/FixItRewriter.h6
-rw-r--r--contrib/llvm/tools/clang/include/clang/Sema/AttributeList.h12
-rw-r--r--contrib/llvm/tools/clang/include/clang/Sema/DeclSpec.h65
-rw-r--r--contrib/llvm/tools/clang/include/clang/Sema/Lookup.h8
-rw-r--r--contrib/llvm/tools/clang/include/clang/Sema/Sema.h148
-rw-r--r--contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h5
-rw-r--r--contrib/llvm/tools/clang/include/clang/Serialization/ASTReader.h30
-rw-r--r--contrib/llvm/tools/clang/include/clang/Serialization/ASTWriter.h13
-rw-r--r--contrib/llvm/tools/clang/include/clang/Serialization/GlobalModuleIndex.h8
-rw-r--r--contrib/llvm/tools/clang/include/clang/Serialization/ModuleManager.h11
-rw-r--r--contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Checkers/ObjCRetainCount.h8
-rw-r--r--contrib/llvm/tools/clang/include/clang/Tooling/Refactoring.h4
-rw-r--r--contrib/llvm/tools/clang/include/clang/Tooling/Tooling.h63
-rw-r--r--contrib/llvm/tools/clang/lib/ARCMigrate/ARCMT.cpp82
-rw-r--r--contrib/llvm/tools/clang/lib/ARCMigrate/ARCMTActions.cpp12
-rw-r--r--contrib/llvm/tools/clang/lib/AST/ASTConsumer.cpp1
-rw-r--r--contrib/llvm/tools/clang/lib/AST/ASTContext.cpp11
-rw-r--r--contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp7
-rw-r--r--contrib/llvm/tools/clang/lib/AST/Decl.cpp3
-rw-r--r--contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp8
-rw-r--r--contrib/llvm/tools/clang/lib/AST/DeclPrinter.cpp68
-rw-r--r--contrib/llvm/tools/clang/lib/AST/DeclTemplate.cpp34
-rw-r--r--contrib/llvm/tools/clang/lib/AST/Expr.cpp85
-rw-r--r--contrib/llvm/tools/clang/lib/AST/ExprClassification.cpp2
-rw-r--r--contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp2
-rw-r--r--contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp11
-rw-r--r--contrib/llvm/tools/clang/lib/AST/MicrosoftCXXABI.cpp5
-rw-r--r--contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp12
-rw-r--r--contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp2
-rw-r--r--contrib/llvm/tools/clang/lib/AST/Stmt.cpp33
-rw-r--r--contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp21
-rw-r--r--contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp16
-rw-r--r--contrib/llvm/tools/clang/lib/AST/Type.cpp152
-rw-r--r--contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp37
-rw-r--r--contrib/llvm/tools/clang/lib/Analysis/CFG.cpp2
-rw-r--r--contrib/llvm/tools/clang/lib/Analysis/CocoaConventions.cpp2
-rw-r--r--contrib/llvm/tools/clang/lib/Basic/Diagnostic.cpp12
-rw-r--r--contrib/llvm/tools/clang/lib/Basic/DiagnosticOptions.cpp24
-rw-r--r--contrib/llvm/tools/clang/lib/Basic/FileSystemStatCache.cpp11
-rw-r--r--contrib/llvm/tools/clang/lib/Basic/IdentifierTable.cpp15
-rw-r--r--contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp1
-rw-r--r--contrib/llvm/tools/clang/lib/Basic/Sanitizers.cpp4
-rw-r--r--contrib/llvm/tools/clang/lib/Basic/Targets.cpp103
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp22
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp163
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h3
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp73
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp66
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp19
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp15
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp7
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp17
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp30
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp14
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp24
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp156
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp8
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp31
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h5
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp10
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp94
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h24
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp21
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp408
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp22
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp4
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp4
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h34
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp76
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h5
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp3
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp12
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp144
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/SanitizerMetadata.cpp9
-rw-r--r--contrib/llvm/tools/clang/lib/Driver/Driver.cpp34
-rw-r--r--contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp9
-rw-r--r--contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp138
-rw-r--r--contrib/llvm/tools/clang/lib/Driver/ToolChain.cpp9
-rw-r--r--contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp186
-rw-r--r--contrib/llvm/tools/clang/lib/Driver/ToolChains.h39
-rw-r--r--contrib/llvm/tools/clang/lib/Driver/Tools.cpp359
-rw-r--r--contrib/llvm/tools/clang/lib/Driver/Tools.h35
-rw-r--r--contrib/llvm/tools/clang/lib/Driver/Types.cpp2
-rw-r--r--contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp14
-rw-r--r--contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp40
-rw-r--r--contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h81
-rw-r--r--contrib/llvm/tools/clang/lib/Format/Format.cpp17
-rw-r--r--contrib/llvm/tools/clang/lib/Format/FormatToken.cpp7
-rw-r--r--contrib/llvm/tools/clang/lib/Format/FormatToken.h18
-rw-r--r--contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp100
-rw-r--r--contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h20
-rw-r--r--contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp25
-rw-r--r--contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h3
-rw-r--r--contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp54
-rw-r--r--contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h6
-rw-r--r--contrib/llvm/tools/clang/lib/Frontend/ASTMerge.cpp6
-rw-r--r--contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp109
-rw-r--r--contrib/llvm/tools/clang/lib/Frontend/ChainedIncludesSource.cpp22
-rw-r--r--contrib/llvm/tools/clang/lib/Frontend/CompilerInstance.cpp56
-rw-r--r--contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp44
-rw-r--r--contrib/llvm/tools/clang/lib/Frontend/FrontendAction.cpp16
-rw-r--r--contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp59
-rw-r--r--contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp15
-rw-r--r--contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp26
-rw-r--r--contrib/llvm/tools/clang/lib/Frontend/PCHContainerOperations.cpp70
-rw-r--r--contrib/llvm/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp52
-rw-r--r--contrib/llvm/tools/clang/lib/Frontend/Rewrite/FixItRewriter.cpp7
-rw-r--r--contrib/llvm/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp5
-rw-r--r--contrib/llvm/tools/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp41
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/Intrin.h177
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/__wmmintrin_aes.h17
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/__wmmintrin_pclmul.h4
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/adxintrin.h19
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/altivec.h23
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/ammintrin.h17
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/arm_acle.h8
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/avx2intrin.h287
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h135
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/avx512dqintrin.h59
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h1
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h390
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h237
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/avx512vldqintrin.h80
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h318
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/avxintrin.h309
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/bmi2intrin.h25
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/bmiintrin.h39
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/emmintrin.h439
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/f16cintrin.h13
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/fma4intrin.h73
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/fmaintrin.h71
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/immintrin.h83
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/lzcntintrin.h19
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/mm3dnow.h57
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/mmintrin.h144
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/nmmintrin.h5
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/pmmintrin.h33
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/popcntintrin.h13
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/rdseedintrin.h14
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/rtmintrin.h9
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/shaintrin.h19
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/smmintrin.h92
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/tbmintrin.h54
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/tmmintrin.h69
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/wmmintrin.h9
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/x86intrin.h24
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/xmmintrin.h261
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/xopintrin.h233
-rw-r--r--contrib/llvm/tools/clang/lib/Headers/xtestintrin.h41
-rw-r--r--contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp14
-rw-r--r--contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp36
-rw-r--r--contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp22
-rw-r--r--contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp11
-rw-r--r--contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp17
-rw-r--r--contrib/llvm/tools/clang/lib/Lex/Pragma.cpp55
-rw-r--r--contrib/llvm/tools/clang/lib/Lex/TokenConcatenation.cpp20
-rw-r--r--contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp2
-rw-r--r--contrib/llvm/tools/clang/lib/Parse/ParseCXXInlineMethods.cpp9
-rw-r--r--contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp95
-rw-r--r--contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp161
-rw-r--r--contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp47
-rw-r--r--contrib/llvm/tools/clang/lib/Parse/ParseExprCXX.cpp9
-rw-r--r--contrib/llvm/tools/clang/lib/Parse/ParseObjc.cpp169
-rw-r--r--contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp7
-rw-r--r--contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp9
-rw-r--r--contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp17
-rw-r--r--contrib/llvm/tools/clang/lib/Parse/ParseStmtAsm.cpp4
-rw-r--r--contrib/llvm/tools/clang/lib/Parse/ParseTemplate.cpp41
-rw-r--r--contrib/llvm/tools/clang/lib/Parse/ParseTentative.cpp71
-rw-r--r--contrib/llvm/tools/clang/lib/Parse/Parser.cpp4
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp17
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/Sema.cpp27
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp300
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp71
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp91
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp118
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp1
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaDeclObjC.cpp168
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaExceptionSpec.cpp2
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp9
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp137
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp232
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp133
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp115
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp64
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp53
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp2
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaStmtAttr.cpp4
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp89
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp14
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/SemaType.cpp840
-rw-r--r--contrib/llvm/tools/clang/lib/Sema/TreeTransform.h41
-rw-r--r--contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp55
-rw-r--r--contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp277
-rw-r--r--contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp31
-rw-r--r--contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp5
-rw-r--r--contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp29
-rw-r--r--contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp21
-rw-r--r--contrib/llvm/tools/clang/lib/Serialization/GeneratePCH.cpp33
-rw-r--r--contrib/llvm/tools/clang/lib/Serialization/GlobalModuleIndex.cpp17
-rw-r--r--contrib/llvm/tools/clang/lib/Serialization/ModuleManager.cpp14
-rw-r--r--contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp25
-rw-r--r--contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp74
-rw-r--r--contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp2
-rw-r--r--contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CallEvent.cpp2
-rw-r--r--contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp3
-rw-r--r--contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp3
-rw-r--r--contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/ModelInjector.cpp2
-rw-r--r--contrib/llvm/tools/clang/lib/Tooling/Core/Replacement.cpp7
-rw-r--r--contrib/llvm/tools/clang/lib/Tooling/Refactoring.cpp7
-rw-r--r--contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp95
-rw-r--r--contrib/llvm/tools/clang/tools/driver/cc1_main.cpp4
-rw-r--r--contrib/llvm/tools/clang/tools/driver/cc1as_main.cpp4
-rw-r--r--contrib/llvm/tools/clang/tools/driver/driver.cpp18
-rw-r--r--contrib/llvm/tools/clang/utils/TableGen/ClangAttrEmitter.cpp3
-rw-r--r--contrib/llvm/tools/llc/llc.cpp13
-rw-r--r--contrib/llvm/tools/lli/OrcLazyJIT.cpp2
-rw-r--r--contrib/llvm/tools/lli/OrcLazyJIT.h16
-rw-r--r--contrib/llvm/tools/llvm-ar/llvm-ar.cpp17
-rw-r--r--contrib/llvm/tools/llvm-dis/llvm-dis.cpp5
-rw-r--r--contrib/llvm/tools/llvm-dwarfdump/LLVMBuild.txt22
-rw-r--r--contrib/llvm/tools/llvm-dwarfdump/fuzzer/CMakeLists.txt14
-rw-r--r--contrib/llvm/tools/llvm-dwarfdump/fuzzer/llvm-dwarfdump-fuzzer.cpp34
-rw-r--r--contrib/llvm/tools/llvm-mc/llvm-mc.cpp9
-rw-r--r--contrib/llvm/tools/llvm-objdump/MachODump.cpp2
-rw-r--r--contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp2
-rw-r--r--contrib/llvm/tools/llvm-readobj/ELFDumper.cpp34
-rw-r--r--contrib/llvm/tools/llvm-readobj/MachODumper.cpp46
-rw-r--r--contrib/llvm/tools/llvm-readobj/ObjDumper.h1
-rw-r--r--contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp6
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenTarget.cpp2
-rw-r--r--contrib/llvm/utils/TableGen/SubtargetEmitter.cpp9
1608 files changed, 65767 insertions, 12109 deletions
diff --git a/contrib/llvm/include/llvm-c/Core.h b/contrib/llvm/include/llvm-c/Core.h
index 73bff0b..713894f 100644
--- a/contrib/llvm/include/llvm-c/Core.h
+++ b/contrib/llvm/include/llvm-c/Core.h
@@ -161,15 +161,15 @@ typedef enum {
/* FIXME: These attributes are currently not included in the C API as
a temporary measure until the API/ABI impact to the C API is understood
and the path forward agreed upon.
- LLVMAddressSafety = 1ULL << 32,
- LLVMStackProtectStrongAttribute = 1ULL<<33,
- LLVMCold = 1ULL << 34,
- LLVMOptimizeNone = 1ULL << 35,
- LLVMInAllocaAttribute = 1ULL << 36,
- LLVMNonNullAttribute = 1ULL << 37,
- LLVMJumpTableAttribute = 1ULL << 38,
- LLVMDereferenceableAttribute = 1ULL << 39,
- LLVMDereferenceableOrNullAttribute = 1ULL << 40,
+ LLVMSanitizeAddressAttribute = 1ULL << 32,
+ LLVMStackProtectStrongAttribute = 1ULL<<35,
+ LLVMColdAttribute = 1ULL << 40,
+ LLVMOptimizeNoneAttribute = 1ULL << 42,
+ LLVMInAllocaAttribute = 1ULL << 43,
+ LLVMNonNullAttribute = 1ULL << 44,
+ LLVMJumpTableAttribute = 1ULL << 45,
+ LLVMConvergentAttribute = 1ULL << 46,
+ LLVMSafeStackAttribute = 1ULL << 47,
*/
} LLVMAttribute;
@@ -2661,8 +2661,7 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef, LLVMValueRef Fn,
LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
const char *Name);
LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty,
- LLVMValueRef PersFn, unsigned NumClauses,
- const char *Name);
+ unsigned NumClauses, const char *Name);
LLVMValueRef LLVMBuildResume(LLVMBuilderRef B, LLVMValueRef Exn);
LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef);
diff --git a/contrib/llvm/include/llvm-c/lto.h b/contrib/llvm/include/llvm-c/lto.h
index 1db0778..9f37dd7 100644
--- a/contrib/llvm/include/llvm-c/lto.h
+++ b/contrib/llvm/include/llvm-c/lto.h
@@ -62,7 +62,8 @@ typedef enum {
LTO_SYMBOL_SCOPE_HIDDEN = 0x00001000,
LTO_SYMBOL_SCOPE_PROTECTED = 0x00002000,
LTO_SYMBOL_SCOPE_DEFAULT = 0x00001800,
- LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN = 0x00002800
+ LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN = 0x00002800,
+ LTO_SYMBOL_COMDAT = 0x00004000
} lto_symbol_attributes;
/**
diff --git a/contrib/llvm/include/llvm/ADT/APInt.h b/contrib/llvm/include/llvm/ADT/APInt.h
index e5d143d..a790203 100644
--- a/contrib/llvm/include/llvm/ADT/APInt.h
+++ b/contrib/llvm/include/llvm/ADT/APInt.h
@@ -129,7 +129,7 @@ class APInt {
/// \brief Clear unused high order bits
///
- /// This method is used internally to clear the to "N" bits in the high order
+ /// This method is used internally to clear the top "N" bits in the high order
/// word that are not used by the APInt. This is needed after the most
/// significant word is assigned a value to ensure that those bits are
/// zero'd out.
@@ -795,7 +795,7 @@ public:
/// \brief Bitwise OR function.
///
- /// Performs a bitwise or on *this and RHS. This is implemented bny simply
+ /// Performs a bitwise or on *this and RHS. This is implemented by simply
/// calling operator|.
///
/// \returns An APInt value representing the bitwise OR of *this and RHS.
@@ -1896,11 +1896,11 @@ inline APInt Xor(const APInt &LHS, const APInt &RHS) { return LHS ^ RHS; }
/// Performs a bitwise complement operation on APInt.
inline APInt Not(const APInt &APIVal) { return ~APIVal; }
-} // End of APIntOps namespace
+} // namespace APIntOps
// See friend declaration above. This additional declaration is required in
// order to compile LLVM with IBM xlC compiler.
hash_code hash_value(const APInt &Arg);
-} // End of llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/ArrayRef.h b/contrib/llvm/include/llvm/ADT/ArrayRef.h
index 1b2bdff..397e2ee 100644
--- a/contrib/llvm/include/llvm/ADT/ArrayRef.h
+++ b/contrib/llvm/include/llvm/ADT/ArrayRef.h
@@ -361,6 +361,6 @@ namespace llvm {
template <typename T> struct isPodLike<ArrayRef<T> > {
static const bool value = true;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/BitVector.h b/contrib/llvm/include/llvm/ADT/BitVector.h
index f58dd73..e57171d 100644
--- a/contrib/llvm/include/llvm/ADT/BitVector.h
+++ b/contrib/llvm/include/llvm/ADT/BitVector.h
@@ -568,7 +568,7 @@ private:
}
};
-} // End llvm namespace
+} // namespace llvm
namespace std {
/// Implement std::swap in terms of BitVector swap.
diff --git a/contrib/llvm/include/llvm/ADT/DenseMap.h b/contrib/llvm/include/llvm/ADT/DenseMap.h
index 27f7315..bf58bec 100644
--- a/contrib/llvm/include/llvm/ADT/DenseMap.h
+++ b/contrib/llvm/include/llvm/ADT/DenseMap.h
@@ -42,7 +42,7 @@ struct DenseMapPair : public std::pair<KeyT, ValueT> {
ValueT &getSecond() { return std::pair<KeyT, ValueT>::second; }
const ValueT &getSecond() const { return std::pair<KeyT, ValueT>::second; }
};
-}
+} // namespace detail
template <
typename KeyT, typename ValueT, typename KeyInfoT = DenseMapInfo<KeyT>,
diff --git a/contrib/llvm/include/llvm/ADT/DenseSet.h b/contrib/llvm/include/llvm/ADT/DenseSet.h
index d340240..b1631be 100644
--- a/contrib/llvm/include/llvm/ADT/DenseSet.h
+++ b/contrib/llvm/include/llvm/ADT/DenseSet.h
@@ -32,7 +32,7 @@ public:
DenseSetEmpty &getSecond() { return *this; }
const DenseSetEmpty &getSecond() const { return *this; }
};
-}
+} // namespace detail
/// DenseSet - This implements a dense probed hash-table based set.
template<typename ValueT, typename ValueInfoT = DenseMapInfo<ValueT> >
diff --git a/contrib/llvm/include/llvm/ADT/DepthFirstIterator.h b/contrib/llvm/include/llvm/ADT/DepthFirstIterator.h
index d79b9ac..01bbe1a 100644
--- a/contrib/llvm/include/llvm/ADT/DepthFirstIterator.h
+++ b/contrib/llvm/include/llvm/ADT/DepthFirstIterator.h
@@ -288,6 +288,6 @@ iterator_range<idf_ext_iterator<T, SetTy>> inverse_depth_first_ext(const T& G,
return make_range(idf_ext_begin(G, S), idf_ext_end(G, S));
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/EquivalenceClasses.h b/contrib/llvm/include/llvm/ADT/EquivalenceClasses.h
index d6a26f8..6e87dbd 100644
--- a/contrib/llvm/include/llvm/ADT/EquivalenceClasses.h
+++ b/contrib/llvm/include/llvm/ADT/EquivalenceClasses.h
@@ -278,6 +278,6 @@ public:
};
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/GraphTraits.h b/contrib/llvm/include/llvm/ADT/GraphTraits.h
index 823caef..21bf23b 100644
--- a/contrib/llvm/include/llvm/ADT/GraphTraits.h
+++ b/contrib/llvm/include/llvm/ADT/GraphTraits.h
@@ -101,6 +101,6 @@ struct GraphTraits<Inverse<Inverse<T> > > {
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/Hashing.h b/contrib/llvm/include/llvm/ADT/Hashing.h
index 77e6d77..de56f91 100644
--- a/contrib/llvm/include/llvm/ADT/Hashing.h
+++ b/contrib/llvm/include/llvm/ADT/Hashing.h
@@ -53,6 +53,7 @@
#include <cassert>
#include <cstring>
#include <iterator>
+#include <string>
#include <utility>
namespace llvm {
diff --git a/contrib/llvm/include/llvm/ADT/IndexedMap.h b/contrib/llvm/include/llvm/ADT/IndexedMap.h
index 5ba85c0..ae9c695 100644
--- a/contrib/llvm/include/llvm/ADT/IndexedMap.h
+++ b/contrib/llvm/include/llvm/ADT/IndexedMap.h
@@ -80,6 +80,6 @@ template <typename T, typename ToIndexT = llvm::identity<unsigned> >
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/IntEqClasses.h b/contrib/llvm/include/llvm/ADT/IntEqClasses.h
index 8e75c48..9dbc228 100644
--- a/contrib/llvm/include/llvm/ADT/IntEqClasses.h
+++ b/contrib/llvm/include/llvm/ADT/IntEqClasses.h
@@ -83,6 +83,6 @@ public:
void uncompress();
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/Optional.h b/contrib/llvm/include/llvm/ADT/Optional.h
index 855ab89..dd48497 100644
--- a/contrib/llvm/include/llvm/ADT/Optional.h
+++ b/contrib/llvm/include/llvm/ADT/Optional.h
@@ -204,6 +204,6 @@ void operator>=(const Optional<T> &X, const Optional<U> &Y);
template<typename T, typename U>
void operator>(const Optional<T> &X, const Optional<U> &Y);
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/PointerUnion.h b/contrib/llvm/include/llvm/ADT/PointerUnion.h
index f27b811..3c63a52 100644
--- a/contrib/llvm/include/llvm/ADT/PointerUnion.h
+++ b/contrib/llvm/include/llvm/ADT/PointerUnion.h
@@ -507,6 +507,6 @@ namespace llvm {
RHS.template get<U>()));
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/PostOrderIterator.h b/contrib/llvm/include/llvm/ADT/PostOrderIterator.h
index 759a2db2..059d7b0 100644
--- a/contrib/llvm/include/llvm/ADT/PostOrderIterator.h
+++ b/contrib/llvm/include/llvm/ADT/PostOrderIterator.h
@@ -295,6 +295,6 @@ public:
rpo_iterator end() { return Blocks.rend(); }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/PriorityQueue.h b/contrib/llvm/include/llvm/ADT/PriorityQueue.h
index 827d0b3..869ef81 100644
--- a/contrib/llvm/include/llvm/ADT/PriorityQueue.h
+++ b/contrib/llvm/include/llvm/ADT/PriorityQueue.h
@@ -79,6 +79,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/SCCIterator.h b/contrib/llvm/include/llvm/ADT/SCCIterator.h
index bc74416..dc78274 100644
--- a/contrib/llvm/include/llvm/ADT/SCCIterator.h
+++ b/contrib/llvm/include/llvm/ADT/SCCIterator.h
@@ -240,6 +240,6 @@ template <class T> scc_iterator<Inverse<T> > scc_end(const Inverse<T> &G) {
return scc_iterator<Inverse<T> >::end(G);
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/STLExtras.h b/contrib/llvm/include/llvm/ADT/STLExtras.h
index b68345a..14204c1 100644
--- a/contrib/llvm/include/llvm/ADT/STLExtras.h
+++ b/contrib/llvm/include/llvm/ADT/STLExtras.h
@@ -417,6 +417,6 @@ template <typename T> struct deref {
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/SetOperations.h b/contrib/llvm/include/llvm/ADT/SetOperations.h
index 71f5db3..b5f4177 100644
--- a/contrib/llvm/include/llvm/ADT/SetOperations.h
+++ b/contrib/llvm/include/llvm/ADT/SetOperations.h
@@ -66,6 +66,6 @@ void set_subtract(S1Ty &S1, const S2Ty &S2) {
S1.erase(*SI);
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/SetVector.h b/contrib/llvm/include/llvm/ADT/SetVector.h
index a7fd408..f15f4f7 100644
--- a/contrib/llvm/include/llvm/ADT/SetVector.h
+++ b/contrib/llvm/include/llvm/ADT/SetVector.h
@@ -225,7 +225,7 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
// vim: sw=2 ai
#endif
diff --git a/contrib/llvm/include/llvm/ADT/SmallBitVector.h b/contrib/llvm/include/llvm/ADT/SmallBitVector.h
index ae3d645..a74b7bf 100644
--- a/contrib/llvm/include/llvm/ADT/SmallBitVector.h
+++ b/contrib/llvm/include/llvm/ADT/SmallBitVector.h
@@ -588,7 +588,7 @@ operator^(const SmallBitVector &LHS, const SmallBitVector &RHS) {
return Result;
}
-} // End llvm namespace
+} // namespace llvm
namespace std {
/// Implement std::swap in terms of BitVector swap.
diff --git a/contrib/llvm/include/llvm/ADT/SmallPtrSet.h b/contrib/llvm/include/llvm/ADT/SmallPtrSet.h
index 3e3c9c1..0d1635a 100644
--- a/contrib/llvm/include/llvm/ADT/SmallPtrSet.h
+++ b/contrib/llvm/include/llvm/ADT/SmallPtrSet.h
@@ -334,7 +334,7 @@ public:
}
};
-}
+} // namespace llvm
namespace std {
/// Implement std::swap in terms of SmallPtrSet swap.
diff --git a/contrib/llvm/include/llvm/ADT/SmallString.h b/contrib/llvm/include/llvm/ADT/SmallString.h
index e569f54..92cd689 100644
--- a/contrib/llvm/include/llvm/ADT/SmallString.h
+++ b/contrib/llvm/include/llvm/ADT/SmallString.h
@@ -292,6 +292,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/SmallVector.h b/contrib/llvm/include/llvm/ADT/SmallVector.h
index 5b208b7..b334ac0 100644
--- a/contrib/llvm/include/llvm/ADT/SmallVector.h
+++ b/contrib/llvm/include/llvm/ADT/SmallVector.h
@@ -924,7 +924,7 @@ static inline size_t capacity_in_bytes(const SmallVector<T, N> &X) {
return X.capacity_in_bytes();
}
-} // End llvm namespace
+} // namespace llvm
namespace std {
/// Implement std::swap in terms of SmallVector swap.
@@ -940,6 +940,6 @@ namespace std {
swap(llvm::SmallVector<T, N> &LHS, llvm::SmallVector<T, N> &RHS) {
LHS.swap(RHS);
}
-}
+} // namespace std
#endif
diff --git a/contrib/llvm/include/llvm/ADT/Statistic.h b/contrib/llvm/include/llvm/ADT/Statistic.h
index d98abc3..264c6b5 100644
--- a/contrib/llvm/include/llvm/ADT/Statistic.h
+++ b/contrib/llvm/include/llvm/ADT/Statistic.h
@@ -176,6 +176,6 @@ void PrintStatistics();
/// \brief Print statistics to the given output stream.
void PrintStatistics(raw_ostream &OS);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/StringExtras.h b/contrib/llvm/include/llvm/ADT/StringExtras.h
index 0992f5d..5e8c072 100644
--- a/contrib/llvm/include/llvm/ADT/StringExtras.h
+++ b/contrib/llvm/include/llvm/ADT/StringExtras.h
@@ -207,6 +207,6 @@ inline std::string join(IteratorT Begin, IteratorT End, StringRef Separator) {
return join_impl(Begin, End, Separator, tag());
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/StringMap.h b/contrib/llvm/include/llvm/ADT/StringMap.h
index 8721c73..c8ece8f 100644
--- a/contrib/llvm/include/llvm/ADT/StringMap.h
+++ b/contrib/llvm/include/llvm/ADT/StringMap.h
@@ -447,6 +447,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/StringRef.h b/contrib/llvm/include/llvm/ADT/StringRef.h
index 95660a4..163ec63 100644
--- a/contrib/llvm/include/llvm/ADT/StringRef.h
+++ b/contrib/llvm/include/llvm/ADT/StringRef.h
@@ -566,6 +566,6 @@ namespace llvm {
// StringRefs can be treated like a POD type.
template <typename T> struct isPodLike;
template <> struct isPodLike<StringRef> { static const bool value = true; };
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/StringSet.h b/contrib/llvm/include/llvm/ADT/StringSet.h
index 3e0cc20..7c52476 100644
--- a/contrib/llvm/include/llvm/ADT/StringSet.h
+++ b/contrib/llvm/include/llvm/ADT/StringSet.h
@@ -29,6 +29,6 @@ namespace llvm {
return base::insert(std::make_pair(Key, '\0'));
}
};
-}
+} // namespace llvm
#endif // LLVM_ADT_STRINGSET_H
diff --git a/contrib/llvm/include/llvm/ADT/Triple.h b/contrib/llvm/include/llvm/ADT/Triple.h
index 1362fe3..cb6edc8 100644
--- a/contrib/llvm/include/llvm/ADT/Triple.h
+++ b/contrib/llvm/include/llvm/ADT/Triple.h
@@ -84,7 +84,8 @@ public:
spir, // SPIR: standard portable IR for OpenCL 32-bit version
spir64, // SPIR: standard portable IR for OpenCL 64-bit version
kalimba, // Kalimba: generic kalimba
- LastArchType = kalimba
+ shave, // SHAVE: Movidius vector VLIW processors
+ LastArchType = shave
};
enum SubArchType {
NoSubArch,
@@ -608,7 +609,7 @@ public:
/// @}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/Twine.h b/contrib/llvm/include/llvm/ADT/Twine.h
index db0bf4b..db4a5be 100644
--- a/contrib/llvm/include/llvm/ADT/Twine.h
+++ b/contrib/llvm/include/llvm/ADT/Twine.h
@@ -537,6 +537,6 @@ namespace llvm {
}
/// @}
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/edit_distance.h b/contrib/llvm/include/llvm/ADT/edit_distance.h
index c2b2041..5fc4bee 100644
--- a/contrib/llvm/include/llvm/ADT/edit_distance.h
+++ b/contrib/llvm/include/llvm/ADT/edit_distance.h
@@ -97,6 +97,6 @@ unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
return Result;
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/ilist.h b/contrib/llvm/include/llvm/ADT/ilist.h
index a7b9306..4f10167 100644
--- a/contrib/llvm/include/llvm/ADT/ilist.h
+++ b/contrib/llvm/include/llvm/ADT/ilist.h
@@ -655,7 +655,7 @@ struct ilist : public iplist<NodeTy> {
void resize(size_type newsize) { resize(newsize, NodeTy()); }
};
-} // End llvm namespace
+} // namespace llvm
namespace std {
// Ensure that swap uses the fast list swap...
diff --git a/contrib/llvm/include/llvm/ADT/ilist_node.h b/contrib/llvm/include/llvm/ADT/ilist_node.h
index 26d0b55..14ca26b 100644
--- a/contrib/llvm/include/llvm/ADT/ilist_node.h
+++ b/contrib/llvm/include/llvm/ADT/ilist_node.h
@@ -101,6 +101,6 @@ public:
/// @}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/iterator.h b/contrib/llvm/include/llvm/ADT/iterator.h
index 54a288d..28728ca 100644
--- a/contrib/llvm/include/llvm/ADT/iterator.h
+++ b/contrib/llvm/include/llvm/ADT/iterator.h
@@ -239,6 +239,6 @@ struct pointee_iterator
T &operator*() const { return **this->I; }
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ADT/iterator_range.h b/contrib/llvm/include/llvm/ADT/iterator_range.h
index 523a86f..009b716 100644
--- a/contrib/llvm/include/llvm/ADT/iterator_range.h
+++ b/contrib/llvm/include/llvm/ADT/iterator_range.h
@@ -51,6 +51,6 @@ template <class T> iterator_range<T> make_range(T x, T y) {
template <typename T> iterator_range<T> make_range(std::pair<T, T> p) {
return iterator_range<T>(std::move(p.first), std::move(p.second));
}
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h
index de18e58..7f037fb 100644
--- a/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -16,11 +16,12 @@
// which automatically provides functionality for the entire suite of client
// APIs.
//
-// This API identifies memory regions with the Location class. The pointer
+// This API identifies memory regions with the MemoryLocation class. The pointer
// component specifies the base memory address of the region. The Size specifies
-// the maximum size (in address units) of the memory region, or UnknownSize if
-// the size is not known. The TBAA tag identifies the "type" of the memory
-// reference; see the TypeBasedAliasAnalysis class for details.
+// the maximum size (in address units) of the memory region, or
+// MemoryLocation::UnknownSize if the size is not known. The TBAA tag
+// identifies the "type" of the memory reference; see the
+// TypeBasedAliasAnalysis class for details.
//
// Some non-obvious details include:
// - Pointers that point to two completely different objects in memory never
@@ -80,11 +81,6 @@ public:
AliasAnalysis() : DL(nullptr), TLI(nullptr), AA(nullptr) {}
virtual ~AliasAnalysis(); // We want to be subclassed
- /// UnknownSize - This is a special value which can be used with the
- /// size arguments in alias queries to indicate that the caller does not
- /// know the sizes of the potential memory references.
- static uint64_t const UnknownSize = MemoryLocation::UnknownSize;
-
/// getTargetLibraryInfo - Return a pointer to the current TargetLibraryInfo
/// object, or null if no TargetLibraryInfo object is available.
///
@@ -99,10 +95,6 @@ public:
/// Alias Queries...
///
- /// Legacy typedef for the AA location object. New code should use \c
- /// MemoryLocation directly.
- typedef MemoryLocation Location;
-
/// Alias analysis result - Either we know for sure that it does not alias, we
/// know for sure it must alias, or we don't know anything: The two pointers
/// _might_ alias. This enum is designed so you can do things like:
@@ -123,38 +115,40 @@ public:
/// Returns an AliasResult indicating whether the two pointers are aliased to
/// each other. This is the interface that must be implemented by specific
/// alias analysis implementations.
- virtual AliasResult alias(const Location &LocA, const Location &LocB);
+ virtual AliasResult alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB);
/// alias - A convenience wrapper.
AliasResult alias(const Value *V1, uint64_t V1Size,
const Value *V2, uint64_t V2Size) {
- return alias(Location(V1, V1Size), Location(V2, V2Size));
+ return alias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size));
}
/// alias - A convenience wrapper.
AliasResult alias(const Value *V1, const Value *V2) {
- return alias(V1, UnknownSize, V2, UnknownSize);
+ return alias(V1, MemoryLocation::UnknownSize, V2,
+ MemoryLocation::UnknownSize);
}
/// isNoAlias - A trivial helper function to check to see if the specified
/// pointers are no-alias.
- bool isNoAlias(const Location &LocA, const Location &LocB) {
+ bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
return alias(LocA, LocB) == NoAlias;
}
/// isNoAlias - A convenience wrapper.
bool isNoAlias(const Value *V1, uint64_t V1Size,
const Value *V2, uint64_t V2Size) {
- return isNoAlias(Location(V1, V1Size), Location(V2, V2Size));
+ return isNoAlias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size));
}
/// isNoAlias - A convenience wrapper.
bool isNoAlias(const Value *V1, const Value *V2) {
- return isNoAlias(Location(V1), Location(V2));
+ return isNoAlias(MemoryLocation(V1), MemoryLocation(V2));
}
/// isMustAlias - A convenience wrapper.
- bool isMustAlias(const Location &LocA, const Location &LocB) {
+ bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
return alias(LocA, LocB) == MustAlias;
}
@@ -167,12 +161,12 @@ public:
/// known to be constant, return true. If OrLocal is true and the
/// specified memory location is known to be "local" (derived from
/// an alloca), return true. Otherwise return false.
- virtual bool pointsToConstantMemory(const Location &Loc,
+ virtual bool pointsToConstantMemory(const MemoryLocation &Loc,
bool OrLocal = false);
/// pointsToConstantMemory - A convenient wrapper.
bool pointsToConstantMemory(const Value *P, bool OrLocal = false) {
- return pointsToConstantMemory(Location(P), OrLocal);
+ return pointsToConstantMemory(MemoryLocation(P), OrLocal);
}
//===--------------------------------------------------------------------===//
@@ -228,13 +222,12 @@ public:
UnknownModRefBehavior = Anywhere | ModRef
};
- /// Get the location associated with a pointer argument of a callsite.
- /// The mask bits are set to indicate the allowed aliasing ModRef kinds.
- /// Note that these mask bits do not necessarily account for the overall
- /// behavior of the function, but rather only provide additional
- /// per-argument information.
- virtual Location getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
- ModRefResult &Mask);
+ /// Get the ModRef info associated with a pointer argument of a callsite. The
+ /// result's bits are set to indicate the allowed aliasing ModRef kinds. Note
+ /// that these bits do not necessarily account for the overall behavior of
+ /// the function, but rather only provide additional per-argument
+ /// information.
+ virtual ModRefResult getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx);
/// getModRefBehavior - Return the behavior when calling the given call site.
virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
@@ -324,14 +317,13 @@ public:
return NoModRef;
}
- return getModRefInfo(I, Location());
+ return getModRefInfo(I, MemoryLocation());
}
/// getModRefInfo - Return information about whether or not an instruction may
/// read or write the specified memory location. An instruction
/// that doesn't read or write memory may be trivially LICM'd for example.
- ModRefResult getModRefInfo(const Instruction *I,
- const Location &Loc) {
+ ModRefResult getModRefInfo(const Instruction *I, const MemoryLocation &Loc) {
switch (I->getOpcode()) {
case Instruction::VAArg: return getModRefInfo((const VAArgInst*)I, Loc);
case Instruction::Load: return getModRefInfo((const LoadInst*)I, Loc);
@@ -350,65 +342,64 @@ public:
/// getModRefInfo - A convenience wrapper.
ModRefResult getModRefInfo(const Instruction *I,
const Value *P, uint64_t Size) {
- return getModRefInfo(I, Location(P, Size));
+ return getModRefInfo(I, MemoryLocation(P, Size));
}
/// getModRefInfo (for call sites) - Return information about whether
/// a particular call site modifies or reads the specified memory location.
virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Location &Loc);
+ const MemoryLocation &Loc);
/// getModRefInfo (for call sites) - A convenience wrapper.
ModRefResult getModRefInfo(ImmutableCallSite CS,
const Value *P, uint64_t Size) {
- return getModRefInfo(CS, Location(P, Size));
+ return getModRefInfo(CS, MemoryLocation(P, Size));
}
/// getModRefInfo (for calls) - Return information about whether
/// a particular call modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const CallInst *C, const Location &Loc) {
+ ModRefResult getModRefInfo(const CallInst *C, const MemoryLocation &Loc) {
return getModRefInfo(ImmutableCallSite(C), Loc);
}
/// getModRefInfo (for calls) - A convenience wrapper.
ModRefResult getModRefInfo(const CallInst *C, const Value *P, uint64_t Size) {
- return getModRefInfo(C, Location(P, Size));
+ return getModRefInfo(C, MemoryLocation(P, Size));
}
/// getModRefInfo (for invokes) - Return information about whether
/// a particular invoke modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const InvokeInst *I,
- const Location &Loc) {
+ ModRefResult getModRefInfo(const InvokeInst *I, const MemoryLocation &Loc) {
return getModRefInfo(ImmutableCallSite(I), Loc);
}
/// getModRefInfo (for invokes) - A convenience wrapper.
ModRefResult getModRefInfo(const InvokeInst *I,
const Value *P, uint64_t Size) {
- return getModRefInfo(I, Location(P, Size));
+ return getModRefInfo(I, MemoryLocation(P, Size));
}
/// getModRefInfo (for loads) - Return information about whether
/// a particular load modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const LoadInst *L, const Location &Loc);
+ ModRefResult getModRefInfo(const LoadInst *L, const MemoryLocation &Loc);
/// getModRefInfo (for loads) - A convenience wrapper.
ModRefResult getModRefInfo(const LoadInst *L, const Value *P, uint64_t Size) {
- return getModRefInfo(L, Location(P, Size));
+ return getModRefInfo(L, MemoryLocation(P, Size));
}
/// getModRefInfo (for stores) - Return information about whether
/// a particular store modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const StoreInst *S, const Location &Loc);
+ ModRefResult getModRefInfo(const StoreInst *S, const MemoryLocation &Loc);
/// getModRefInfo (for stores) - A convenience wrapper.
ModRefResult getModRefInfo(const StoreInst *S, const Value *P, uint64_t Size){
- return getModRefInfo(S, Location(P, Size));
+ return getModRefInfo(S, MemoryLocation(P, Size));
}
/// getModRefInfo (for fences) - Return information about whether
/// a particular store modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const FenceInst *S, const Location &Loc) {
+ ModRefResult getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) {
// Conservatively correct. (We could possibly be a bit smarter if
// Loc is a alloca that doesn't escape.)
return ModRef;
@@ -416,36 +407,38 @@ public:
/// getModRefInfo (for fences) - A convenience wrapper.
ModRefResult getModRefInfo(const FenceInst *S, const Value *P, uint64_t Size){
- return getModRefInfo(S, Location(P, Size));
+ return getModRefInfo(S, MemoryLocation(P, Size));
}
/// getModRefInfo (for cmpxchges) - Return information about whether
/// a particular cmpxchg modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc);
+ ModRefResult getModRefInfo(const AtomicCmpXchgInst *CX,
+ const MemoryLocation &Loc);
/// getModRefInfo (for cmpxchges) - A convenience wrapper.
ModRefResult getModRefInfo(const AtomicCmpXchgInst *CX,
const Value *P, unsigned Size) {
- return getModRefInfo(CX, Location(P, Size));
+ return getModRefInfo(CX, MemoryLocation(P, Size));
}
/// getModRefInfo (for atomicrmws) - Return information about whether
/// a particular atomicrmw modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc);
+ ModRefResult getModRefInfo(const AtomicRMWInst *RMW,
+ const MemoryLocation &Loc);
/// getModRefInfo (for atomicrmws) - A convenience wrapper.
ModRefResult getModRefInfo(const AtomicRMWInst *RMW,
const Value *P, unsigned Size) {
- return getModRefInfo(RMW, Location(P, Size));
+ return getModRefInfo(RMW, MemoryLocation(P, Size));
}
/// getModRefInfo (for va_args) - Return information about whether
/// a particular va_arg modifies or reads the specified memory location.
- ModRefResult getModRefInfo(const VAArgInst* I, const Location &Loc);
+ ModRefResult getModRefInfo(const VAArgInst *I, const MemoryLocation &Loc);
/// getModRefInfo (for va_args) - A convenience wrapper.
ModRefResult getModRefInfo(const VAArgInst* I, const Value* P, uint64_t Size){
- return getModRefInfo(I, Location(P, Size));
+ return getModRefInfo(I, MemoryLocation(P, Size));
}
/// getModRefInfo - Return information about whether a call and an instruction
/// may refer to the same memory locations.
@@ -462,13 +455,13 @@ public:
/// callCapturesBefore - Return information about whether a particular call
/// site modifies or reads the specified memory location.
ModRefResult callCapturesBefore(const Instruction *I,
- const AliasAnalysis::Location &MemLoc,
+ const MemoryLocation &MemLoc,
DominatorTree *DT);
/// callCapturesBefore - A convenience wrapper.
ModRefResult callCapturesBefore(const Instruction *I, const Value *P,
uint64_t Size, DominatorTree *DT) {
- return callCapturesBefore(I, Location(P, Size), DT);
+ return callCapturesBefore(I, MemoryLocation(P, Size), DT);
}
//===--------------------------------------------------------------------===//
@@ -477,11 +470,11 @@ public:
/// canBasicBlockModify - Return true if it is possible for execution of the
/// specified basic block to modify the location Loc.
- bool canBasicBlockModify(const BasicBlock &BB, const Location &Loc);
+ bool canBasicBlockModify(const BasicBlock &BB, const MemoryLocation &Loc);
/// canBasicBlockModify - A convenience wrapper.
bool canBasicBlockModify(const BasicBlock &BB, const Value *P, uint64_t Size){
- return canBasicBlockModify(BB, Location(P, Size));
+ return canBasicBlockModify(BB, MemoryLocation(P, Size));
}
/// canInstructionRangeModRef - Return true if it is possible for the
@@ -489,15 +482,15 @@ public:
/// mode) the location Loc. The instructions to consider are all
/// of the instructions in the range of [I1,I2] INCLUSIVE.
/// I1 and I2 must be in the same basic block.
- bool canInstructionRangeModRef(const Instruction &I1,
- const Instruction &I2, const Location &Loc,
- const ModRefResult Mode);
+ bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2,
+ const MemoryLocation &Loc,
+ const ModRefResult Mode);
/// canInstructionRangeModRef - A convenience wrapper.
bool canInstructionRangeModRef(const Instruction &I1,
const Instruction &I2, const Value *Ptr,
uint64_t Size, const ModRefResult Mode) {
- return canInstructionRangeModRef(I1, I2, Location(Ptr, Size), Mode);
+ return canInstructionRangeModRef(I1, I2, MemoryLocation(Ptr, Size), Mode);
}
//===--------------------------------------------------------------------===//
@@ -565,6 +558,6 @@ bool isIdentifiedObject(const Value *V);
/// IdentifiedObjects.
bool isIdentifiedFunctionLocal(const Value *V);
-} // End llvm namespace
+} // namespace llvm
#endif
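
The AliasAnalysis.h hunks above retire the legacy AliasAnalysis::Location typedef and the class-level UnknownSize constant in favor of MemoryLocation, and replace getArgLocation() with getArgModRefInfo(). The following is a minimal sketch, not part of the diff, of how a client phrases an alias query against the updated interface; the helper name storesNeverOverlap is hypothetical and the headers from this tree are assumed to be on the include path.

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/MemoryLocation.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Hypothetical helper: true if two stores provably never touch the same memory.
    static bool storesNeverOverlap(AliasAnalysis &AA, const StoreInst *A,
                                   const StoreInst *B) {
      // MemoryLocation::get() pulls the pointer, access size, and AA metadata
      // straight from the instruction, so callers no longer spell out
      // AliasAnalysis::Location or AliasAnalysis::UnknownSize.
      MemoryLocation LocA = MemoryLocation::get(A);
      MemoryLocation LocB = MemoryLocation::get(B);
      return AA.alias(LocA, LocB) == AliasAnalysis::NoAlias;
    }
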
diff --git a/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h b/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h
index 18f95b4..ba2eae9 100644
--- a/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h
+++ b/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h
@@ -437,6 +437,6 @@ inline raw_ostream& operator<<(raw_ostream &OS, const AliasSetTracker &AST) {
return OS;
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h
index f27c32d..382c080 100644
--- a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h
@@ -63,6 +63,6 @@ public:
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 85a299b..bf24f66 100644
--- a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -196,23 +196,26 @@ public:
struct LoopData {
typedef SmallVector<std::pair<BlockNode, BlockMass>, 4> ExitMap;
typedef SmallVector<BlockNode, 4> NodeList;
- LoopData *Parent; ///< The parent loop.
- bool IsPackaged; ///< Whether this has been packaged.
- uint32_t NumHeaders; ///< Number of headers.
- ExitMap Exits; ///< Successor edges (and weights).
- NodeList Nodes; ///< Header and the members of the loop.
- BlockMass BackedgeMass; ///< Mass returned to loop header.
+ typedef SmallVector<BlockMass, 1> HeaderMassList;
+ LoopData *Parent; ///< The parent loop.
+ bool IsPackaged; ///< Whether this has been packaged.
+ uint32_t NumHeaders; ///< Number of headers.
+ ExitMap Exits; ///< Successor edges (and weights).
+ NodeList Nodes; ///< Header and the members of the loop.
+ HeaderMassList BackedgeMass; ///< Mass returned to each loop header.
BlockMass Mass;
Scaled64 Scale;
LoopData(LoopData *Parent, const BlockNode &Header)
- : Parent(Parent), IsPackaged(false), NumHeaders(1), Nodes(1, Header) {}
+ : Parent(Parent), IsPackaged(false), NumHeaders(1), Nodes(1, Header),
+ BackedgeMass(1) {}
template <class It1, class It2>
LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther,
It2 LastOther)
: Parent(Parent), IsPackaged(false), Nodes(FirstHeader, LastHeader) {
NumHeaders = Nodes.size();
Nodes.insert(Nodes.end(), FirstOther, LastOther);
+ BackedgeMass.resize(NumHeaders);
}
bool isHeader(const BlockNode &Node) const {
if (isIrreducible())
@@ -223,6 +226,14 @@ public:
BlockNode getHeader() const { return Nodes[0]; }
bool isIrreducible() const { return NumHeaders > 1; }
+ HeaderMassList::difference_type getHeaderIndex(const BlockNode &B) {
+ assert(isHeader(B) && "this is only valid on loop header blocks");
+ if (isIrreducible())
+ return std::lower_bound(Nodes.begin(), Nodes.begin() + NumHeaders, B) -
+ Nodes.begin();
+ return 0;
+ }
+
NodeList::const_iterator members_begin() const {
return Nodes.begin() + NumHeaders;
}
@@ -431,6 +442,16 @@ public:
/// \brief Compute the loop scale for a loop.
void computeLoopScale(LoopData &Loop);
+ /// Adjust the mass of all headers in an irreducible loop.
+ ///
+ /// Initially, irreducible loops are assumed to distribute their mass
+/// equally among their headers. This can lead to wrong frequency estimates
+ /// since some headers may be executed more frequently than others.
+ ///
+ /// This adjusts header mass distribution so it matches the weights of
+ /// the backedges going into each of the loop headers.
+ void adjustLoopHeaderMass(LoopData &Loop);
+
/// \brief Package up a loop.
void packageLoop(LoopData &Loop);
@@ -607,7 +628,7 @@ void IrreducibleGraph::addEdges(const BlockNode &Node,
else
addBlockEdges(*this, Irr, OuterLoop);
}
-}
+} // namespace bfi_detail
/// \brief Shared implementation for block frequency analysis.
///
@@ -695,6 +716,17 @@ void IrreducibleGraph::addEdges(const BlockNode &Node,
/// - Distribute the mass accordingly, dithering to minimize mass loss,
/// as described in \a distributeMass().
///
+/// In the case of irreducible loops, instead of a single loop header,
+/// there will be several. The computation of backedge masses is similar
+/// but instead of having a single backedge mass, there will be one
+/// backedge per loop header. In these cases, each backedge will carry
+/// a mass proportional to the edge weights along the corresponding
+/// path.
+///
+/// At the end of propagation, the full mass assigned to the loop will be
+/// distributed among the loop headers proportionally according to the
+/// mass flowing through their backedges.
+///
/// Finally, calculate the loop scale from the accumulated backedge mass.
///
/// 3. Distribute mass in the function (\a computeMassInFunction()).
@@ -735,11 +767,6 @@ void IrreducibleGraph::addEdges(const BlockNode &Node,
/// as sub-loops, rather than arbitrarily shoving the problematic
/// blocks into the headers of the main irreducible SCC.
///
-/// - Backedge frequencies are assumed to be evenly split between the
-/// headers of a given irreducible SCC. Instead, we could track the
-/// backedge mass separately for each header, and adjust their relative
-/// frequencies.
-///
/// - Entry frequencies are assumed to be evenly split between the
/// headers of a given irreducible SCC, which is the only option if we
/// need to compute mass in the SCC before its parent loop. Instead,
@@ -846,7 +873,7 @@ template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase {
///
/// \pre \a computeMassInLoop() has been called for each subloop of \c
/// OuterLoop.
- /// \pre \c Insert points at the the last loop successfully processed by \a
+ /// \pre \c Insert points at the last loop successfully processed by \a
/// computeMassInLoop().
/// \pre \c OuterLoop has irreducible SCCs.
void computeIrreducibleMass(LoopData *OuterLoop,
@@ -1042,6 +1069,8 @@ bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) {
for (const BlockNode &M : Loop.Nodes)
if (!propagateMassToSuccessors(&Loop, M))
llvm_unreachable("unhandled irreducible control flow");
+
+ adjustLoopHeaderMass(Loop);
} else {
Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
if (!propagateMassToSuccessors(&Loop, Loop.getHeader()))
@@ -1104,7 +1133,7 @@ template <class BT> struct BlockEdgesAdder {
G.addEdge(Irr, BFI.getNode(*I), OuterLoop);
}
};
-}
+} // namespace bfi_detail
template <class BT>
void BlockFrequencyInfoImpl<BT>::computeIrreducibleMass(
LoopData *OuterLoop, std::list<LoopData>::iterator Insert) {
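
The BlockFrequencyInfoImpl.h changes above track one backedge mass per header of an irreducible loop and redistribute the loop's mass proportionally in the new adjustLoopHeaderMass(). The toy program below is not taken from the patch; it mimics that proportional split with plain doubles instead of BlockMass/ScaledNumber, purely to illustrate the arithmetic.

    #include <cstdio>
    #include <vector>

    int main() {
      // Mass observed on the backedges into each header of an irreducible SCC.
      std::vector<double> BackedgeMass = {0.6, 0.3, 0.1};
      const double LoopMass = 1.0; // full mass assigned to the loop

      double Total = 0.0;
      for (double M : BackedgeMass)
        Total += M;

      // Each header now receives a share proportional to its backedge mass,
      // rather than the old even 1/NumHeaders split.
      for (unsigned I = 0; I != BackedgeMass.size(); ++I)
        std::printf("header %u gets mass %.3f\n", I,
                    LoopMass * BackedgeMass[I] / Total);
      return 0;
    }
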
diff --git a/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
index 9d86756..f2ca3e0 100644
--- a/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -158,6 +158,6 @@ private:
bool calcInvokeHeuristics(BasicBlock *BB);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/CFG.h b/contrib/llvm/include/llvm/Analysis/CFG.h
index 7f92eda..f837cb4 100644
--- a/contrib/llvm/include/llvm/Analysis/CFG.h
+++ b/contrib/llvm/include/llvm/Analysis/CFG.h
@@ -78,6 +78,6 @@ bool isPotentiallyReachable(const BasicBlock *From, const BasicBlock *To,
const DominatorTree *DT = nullptr,
const LoopInfo *LI = nullptr);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/CFGPrinter.h b/contrib/llvm/include/llvm/Analysis/CFGPrinter.h
index 0357648..0cc4e5d 100644
--- a/contrib/llvm/include/llvm/Analysis/CFGPrinter.h
+++ b/contrib/llvm/include/llvm/Analysis/CFGPrinter.h
@@ -119,7 +119,7 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
return "";
}
};
-} // End llvm namespace
+} // namespace llvm
namespace llvm {
class FunctionPass;
diff --git a/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h b/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h
index 6a406cd..42f0e65 100644
--- a/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h
+++ b/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h
@@ -485,6 +485,6 @@ CGSCCToFunctionPassAdaptor<FunctionPassT>
createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) {
return CGSCCToFunctionPassAdaptor<FunctionPassT>(std::move(Pass));
}
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/CallGraph.h b/contrib/llvm/include/llvm/Analysis/CallGraph.h
index 5b64d85..ed52e86 100644
--- a/contrib/llvm/include/llvm/Analysis/CallGraph.h
+++ b/contrib/llvm/include/llvm/Analysis/CallGraph.h
@@ -56,6 +56,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include <map>
@@ -229,7 +230,8 @@ public:
/// \brief Adds a function to the list of functions called by this one.
void addCalledFunction(CallSite CS, CallGraphNode *M) {
assert(!CS.getInstruction() || !CS.getCalledFunction() ||
- !CS.getCalledFunction()->isIntrinsic());
+ !CS.getCalledFunction()->isIntrinsic() ||
+ !Intrinsic::isLeaf(CS.getCalledFunction()->getIntrinsicID()));
CalledFunctions.emplace_back(CS.getInstruction(), M);
M->AddRef();
}
@@ -479,6 +481,6 @@ struct GraphTraits<const CallGraph *> : public GraphTraits<
static const CallGraphNode &CGdereference(PairTy P) { return *P.second; }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/CallGraphSCCPass.h b/contrib/llvm/include/llvm/Analysis/CallGraphSCCPass.h
index 667e171..94fa5bd 100644
--- a/contrib/llvm/include/llvm/Analysis/CallGraphSCCPass.h
+++ b/contrib/llvm/include/llvm/Analysis/CallGraphSCCPass.h
@@ -102,6 +102,6 @@ public:
iterator end() const { return Nodes.end(); }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/CodeMetrics.h b/contrib/llvm/include/llvm/Analysis/CodeMetrics.h
index 2f59691..6ab83ae 100644
--- a/contrib/llvm/include/llvm/Analysis/CodeMetrics.h
+++ b/contrib/llvm/include/llvm/Analysis/CodeMetrics.h
@@ -102,6 +102,6 @@ struct CodeMetrics {
SmallPtrSetImpl<const Value *> &EphValues);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/ConstantFolding.h b/contrib/llvm/include/llvm/Analysis/ConstantFolding.h
index 541a210..a0d5eab 100644
--- a/contrib/llvm/include/llvm/Analysis/ConstantFolding.h
+++ b/contrib/llvm/include/llvm/Analysis/ConstantFolding.h
@@ -97,6 +97,6 @@ bool canConstantFoldCallTo(const Function *F);
/// with the specified arguments, returning null if unsuccessful.
Constant *ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI = nullptr);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/DomPrinter.h b/contrib/llvm/include/llvm/Analysis/DomPrinter.h
index 0ed2899..1402d77 100644
--- a/contrib/llvm/include/llvm/Analysis/DomPrinter.h
+++ b/contrib/llvm/include/llvm/Analysis/DomPrinter.h
@@ -25,6 +25,6 @@ namespace llvm {
FunctionPass *createPostDomOnlyPrinterPass();
FunctionPass *createPostDomViewerPass();
FunctionPass *createPostDomOnlyViewerPass();
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h b/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h
index 996700e..0cdd73e 100644
--- a/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h
+++ b/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h
@@ -205,6 +205,6 @@ public:
EXTERN_TEMPLATE_INSTANTIATION(class DominanceFrontierBase<BasicBlock>);
EXTERN_TEMPLATE_INSTANTIATION(class ForwardDominanceFrontierBase<BasicBlock>);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/DominanceFrontierImpl.h b/contrib/llvm/include/llvm/Analysis/DominanceFrontierImpl.h
index 629ae38..4904f93 100644
--- a/contrib/llvm/include/llvm/Analysis/DominanceFrontierImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/DominanceFrontierImpl.h
@@ -221,6 +221,6 @@ ForwardDominanceFrontierBase<BlockT>::calculate(const DomTreeT &DT,
return *Result;
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/IVUsers.h b/contrib/llvm/include/llvm/Analysis/IVUsers.h
index ae9c1f5..2ad0ae7 100644
--- a/contrib/llvm/include/llvm/Analysis/IVUsers.h
+++ b/contrib/llvm/include/llvm/Analysis/IVUsers.h
@@ -178,6 +178,6 @@ protected:
Pass *createIVUsersPass();
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/InlineCost.h b/contrib/llvm/include/llvm/Analysis/InlineCost.h
index 79ed74d..57da132 100644
--- a/contrib/llvm/include/llvm/Analysis/InlineCost.h
+++ b/contrib/llvm/include/llvm/Analysis/InlineCost.h
@@ -36,7 +36,7 @@ namespace InlineConstants {
/// Do not inline functions which allocate this many bytes on the stack
/// when the caller is recursive.
const unsigned TotalAllocaSizeRecursiveCaller = 1024;
-}
+} // namespace InlineConstants
/// \brief Represents the cost of inlining a function.
///
@@ -138,6 +138,6 @@ public:
bool isInlineViable(Function &Callee);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/Interval.h b/contrib/llvm/include/llvm/Analysis/Interval.h
index 01eba3f..cbdb0c0 100644
--- a/contrib/llvm/include/llvm/Analysis/Interval.h
+++ b/contrib/llvm/include/llvm/Analysis/Interval.h
@@ -145,6 +145,6 @@ template <> struct GraphTraits<Inverse<Interval*> > {
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/IntervalIterator.h b/contrib/llvm/include/llvm/Analysis/IntervalIterator.h
index 655ce2d..5ec50d4 100644
--- a/contrib/llvm/include/llvm/Analysis/IntervalIterator.h
+++ b/contrib/llvm/include/llvm/Analysis/IntervalIterator.h
@@ -263,6 +263,6 @@ inline interval_part_interval_iterator intervals_end(IntervalPartition &IP) {
return interval_part_interval_iterator();
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/IntervalPartition.h b/contrib/llvm/include/llvm/Analysis/IntervalPartition.h
index 274be2b..2176d0c 100644
--- a/contrib/llvm/include/llvm/Analysis/IntervalPartition.h
+++ b/contrib/llvm/include/llvm/Analysis/IntervalPartition.h
@@ -106,6 +106,6 @@ private:
void updatePredecessors(Interval *Int);
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h b/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
index 5a339f1..eea0d81 100644
--- a/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
+++ b/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
@@ -92,5 +92,5 @@ private:
const SmallPtrSetImpl<BasicBlock *> *DefBlocks;
SmallVector<BasicBlock *, 32> PHIBlocks;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/JumpInstrTableInfo.h b/contrib/llvm/include/llvm/Analysis/JumpInstrTableInfo.h
index b6dad47..ea331a4 100644
--- a/contrib/llvm/include/llvm/Analysis/JumpInstrTableInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/JumpInstrTableInfo.h
@@ -66,6 +66,6 @@ private:
/// bound specifies the maximum number of bytes needed to represent an
/// unconditional jump or a trap instruction in the back end currently in use.
ModulePass *createJumpInstrTableInfoPass(unsigned Bound);
-}
+} // namespace llvm
#endif /* LLVM_ANALYSIS_JUMPINSTRTABLEINFO_H */
diff --git a/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h b/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h
index b0b9068..af4861f 100644
--- a/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h
+++ b/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -569,6 +569,6 @@ public:
static StringRef name() { return "LazyCallGraphPrinterPass"; }
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/LibCallAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/LibCallAliasAnalysis.h
index df95e0e..a4b7e5d 100644
--- a/contrib/llvm/include/llvm/Analysis/LibCallAliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/LibCallAliasAnalysis.h
@@ -39,8 +39,8 @@ namespace llvm {
~LibCallAliasAnalysis() override;
ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Location &Loc) override;
-
+ const MemoryLocation &Loc) override;
+
ModRefResult getModRefInfo(ImmutableCallSite CS1,
ImmutableCallSite CS2) override {
// TODO: Could compare two direct calls against each other if we cared to.
@@ -64,8 +64,8 @@ namespace llvm {
private:
ModRefResult AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
ImmutableCallSite CS,
- const Location &Loc);
+ const MemoryLocation &Loc);
};
-} // End of llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/LibCallSemantics.h b/contrib/llvm/include/llvm/Analysis/LibCallSemantics.h
index 34831b2..170e2a4 100644
--- a/contrib/llvm/include/llvm/Analysis/LibCallSemantics.h
+++ b/contrib/llvm/include/llvm/Analysis/LibCallSemantics.h
@@ -48,8 +48,7 @@ class InvokeInst;
enum LocResult {
Yes, No, Unknown
};
- LocResult (*isLocation)(ImmutableCallSite CS,
- const AliasAnalysis::Location &Loc);
+ LocResult (*isLocation)(ImmutableCallSite CS, const MemoryLocation &Loc);
};
/// LibCallFunctionInfo - Each record in the array of FunctionInfo structs
@@ -207,7 +206,7 @@ class InvokeInst;
llvm_unreachable("invalid enum");
}
- bool canSimplifyInvokeNoUnwind(const InvokeInst *II);
+ bool canSimplifyInvokeNoUnwind(const Function *F);
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/Analysis/Lint.h b/contrib/llvm/include/llvm/Analysis/Lint.h
index 7c88b13..79cd82f 100644
--- a/contrib/llvm/include/llvm/Analysis/Lint.h
+++ b/contrib/llvm/include/llvm/Analysis/Lint.h
@@ -44,6 +44,6 @@ void lintFunction(
const Function &F ///< The function to be checked
);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/Loads.h b/contrib/llvm/include/llvm/Analysis/Loads.h
index 42667d2..c8a6e4a 100644
--- a/contrib/llvm/include/llvm/Analysis/Loads.h
+++ b/contrib/llvm/include/llvm/Analysis/Loads.h
@@ -52,6 +52,6 @@ Value *FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
AliasAnalysis *AA = nullptr,
AAMDNodes *AATags = nullptr);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 7b635a8..0f3c731 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -555,6 +555,6 @@ private:
DominatorTree *DT;
LoopInfo *LI;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfo.h b/contrib/llvm/include/llvm/Analysis/LoopInfo.h
index bbcde8d..7bfebab 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopInfo.h
@@ -763,6 +763,6 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h b/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h
index f5cc856..b8f80df 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h
@@ -535,6 +535,6 @@ void LoopInfoBase<BlockT, LoopT>::verify() const {
#endif
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/LoopPass.h b/contrib/llvm/include/llvm/Analysis/LoopPass.h
index 8650000..57ad793 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopPass.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopPass.h
@@ -169,6 +169,6 @@ private:
Loop *CurrentLoop;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h b/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h
index 805a43d..557d6fc 100644
--- a/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h
+++ b/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h
@@ -262,6 +262,6 @@ public:
SizeOffsetEvalType visitInstruction(Instruction &I);
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index cf51dd6..9c50ae0 100644
--- a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -287,7 +287,7 @@ namespace llvm {
/// conflicting tags.
AAMDNodes AATags;
- NonLocalPointerInfo() : Size(AliasAnalysis::UnknownSize) {}
+ NonLocalPointerInfo() : Size(MemoryLocation::UnknownSize) {}
};
/// CachedNonLocalPointerInfo - This map stores the cached results of doing
@@ -403,13 +403,12 @@ namespace llvm {
///
/// Note that this is an uncached query, and thus may be inefficient.
///
- MemDepResult getPointerDependencyFrom(const AliasAnalysis::Location &Loc,
+ MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc,
bool isLoad,
BasicBlock::iterator ScanIt,
BasicBlock *BB,
Instruction *QueryInst = nullptr);
-
/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
/// looks at a memory location for a load (specified by MemLocBase, Offs,
/// and Size) and compares it against a load. If the specified load could
@@ -428,15 +427,14 @@ namespace llvm {
BasicBlock *BB);
bool getNonLocalPointerDepFromBB(Instruction *QueryInst,
const PHITransAddr &Pointer,
- const AliasAnalysis::Location &Loc,
- bool isLoad, BasicBlock *BB,
+ const MemoryLocation &Loc, bool isLoad,
+ BasicBlock *BB,
SmallVectorImpl<NonLocalDepResult> &Result,
- DenseMap<BasicBlock*, Value*> &Visited,
+ DenseMap<BasicBlock *, Value *> &Visited,
bool SkipFirstBlock = false);
MemDepResult GetNonLocalInfoForBlock(Instruction *QueryInst,
- const AliasAnalysis::Location &Loc,
- bool isLoad, BasicBlock *BB,
- NonLocalDepInfo *Cache,
+ const MemoryLocation &Loc, bool isLoad,
+ BasicBlock *BB, NonLocalDepInfo *Cache,
unsigned NumSortedEntries);
void RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P);
@@ -447,6 +445,6 @@ namespace llvm {
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/MemoryLocation.h b/contrib/llvm/include/llvm/Analysis/MemoryLocation.h
index 94d938d..ea69633 100644
--- a/contrib/llvm/include/llvm/Analysis/MemoryLocation.h
+++ b/contrib/llvm/include/llvm/Analysis/MemoryLocation.h
@@ -26,6 +26,7 @@ class LoadInst;
class StoreInst;
class MemTransferInst;
class MemIntrinsic;
+class TargetLibraryInfo;
/// Representation for a specific memory location.
///
@@ -87,6 +88,10 @@ public:
/// transfer.
static MemoryLocation getForDest(const MemIntrinsic *MI);
+ /// Return a location representing a particular argument of a call.
+ static MemoryLocation getForArgument(ImmutableCallSite CS, unsigned ArgIdx,
+ const TargetLibraryInfo &TLI);
+
explicit MemoryLocation(const Value *Ptr = nullptr,
uint64_t Size = UnknownSize,
const AAMDNodes &AATags = AAMDNodes())
@@ -132,6 +137,6 @@ template <> struct DenseMapInfo<MemoryLocation> {
return LHS == RHS;
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/Passes.h b/contrib/llvm/include/llvm/Analysis/Passes.h
index d112ab1..ffaf871 100644
--- a/contrib/llvm/include/llvm/Analysis/Passes.h
+++ b/contrib/llvm/include/llvm/Analysis/Passes.h
@@ -173,6 +173,6 @@ namespace llvm {
//
FunctionPass *createMemDerefPrinter();
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/PostDominators.h b/contrib/llvm/include/llvm/Analysis/PostDominators.h
index 0f7e2b8..f654652a 100644
--- a/contrib/llvm/include/llvm/Analysis/PostDominators.h
+++ b/contrib/llvm/include/llvm/Analysis/PostDominators.h
@@ -112,6 +112,6 @@ template <> struct GraphTraits<PostDominatorTree*>
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/PtrUseVisitor.h b/contrib/llvm/include/llvm/Analysis/PtrUseVisitor.h
index 6e61fc3..8b5b90a 100644
--- a/contrib/llvm/include/llvm/Analysis/PtrUseVisitor.h
+++ b/contrib/llvm/include/llvm/Analysis/PtrUseVisitor.h
@@ -280,6 +280,6 @@ protected:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/RegionInfo.h b/contrib/llvm/include/llvm/Analysis/RegionInfo.h
index 7ceb086..22fd1df 100644
--- a/contrib/llvm/include/llvm/Analysis/RegionInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/RegionInfo.h
@@ -906,5 +906,5 @@ EXTERN_TEMPLATE_INSTANTIATION(class RegionBase<RegionTraits<Function>>);
EXTERN_TEMPLATE_INSTANTIATION(class RegionNodeBase<RegionTraits<Function>>);
EXTERN_TEMPLATE_INSTANTIATION(class RegionInfoBase<RegionTraits<Function>>);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/RegionPass.h b/contrib/llvm/include/llvm/Analysis/RegionPass.h
index bd51c49..5866fc5 100644
--- a/contrib/llvm/include/llvm/Analysis/RegionPass.h
+++ b/contrib/llvm/include/llvm/Analysis/RegionPass.h
@@ -123,6 +123,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h
index 1d1bd67..1c81408 100644
--- a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -981,6 +981,6 @@ namespace llvm {
/// to locate them all and call their destructors.
SCEVUnknown *FirstUnknown;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h
index 8ec2078..83493fa 100644
--- a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -275,6 +275,6 @@ namespace llvm {
Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
Type *ExpandTy, Type *IntTy, bool useSubtract);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
index ff82db1..14feeed 100644
--- a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -829,6 +829,6 @@ static inline const SCEV *apply(const SCEV *Scev, LoopToScevMapT &Map,
return SCEVApplyRewriter::rewrite(Scev, Map, SE);
}
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h
index 7c6423a..4133864 100644
--- a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h
+++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h
@@ -73,6 +73,6 @@ const SCEV *TransformForPostIncUse(TransformKind Kind,
ScalarEvolution &SE,
DominatorTree &DT);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/TargetFolder.h b/contrib/llvm/include/llvm/Analysis/TargetFolder.h
index 12bf9fe..0e17a58 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetFolder.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetFolder.h
@@ -265,6 +265,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 3700c9e..d863b4f 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -908,6 +908,6 @@ public:
/// clients.
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index e6a8a76..59b95a8 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -446,6 +446,6 @@ public:
U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/AsmParser/Parser.h b/contrib/llvm/include/llvm/AsmParser/Parser.h
index 7ef78d7..0c37a9b 100644
--- a/contrib/llvm/include/llvm/AsmParser/Parser.h
+++ b/contrib/llvm/include/llvm/AsmParser/Parser.h
@@ -67,6 +67,6 @@ std::unique_ptr<Module> parseAssembly(MemoryBufferRef F, SMDiagnostic &Err,
/// @return true on error.
bool parseAssemblyInto(MemoryBufferRef F, Module &M, SMDiagnostic &Err);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Bitcode/BitCodes.h b/contrib/llvm/include/llvm/Bitcode/BitCodes.h
index 96c4201..6b23eb9 100644
--- a/contrib/llvm/include/llvm/Bitcode/BitCodes.h
+++ b/contrib/llvm/include/llvm/Bitcode/BitCodes.h
@@ -77,7 +77,7 @@ namespace bitc {
// [id, name]
};
-} // End bitc namespace
+} // namespace bitc
/// BitCodeAbbrevOp - This describes one or more operands in an abbreviation.
/// This is actually a union of two different things:
@@ -180,6 +180,6 @@ public:
OperandList.push_back(OpInfo);
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Bitcode/BitcodeWriterPass.h b/contrib/llvm/include/llvm/Bitcode/BitcodeWriterPass.h
index ae915c6..cc742f1 100644
--- a/contrib/llvm/include/llvm/Bitcode/BitcodeWriterPass.h
+++ b/contrib/llvm/include/llvm/Bitcode/BitcodeWriterPass.h
@@ -56,6 +56,6 @@ public:
static StringRef name() { return "BitcodeWriterPass"; }
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h b/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h
index 4c040a7..9201daf 100644
--- a/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h
+++ b/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h
@@ -512,6 +512,6 @@ public:
bool ReadBlockInfoBlock();
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h b/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h
index f7487a0..eef6076 100644
--- a/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h
+++ b/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitcode/BitCodes.h"
+#include "llvm/Support/Endian.h"
#include <vector>
namespace llvm {
@@ -63,10 +64,7 @@ class BitstreamWriter {
// BackpatchWord - Backpatch a 32-bit word in the output with the specified
// value.
void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
- Out[ByteNo++] = (unsigned char)(NewWord >> 0);
- Out[ByteNo++] = (unsigned char)(NewWord >> 8);
- Out[ByteNo++] = (unsigned char)(NewWord >> 16);
- Out[ByteNo ] = (unsigned char)(NewWord >> 24);
+ support::endian::write32le(&Out[ByteNo], NewWord);
}
void WriteByte(unsigned char Value) {
@@ -74,12 +72,9 @@ class BitstreamWriter {
}
void WriteWord(unsigned Value) {
- unsigned char Bytes[4] = {
- (unsigned char)(Value >> 0),
- (unsigned char)(Value >> 8),
- (unsigned char)(Value >> 16),
- (unsigned char)(Value >> 24) };
- Out.append(&Bytes[0], &Bytes[4]);
+ Value = support::endian::byte_swap<uint32_t, support::little>(Value);
+ Out.append(reinterpret_cast<const char *>(&Value),
+ reinterpret_cast<const char *>(&Value + 1));
}
unsigned GetBufferOffset() const {
@@ -525,6 +520,6 @@ public:
};
-} // End llvm namespace
+} // namespace llvm
#endif
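
The BitstreamWriter.h hunks swap hand-rolled byte shifting for the fixed-width little-endian helpers in llvm/Support/Endian.h. Below is a standalone sketch of those helpers, assuming the LLVM headers are available; it is not part of the diff.

    #include "llvm/Support/Endian.h"
    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t Buf[4];
      // Stores 0x11223344 as the byte sequence 44 33 22 11 on any host.
      llvm::support::endian::write32le(Buf, 0x11223344u);
      assert(llvm::support::endian::read32le(Buf) == 0x11223344u);
      return 0;
    }
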
diff --git a/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 3a6b5c7..41aa148 100644
--- a/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -342,7 +342,7 @@ namespace bitc {
// align, vol,
// ordering, synchscope]
FUNC_CODE_INST_RESUME = 39, // RESUME: [opval]
- FUNC_CODE_INST_LANDINGPAD = 40, // LANDINGPAD: [ty,val,val,num,id0,val0...]
+ FUNC_CODE_INST_LANDINGPAD_OLD = 40, // LANDINGPAD: [ty,val,val,num,id0,val0...]
FUNC_CODE_INST_LOADATOMIC = 41, // LOAD: [opty, op, align, vol,
// ordering, synchscope]
FUNC_CODE_INST_STOREATOMIC_OLD = 42, // STORE: [ptrty,ptr,val, align, vol
@@ -352,6 +352,7 @@ namespace bitc {
FUNC_CODE_INST_STOREATOMIC = 45, // STORE: [ptrty,ptr,val, align, vol
FUNC_CODE_INST_CMPXCHG = 46, // CMPXCHG: [ptrty,ptr,valty,cmp,new, align,
// vol,ordering,synchscope]
+ FUNC_CODE_INST_LANDINGPAD = 47, // LANDINGPAD: [ty,val,num,id0,val0...]
};
enum UseListCodes {
@@ -403,7 +404,8 @@ namespace bitc {
ATTR_KIND_JUMP_TABLE = 40,
ATTR_KIND_DEREFERENCEABLE = 41,
ATTR_KIND_DEREFERENCEABLE_OR_NULL = 42,
- ATTR_KIND_CONVERGENT = 43
+ ATTR_KIND_CONVERGENT = 43,
+ ATTR_KIND_SAFESTACK = 44,
};
enum ComdatSelectionKindCodes {
@@ -414,7 +416,7 @@ namespace bitc {
COMDAT_SELECTION_KIND_SAME_SIZE = 5,
};
-} // End bitc namespace
-} // End llvm namespace
+} // namespace bitc
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Bitcode/ReaderWriter.h b/contrib/llvm/include/llvm/Bitcode/ReaderWriter.h
index 9d30098..d158569 100644
--- a/contrib/llvm/include/llvm/Bitcode/ReaderWriter.h
+++ b/contrib/llvm/include/llvm/Bitcode/ReaderWriter.h
@@ -15,6 +15,7 @@
#define LLVM_BITCODE_READERWRITER_H
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
#include <memory>
@@ -32,7 +33,7 @@ namespace llvm {
/// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
/// lazily load metadata as well. If successful, this moves Buffer. On
/// error, this *does not* move Buffer.
- ErrorOr<Module *>
+ ErrorOr<std::unique_ptr<Module>>
getLazyBitcodeModule(std::unique_ptr<MemoryBuffer> &&Buffer,
LLVMContext &Context,
DiagnosticHandlerFunction DiagnosticHandler = nullptr,
@@ -41,7 +42,8 @@ namespace llvm {
/// Read the header of the specified stream and prepare for lazy
/// deserialization and streaming of function bodies.
ErrorOr<std::unique_ptr<Module>> getStreamedBitcodeModule(
- StringRef Name, DataStreamer *Streamer, LLVMContext &Context,
+ StringRef Name, std::unique_ptr<DataStreamer> Streamer,
+ LLVMContext &Context,
DiagnosticHandlerFunction DiagnosticHandler = nullptr);
/// Read the header of the specified bitcode buffer and extract just the
@@ -52,7 +54,7 @@ namespace llvm {
DiagnosticHandlerFunction DiagnosticHandler = nullptr);
/// Read the specified bitcode file, returning the module.
- ErrorOr<Module *>
+ ErrorOr<std::unique_ptr<Module>>
parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context,
DiagnosticHandlerFunction DiagnosticHandler = nullptr);
@@ -132,14 +134,8 @@ namespace llvm {
// Must contain the header!
if (BufEnd-BufPtr < KnownHeaderSize) return true;
- unsigned Offset = ( BufPtr[OffsetField ] |
- (BufPtr[OffsetField+1] << 8) |
- (BufPtr[OffsetField+2] << 16) |
- (BufPtr[OffsetField+3] << 24));
- unsigned Size = ( BufPtr[SizeField ] |
- (BufPtr[SizeField +1] << 8) |
- (BufPtr[SizeField +2] << 16) |
- (BufPtr[SizeField +3] << 24));
+ unsigned Offset = support::endian::read32le(&BufPtr[OffsetField]);
+ unsigned Size = support::endian::read32le(&BufPtr[SizeField]);
// Verify that Offset+Size fits in the file.
if (VerifyBufferSize && Offset+Size > unsigned(BufEnd-BufPtr))
@@ -170,7 +166,7 @@ namespace llvm {
}
};
-} // End llvm namespace
+} // namespace llvm
namespace std {
template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
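
Besides the Endian.h cleanup, the ReaderWriter.h hunks change getLazyBitcodeModule() and parseBitcodeFile() to return ErrorOr<std::unique_ptr<Module>> rather than ErrorOr<Module *>. A hedged sketch of what a caller looks like after the change; the helper name tryParse is hypothetical.

    #include "llvm/Bitcode/ReaderWriter.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include <memory>

    using namespace llvm;

    // Hypothetical helper: returns the parsed module, or null on error.
    static std::unique_ptr<Module> tryParse(MemoryBufferRef Buf, LLVMContext &Ctx) {
      ErrorOr<std::unique_ptr<Module>> ModOrErr = parseBitcodeFile(Buf, Ctx);
      if (std::error_code EC = ModOrErr.getError()) {
        (void)EC; // a real caller would report EC.message()
        return nullptr;
      }
      // Ownership now arrives as a unique_ptr instead of a raw Module *.
      return std::move(*ModOrErr);
    }
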
diff --git a/contrib/llvm/include/llvm/CodeGen/Analysis.h b/contrib/llvm/include/llvm/CodeGen/Analysis.h
index c4b94ed..96e9554 100644
--- a/contrib/llvm/include/llvm/CodeGen/Analysis.h
+++ b/contrib/llvm/include/llvm/CodeGen/Analysis.h
@@ -115,6 +115,6 @@ bool returnTypeIsEligibleForTailCall(const Function *F,
// or we are in LTO.
bool canBeOmittedFromSymbolTable(const GlobalValue *GV);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h
index 47201e2..8a0989f 100644
--- a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -418,16 +418,17 @@ public:
/// Emit reference to a ttype global with a specified encoding.
void EmitTTypeReference(const GlobalValue *GV, unsigned Encoding) const;
- /// Emit the 4-byte offset of Label from the start of its section. This can
- /// be done with a special directive if the target supports it (e.g. cygwin)
- /// or by emitting it as an offset from a label at the start of the section.
- void emitSectionOffset(const MCSymbol *Label) const;
+ /// Emit a reference to a symbol for use in dwarf. Different object formats
+ /// represent this in different ways. Some use a relocation others encode
+ /// the label offset in its section.
+ void emitDwarfSymbolReference(const MCSymbol *Label,
+ bool ForceOffset = false) const;
/// Emit the 4-byte offset of a string from the start of its section.
///
/// When possible, emit a DwarfStringPool section offset without any
/// relocations, and without using the symbol. Otherwise, defers to \a
- /// emitSectionOffset().
+ /// emitDwarfSymbolReference().
void emitDwarfStringOffset(DwarfStringPoolEntryRef S) const;
/// Get the value for DW_AT_APPLE_isa. Zero if no isa encoding specified.
@@ -534,6 +535,6 @@ private:
void EmitXXStructorList(const Constant *List, bool isCtor);
GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &C);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 3e464f4..cb61cc7 100644
--- a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -830,6 +830,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/CalcSpillWeights.h b/contrib/llvm/include/llvm/CodeGen/CalcSpillWeights.h
index 91fb0a9..7c90190 100644
--- a/contrib/llvm/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/contrib/llvm/include/llvm/CodeGen/CalcSpillWeights.h
@@ -74,6 +74,6 @@ namespace llvm {
const MachineBlockFrequencyInfo &MBFI,
VirtRegAuxInfo::NormalizingFn norm =
normalizeSpillWeight);
-}
+} // namespace llvm
#endif // LLVM_CODEGEN_CALCSPILLWEIGHTS_H
diff --git a/contrib/llvm/include/llvm/CodeGen/CommandFlags.h b/contrib/llvm/include/llvm/CodeGen/CommandFlags.h
index b824df3..3c3f770 100644
--- a/contrib/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/contrib/llvm/include/llvm/CodeGen/CommandFlags.h
@@ -16,6 +16,7 @@
#ifndef LLVM_CODEGEN_COMMANDFLAGS_H
#define LLVM_CODEGEN_COMMANDFLAGS_H
+#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
#include "llvm//MC/SubtargetFeature.h"
@@ -150,7 +151,7 @@ FuseFPOps("fp-contract",
clEnumValN(FPOpFusion::Standard, "on",
"Only fuse 'blessed' FP ops."),
clEnumValN(FPOpFusion::Strict, "off",
- "Only fuse FP ops when the result won't be effected."),
+ "Only fuse FP ops when the result won't be affected."),
clEnumValEnd));
cl::list<std::string>
@@ -247,7 +248,6 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
Options.FloatABIType = FloatABIForCalls;
Options.NoZerosInBSS = DontPlaceZerosInBSS;
Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt;
- Options.DisableTailCalls = DisableTailCalls;
Options.StackAlignmentOverride = OverrideStackAlignment;
Options.TrapFuncName = TrapFuncName;
Options.PositionIndependentExecutable = EnablePIE;
@@ -315,6 +315,11 @@ static inline void setFunctionAttributes(StringRef CPU, StringRef Features,
"no-frame-pointer-elim",
DisableFPElim ? "true" : "false");
+ if (DisableTailCalls.getNumOccurrences() > 0)
+ NewAttrs = NewAttrs.addAttribute(Ctx, AttributeSet::FunctionIndex,
+ "disable-tail-calls",
+ toStringRef(DisableTailCalls));
+
// Let NewAttrs override Attrs.
NewAttrs = Attrs.addAttributes(Ctx, AttributeSet::FunctionIndex, NewAttrs);
F.setAttributes(NewAttrs);
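The hunk above only records "disable-tail-calls" when the flag was actually passed, so an untouched default cannot clobber per-function settings. A simplified sketch of the same pattern, using Function::addFnAttr instead of the AttributeSet plumbing; the option object here is a stand-in, not the one from CommandFlags.h:

    #include "llvm/ADT/StringExtras.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<bool> DisableTailCallsOpt(
        "disable-tail-calls", llvm::cl::desc("Never emit tail calls"),
        llvm::cl::init(false));

    static void applyTailCallFlag(llvm::Function &F) {
      // Only add the string attribute if the user passed the flag explicitly.
      if (DisableTailCallsOpt.getNumOccurrences() > 0)
        F.addFnAttr("disable-tail-calls", llvm::toStringRef(DisableTailCallsOpt));
    }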
diff --git a/contrib/llvm/include/llvm/CodeGen/DFAPacketizer.h b/contrib/llvm/include/llvm/CodeGen/DFAPacketizer.h
index f9cdc2a..ccff388 100644
--- a/contrib/llvm/include/llvm/CodeGen/DFAPacketizer.h
+++ b/contrib/llvm/include/llvm/CodeGen/DFAPacketizer.h
@@ -159,6 +159,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/DIE.h b/contrib/llvm/include/llvm/CodeGen/DIE.h
index 464e0fa..1ea3217 100644
--- a/contrib/llvm/include/llvm/CodeGen/DIE.h
+++ b/contrib/llvm/include/llvm/CodeGen/DIE.h
@@ -635,6 +635,6 @@ public:
#endif
};
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/FaultMaps.h b/contrib/llvm/include/llvm/CodeGen/FaultMaps.h
new file mode 100644
index 0000000..d5c2fee
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/FaultMaps.h
@@ -0,0 +1,73 @@
+//===------------------- FaultMaps.h - FaultMaps ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_FAULTMAPS_H
+#define LLVM_CODEGEN_FAULTMAPS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCSymbol.h"
+
+#include <vector>
+#include <map>
+
+namespace llvm {
+
+class AsmPrinter;
+class MCExpr;
+class MCSymbol;
+class MCStreamer;
+
+class FaultMaps {
+public:
+ enum FaultKind { FaultingLoad = 1, FaultKindMax };
+
+ static const char *faultTypeToString(FaultKind);
+
+ explicit FaultMaps(AsmPrinter &AP);
+
+ void recordFaultingOp(FaultKind FaultTy, const MCSymbol *HandlerLabel);
+ void serializeToFaultMapSection();
+
+private:
+ static const char *WFMP;
+
+ struct FaultInfo {
+ FaultKind Kind;
+ const MCExpr *FaultingOffsetExpr;
+ const MCExpr *HandlerOffsetExpr;
+
+ FaultInfo()
+ : Kind(FaultKindMax), FaultingOffsetExpr(nullptr),
+ HandlerOffsetExpr(nullptr) {}
+
+ explicit FaultInfo(FaultMaps::FaultKind Kind, const MCExpr *FaultingOffset,
+ const MCExpr *HandlerOffset)
+ : Kind(Kind), FaultingOffsetExpr(FaultingOffset),
+ HandlerOffsetExpr(HandlerOffset) {}
+ };
+
+ typedef std::vector<FaultInfo> FunctionFaultInfos;
+
+ // We'd like to keep a stable iteration order for FunctionInfos to help
+ // FileCheck based testing.
+ struct MCSymbolComparator {
+ bool operator()(const MCSymbol *LHS, const MCSymbol *RHS) const {
+ return LHS->getName() < RHS->getName();
+ }
+ };
+
+ std::map<const MCSymbol *, FunctionFaultInfos, MCSymbolComparator>
+ FunctionInfos;
+ AsmPrinter &AP;
+
+ void emitFunctionInfo(const MCSymbol *FnLabel, const FunctionFaultInfos &FFI);
+};
+} // namespace llvm
+
+#endif
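A hedged usage sketch for the new FaultMaps helper; the wrapper function is hypothetical, and in-tree the calls are spread across the target's AsmPrinter rather than grouped like this:

    #include "llvm/CodeGen/FaultMaps.h"

    // Record one faulting load against its handler label, then emit the
    // accumulated fault map data for the current object file.
    void emitFaultData(llvm::AsmPrinter &AP, const llvm::MCSymbol *HandlerLabel) {
      llvm::FaultMaps FM(AP);
      FM.recordFaultingOp(llvm::FaultMaps::FaultingLoad, HandlerLabel);
      FM.serializeToFaultMapSection();
    }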
diff --git a/contrib/llvm/include/llvm/CodeGen/GCMetadata.h b/contrib/llvm/include/llvm/CodeGen/GCMetadata.h
index e883bd1..b34f67a 100644
--- a/contrib/llvm/include/llvm/CodeGen/GCMetadata.h
+++ b/contrib/llvm/include/llvm/CodeGen/GCMetadata.h
@@ -201,6 +201,6 @@ public:
/// will soon change.
GCFunctionInfo &getFunctionInfo(const Function &F);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/GCMetadataPrinter.h b/contrib/llvm/include/llvm/CodeGen/GCMetadataPrinter.h
index 2208470..e451cd2 100644
--- a/contrib/llvm/include/llvm/CodeGen/GCMetadataPrinter.h
+++ b/contrib/llvm/include/llvm/CodeGen/GCMetadataPrinter.h
@@ -59,6 +59,6 @@ public:
virtual ~GCMetadataPrinter();
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/GCStrategy.h b/contrib/llvm/include/llvm/CodeGen/GCStrategy.h
index a1b8e89..2a4dabb 100644
--- a/contrib/llvm/include/llvm/CodeGen/GCStrategy.h
+++ b/contrib/llvm/include/llvm/CodeGen/GCStrategy.h
@@ -172,6 +172,6 @@ public:
/// register your GCMetadataPrinter subclass with the
/// GCMetadataPrinterRegistery as well.
typedef Registry<GCStrategy> GCRegistry;
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/GCs.h b/contrib/llvm/include/llvm/CodeGen/GCs.h
index 5207f80..5418fff 100644
--- a/contrib/llvm/include/llvm/CodeGen/GCs.h
+++ b/contrib/llvm/include/llvm/CodeGen/GCs.h
@@ -41,6 +41,6 @@ void linkErlangGCPrinter();
void linkShadowStackGC();
void linkStatepointExampleGC();
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h b/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h
index c2071fe..5a1cf59 100644
--- a/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -890,8 +890,8 @@ namespace ISD {
CVT_INVALID /// Marker - Invalid opcode
};
-} // end llvm::ISD namespace
+} // namespace ISD
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/IntrinsicLowering.h b/contrib/llvm/include/llvm/CodeGen/IntrinsicLowering.h
index 9e6ab7d..a764645 100644
--- a/contrib/llvm/include/llvm/CodeGen/IntrinsicLowering.h
+++ b/contrib/llvm/include/llvm/CodeGen/IntrinsicLowering.h
@@ -54,6 +54,6 @@ namespace llvm {
/// simple integer bswap.
static bool LowerToByteSwap(CallInst *CI);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/LatencyPriorityQueue.h b/contrib/llvm/include/llvm/CodeGen/LatencyPriorityQueue.h
index f347f66..cc33f34 100644
--- a/contrib/llvm/include/llvm/CodeGen/LatencyPriorityQueue.h
+++ b/contrib/llvm/include/llvm/CodeGen/LatencyPriorityQueue.h
@@ -93,6 +93,6 @@ private:
void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
SUnit *getSingleUnscheduledPred(SUnit *SU);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/LexicalScopes.h b/contrib/llvm/include/llvm/CodeGen/LexicalScopes.h
index 7d7e48a..7478c3a 100644
--- a/contrib/llvm/include/llvm/CodeGen/LexicalScopes.h
+++ b/contrib/llvm/include/llvm/CodeGen/LexicalScopes.h
@@ -252,6 +252,6 @@ private:
LexicalScope *CurrentFnLexicalScope;
};
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveInterval.h b/contrib/llvm/include/llvm/CodeGen/LiveInterval.h
index 9b8b91c..ea44ab1 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveInterval.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveInterval.h
@@ -866,5 +866,5 @@ namespace llvm {
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h b/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 9673f80..9d68841 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -444,6 +444,6 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs;
class HMEditor;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h
index c97c636..f04efc3 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h
@@ -228,6 +228,6 @@ public:
const MachineBlockFrequencyInfo&);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveStackAnalysis.h b/contrib/llvm/include/llvm/CodeGen/LiveStackAnalysis.h
index f495507..b4808ab 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveStackAnalysis.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveStackAnalysis.h
@@ -95,6 +95,6 @@ namespace llvm {
/// print - Implement the dump method.
void print(raw_ostream &O, const Module* = nullptr) const override;
};
-}
+} // namespace llvm
#endif /* LLVM_CODEGEN_LIVESTACK_ANALYSIS_H */
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveVariables.h b/contrib/llvm/include/llvm/CodeGen/LiveVariables.h
index 55b97dc..334e8c5 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveVariables.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveVariables.h
@@ -306,6 +306,6 @@ public:
void setPHIJoin(unsigned Reg) { PHIJoins.set(Reg); }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h b/contrib/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
index 710b2d4..67b756d 100644
--- a/contrib/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
+++ b/contrib/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
@@ -19,33 +19,62 @@
#define LLVM_CODEGEN_MIRPARSER_MIRPARSER_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineFunctionInitializer.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include <memory>
namespace llvm {
+class MIRParserImpl;
class SMDiagnostic;
+/// This class initializes machine functions by applying the state loaded from
+/// a MIR file.
+class MIRParser : public MachineFunctionInitializer {
+ std::unique_ptr<MIRParserImpl> Impl;
+
+public:
+ MIRParser(std::unique_ptr<MIRParserImpl> Impl);
+ MIRParser(const MIRParser &) = delete;
+ ~MIRParser();
+
+ /// Parse the optional LLVM IR module that's embedded in the MIR file.
+ ///
+ /// A new, empty module is created if the LLVM IR isn't present.
+ /// Returns null if a parsing error occurred.
+ std::unique_ptr<Module> parseLLVMModule();
+
+ /// Initialize the machine function to the state that's described in the MIR
+ /// file.
+ ///
+ /// Return true if an error occurred.
+ bool initializeMachineFunction(MachineFunction &MF) override;
+};
+
/// This function is the main interface to the MIR serialization format parser.
///
-/// It reads a YAML file that has an optional LLVM IR and returns an LLVM
-/// module.
+/// It reads in a MIR file and returns a MIR parser that can parse the embedded
+/// LLVM IR module and initialize the machine functions by parsing the machine
+/// function's state.
+///
/// \param Filename - The name of the file to parse.
/// \param Error - Error result info.
-/// \param Context - Context in which to allocate globals info.
-std::unique_ptr<Module> parseMIRFile(StringRef Filename, SMDiagnostic &Error,
- LLVMContext &Context);
+/// \param Context - Context which will be used for the parsed LLVM IR module.
+std::unique_ptr<MIRParser> createMIRParserFromFile(StringRef Filename,
+ SMDiagnostic &Error,
+ LLVMContext &Context);
/// This function is another interface to the MIR serialization format parser.
///
-/// It parses the optional LLVM IR in the given buffer, and returns an LLVM
-/// module.
+/// It returns a MIR parser that works with the given memory buffer and that can
+/// parse the embedded LLVM IR module and initialize the machine functions by
+/// parsing the machine function's state.
+///
/// \param Contents - The MemoryBuffer containing the machine level IR.
-/// \param Error - Error result info.
-/// \param Context - Context in which to allocate globals info.
-std::unique_ptr<Module> parseMIR(std::unique_ptr<MemoryBuffer> Contents,
- SMDiagnostic &Error, LLVMContext &Context);
+/// \param Context - Context which will be used for the parsed LLVM IR module.
+std::unique_ptr<MIRParser>
+createMIRParser(std::unique_ptr<MemoryBuffer> Contents, LLVMContext &Context);
} // end namespace llvm
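A hedged sketch of driving the interface declared above; the file name is illustrative and error reporting is abbreviated:

    #include "llvm/CodeGen/MIRParser/MIRParser.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Support/SourceMgr.h"

    // Parse a .mir file and hand back both the embedded IR module and the
    // parser itself, which later serves as the MachineFunctionInitializer.
    std::unique_ptr<llvm::Module>
    loadMIR(llvm::LLVMContext &Ctx, std::unique_ptr<llvm::MIRParser> &ParserOut) {
      llvm::SMDiagnostic Err;
      ParserOut = llvm::createMIRParserFromFile("input.mir", Err, Ctx);
      if (!ParserOut)
        return nullptr;                    // File could not be opened/parsed.
      return ParserOut->parseLLVMModule(); // Null on a YAML or IR parse error.
    }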
diff --git a/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h
index f9d4c74..b1fe47a 100644
--- a/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -20,17 +20,54 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/YAMLTraits.h"
+#include <vector>
+
+namespace llvm {
+namespace yaml {
+
+struct MachineBasicBlock {
+ std::string Name;
+ unsigned Alignment = 0;
+ bool IsLandingPad = false;
+ bool AddressTaken = false;
+ // TODO: Serialize the successors and liveins.
+ // TODO: Serialize machine instructions.
+};
+
+template <> struct MappingTraits<MachineBasicBlock> {
+ static void mapping(IO &YamlIO, MachineBasicBlock &MBB) {
+ YamlIO.mapOptional("name", MBB.Name,
+ std::string()); // Don't print out an empty name.
+ YamlIO.mapOptional("alignment", MBB.Alignment);
+ YamlIO.mapOptional("isLandingPad", MBB.IsLandingPad);
+ YamlIO.mapOptional("addressTaken", MBB.AddressTaken);
+ }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineBasicBlock)
namespace llvm {
namespace yaml {
struct MachineFunction {
StringRef Name;
+ unsigned Alignment = 0;
+ bool ExposesReturnsTwice = false;
+ bool HasInlineAsm = false;
+
+ std::vector<MachineBasicBlock> BasicBlocks;
};
template <> struct MappingTraits<MachineFunction> {
static void mapping(IO &YamlIO, MachineFunction &MF) {
YamlIO.mapRequired("name", MF.Name);
+ YamlIO.mapOptional("alignment", MF.Alignment);
+ YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice);
+ YamlIO.mapOptional("hasInlineAsm", MF.HasInlineAsm);
+ YamlIO.mapOptional("body", MF.BasicBlocks);
}
};
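Given the traits above, a hand-built function description can be serialized directly with the YAML I/O machinery; a small sketch (output field names follow the mapRequired/mapOptional calls):

    #include "llvm/CodeGen/MIRYamlMapping.h"
    #include "llvm/Support/raw_ostream.h"

    void dumpSampleMF() {
      llvm::yaml::MachineFunction MF;
      MF.Name = "sample";
      MF.Alignment = 4;
      MF.BasicBlocks.resize(1);
      MF.BasicBlocks[0].Name = "entry"; // Empty block names are omitted on output.

      llvm::yaml::Output Out(llvm::outs());
      Out << MF; // Emits "name: sample", "alignment: 4", "body: ..." and so on.
    }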
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 357aef0..619894c 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -801,6 +801,6 @@ public:
MachineBasicBlock::iterator getInitial() { return I; }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
index feb394e..9d0a069 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
@@ -66,6 +66,6 @@ public:
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index 7ba7495..da6ea1d 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -84,7 +84,7 @@ public:
const MachineBasicBlock *Dst) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineConstantPool.h b/contrib/llvm/include/llvm/CodeGen/MachineConstantPool.h
index c619afb..8a915fb 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineConstantPool.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineConstantPool.h
@@ -174,6 +174,6 @@ public:
void dump() const;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h b/contrib/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h
index 4131194..f8dd2cd 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h
@@ -104,6 +104,6 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h
index 4428fa6..6518114 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h
@@ -270,6 +270,6 @@ template <> struct GraphTraits<MachineDominatorTree*>
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 3889d471..ac92a4b 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -79,9 +79,9 @@ public:
/// @brief Abstract Stack Frame Information
class MachineFrameInfo {
- // StackObject - Represent a single object allocated on the stack.
+ // Represent a single object allocated on the stack.
struct StackObject {
- // SPOffset - The offset of this object from the stack pointer on entry to
+ // The offset of this object from the stack pointer on entry to
// the function. This field has no meaning for a variable sized element.
int64_t SPOffset;
@@ -89,23 +89,23 @@ class MachineFrameInfo {
// ~0ULL means a dead object.
uint64_t Size;
- // Alignment - The required alignment of this stack slot.
+ // The required alignment of this stack slot.
unsigned Alignment;
- // isImmutable - If true, the value of the stack object is set before
+ // If true, the value of the stack object is set before
// entering the function and is not modified inside the function. By
// default, fixed objects are immutable unless marked otherwise.
bool isImmutable;
- // isSpillSlot - If true the stack object is used as spill slot. It
+ // If true the stack object is used as spill slot. It
// cannot alias any other memory objects.
bool isSpillSlot;
- /// Alloca - If this stack object is originated from an Alloca instruction
+ /// If this stack object originates from an Alloca instruction
/// this value saves the original IR allocation. Can be NULL.
const AllocaInst *Alloca;
- // PreAllocated - If true, the object was mapped into the local frame
+ // If true, the object was mapped into the local frame
// block and doesn't need additional handling for allocation beyond that.
bool PreAllocated;
@@ -121,51 +121,47 @@ class MachineFrameInfo {
isSpillSlot(isSS), Alloca(Val), PreAllocated(false), isAliased(A) {}
};
- /// StackAlignment - The alignment of the stack.
+ /// The alignment of the stack.
unsigned StackAlignment;
- /// StackRealignable - Can the stack be realigned.
+ /// Can the stack be realigned.
bool StackRealignable;
- /// Objects - The list of stack objects allocated...
- ///
+ /// The list of stack objects allocated.
std::vector<StackObject> Objects;
- /// NumFixedObjects - This contains the number of fixed objects contained on
+ /// This contains the number of fixed objects contained on
/// the stack. Because fixed objects are stored at a negative index in the
/// Objects list, this is also the index to the 0th object in the list.
- ///
unsigned NumFixedObjects;
- /// HasVarSizedObjects - This boolean keeps track of whether any variable
+ /// This boolean keeps track of whether any variable
/// sized objects have been allocated yet.
- ///
bool HasVarSizedObjects;
- /// FrameAddressTaken - This boolean keeps track of whether there is a call
+ /// This boolean keeps track of whether there is a call
/// to builtin \@llvm.frameaddress.
bool FrameAddressTaken;
- /// ReturnAddressTaken - This boolean keeps track of whether there is a call
+ /// This boolean keeps track of whether there is a call
/// to builtin \@llvm.returnaddress.
bool ReturnAddressTaken;
- /// HasStackMap - This boolean keeps track of whether there is a call
+ /// This boolean keeps track of whether there is a call
/// to builtin \@llvm.experimental.stackmap.
bool HasStackMap;
- /// HasPatchPoint - This boolean keeps track of whether there is a call
+ /// This boolean keeps track of whether there is a call
/// to builtin \@llvm.experimental.patchpoint.
bool HasPatchPoint;
- /// StackSize - The prolog/epilog code inserter calculates the final stack
+ /// The prolog/epilog code inserter calculates the final stack
/// offsets for all of the fixed size objects, updating the Objects list
/// above. It then updates StackSize to contain the number of bytes that need
/// to be allocated on entry to the function.
- ///
uint64_t StackSize;
- /// OffsetAdjustment - The amount that a frame offset needs to be adjusted to
+ /// The amount that a frame offset needs to be adjusted to
/// have the actual offset from the stack/frame pointer. The exact usage of
/// this is target-dependent, but it is typically used to adjust between
/// SP-relative and FP-relative offsets. E.G., if objects are accessed via
@@ -176,52 +172,49 @@ class MachineFrameInfo {
/// corresponding adjustments are performed directly.
int OffsetAdjustment;
- /// MaxAlignment - The prolog/epilog code inserter may process objects
- /// that require greater alignment than the default alignment the target
- /// provides. To handle this, MaxAlignment is set to the maximum alignment
+ /// The prolog/epilog code inserter may process objects that require greater
+ /// alignment than the default alignment the target provides.
+ /// To handle this, MaxAlignment is set to the maximum alignment
/// needed by the objects on the current frame. If this is greater than the
/// native alignment maintained by the compiler, dynamic alignment code will
/// be needed.
///
unsigned MaxAlignment;
- /// AdjustsStack - Set to true if this function adjusts the stack -- e.g.,
+ /// Set to true if this function adjusts the stack -- e.g.,
/// when calling another function. This is only valid during and after
/// prolog/epilog code insertion.
bool AdjustsStack;
- /// HasCalls - Set to true if this function has any function calls.
+ /// Set to true if this function has any function calls.
bool HasCalls;
- /// StackProtectorIdx - The frame index for the stack protector.
+ /// The frame index for the stack protector.
int StackProtectorIdx;
- /// FunctionContextIdx - The frame index for the function context. Used for
- /// SjLj exceptions.
+ /// The frame index for the function context. Used for SjLj exceptions.
int FunctionContextIdx;
- /// MaxCallFrameSize - This contains the size of the largest call frame if the
- /// target uses frame setup/destroy pseudo instructions (as defined in the
- /// TargetFrameInfo class). This information is important for frame pointer
- /// elimination. If is only valid during and after prolog/epilog code
- /// insertion.
- ///
+ /// This contains the size of the largest call frame if the target uses frame
+ /// setup/destroy pseudo instructions (as defined in the TargetFrameInfo
+ /// class). This information is important for frame pointer elimination.
+ /// It is only valid during and after prolog/epilog code insertion.
unsigned MaxCallFrameSize;
- /// CSInfo - The prolog/epilog code inserter fills in this vector with each
+ /// The prolog/epilog code inserter fills in this vector with each
/// callee saved register saved in the frame. Beyond its use by the prolog/
/// epilog code inserter, this data used for debug info and exception
/// handling.
std::vector<CalleeSavedInfo> CSInfo;
- /// CSIValid - Has CSInfo been set yet?
+ /// Has CSInfo been set yet?
bool CSIValid;
- /// LocalFrameObjects - References to frame indices which are mapped
+ /// References to frame indices which are mapped
/// into the local frame allocation block. <FrameIdx, LocalOffset>
SmallVector<std::pair<int, int64_t>, 32> LocalFrameObjects;
- /// LocalFrameSize - Size of the pre-allocated local frame block.
+ /// Size of the pre-allocated local frame block.
int64_t LocalFrameSize;
/// Required alignment of the local object blob, which is the strictest
@@ -284,101 +277,90 @@ public:
HasTailCall = false;
}
- /// hasStackObjects - Return true if there are any stack objects in this
- /// function.
- ///
+ /// Return true if there are any stack objects in this function.
bool hasStackObjects() const { return !Objects.empty(); }
- /// hasVarSizedObjects - This method may be called any time after instruction
+ /// This method may be called any time after instruction
/// selection is complete to determine if the stack frame for this function
/// contains any variable sized objects.
- ///
bool hasVarSizedObjects() const { return HasVarSizedObjects; }
- /// getStackProtectorIndex/setStackProtectorIndex - Return the index for the
- /// stack protector object.
- ///
+ /// Return the index for the stack protector object.
int getStackProtectorIndex() const { return StackProtectorIdx; }
void setStackProtectorIndex(int I) { StackProtectorIdx = I; }
- /// getFunctionContextIndex/setFunctionContextIndex - Return the index for the
- /// function context object. This object is used for SjLj exceptions.
+ /// Return the index for the function context object.
+ /// This object is used for SjLj exceptions.
int getFunctionContextIndex() const { return FunctionContextIdx; }
void setFunctionContextIndex(int I) { FunctionContextIdx = I; }
- /// isFrameAddressTaken - This method may be called any time after instruction
+ /// This method may be called any time after instruction
/// selection is complete to determine if there is a call to
/// \@llvm.frameaddress in this function.
bool isFrameAddressTaken() const { return FrameAddressTaken; }
void setFrameAddressIsTaken(bool T) { FrameAddressTaken = T; }
- /// isReturnAddressTaken - This method may be called any time after
+ /// This method may be called any time after
/// instruction selection is complete to determine if there is a call to
/// \@llvm.returnaddress in this function.
bool isReturnAddressTaken() const { return ReturnAddressTaken; }
void setReturnAddressIsTaken(bool s) { ReturnAddressTaken = s; }
- /// hasStackMap - This method may be called any time after instruction
+ /// This method may be called any time after instruction
/// selection is complete to determine if there is a call to builtin
/// \@llvm.experimental.stackmap.
bool hasStackMap() const { return HasStackMap; }
void setHasStackMap(bool s = true) { HasStackMap = s; }
- /// hasPatchPoint - This method may be called any time after instruction
+ /// This method may be called any time after instruction
/// selection is complete to determine if there is a call to builtin
/// \@llvm.experimental.patchpoint.
bool hasPatchPoint() const { return HasPatchPoint; }
void setHasPatchPoint(bool s = true) { HasPatchPoint = s; }
- /// getObjectIndexBegin - Return the minimum frame object index.
- ///
+ /// Return the minimum frame object index.
int getObjectIndexBegin() const { return -NumFixedObjects; }
- /// getObjectIndexEnd - Return one past the maximum frame object index.
- ///
+ /// Return one past the maximum frame object index.
int getObjectIndexEnd() const { return (int)Objects.size()-NumFixedObjects; }
- /// getNumFixedObjects - Return the number of fixed objects.
+ /// Return the number of fixed objects.
unsigned getNumFixedObjects() const { return NumFixedObjects; }
- /// getNumObjects - Return the number of objects.
- ///
+ /// Return the number of objects.
unsigned getNumObjects() const { return Objects.size(); }
- /// mapLocalFrameObject - Map a frame index into the local object block
+ /// Map a frame index into the local object block
void mapLocalFrameObject(int ObjectIndex, int64_t Offset) {
LocalFrameObjects.push_back(std::pair<int, int64_t>(ObjectIndex, Offset));
Objects[ObjectIndex + NumFixedObjects].PreAllocated = true;
}
- /// getLocalFrameObjectMap - Get the local offset mapping for a for an object
+ /// Get the local offset mapping for an object.
std::pair<int, int64_t> getLocalFrameObjectMap(int i) {
assert (i >= 0 && (unsigned)i < LocalFrameObjects.size() &&
"Invalid local object reference!");
return LocalFrameObjects[i];
}
- /// getLocalFrameObjectCount - Return the number of objects allocated into
- /// the local object block.
+ /// Return the number of objects allocated into the local object block.
int64_t getLocalFrameObjectCount() { return LocalFrameObjects.size(); }
- /// setLocalFrameSize - Set the size of the local object blob.
+ /// Set the size of the local object blob.
void setLocalFrameSize(int64_t sz) { LocalFrameSize = sz; }
- /// getLocalFrameSize - Get the size of the local object blob.
+ /// Get the size of the local object blob.
int64_t getLocalFrameSize() const { return LocalFrameSize; }
- /// setLocalFrameMaxAlign - Required alignment of the local object blob,
+ /// Set the required alignment of the local object blob,
/// which is the strictest alignment of any object in it.
void setLocalFrameMaxAlign(unsigned Align) { LocalFrameMaxAlign = Align; }
- /// getLocalFrameMaxAlign - Return the required alignment of the local
- /// object blob.
+ /// Return the required alignment of the local object blob.
unsigned getLocalFrameMaxAlign() const { return LocalFrameMaxAlign; }
- /// getUseLocalStackAllocationBlock - Get whether the local allocation blob
- /// should be allocated together or let PEI allocate the locals in it
- /// directly.
+ /// Get whether the local allocation blob should be allocated together or
+ /// whether PEI should allocate the locals in it directly.
bool getUseLocalStackAllocationBlock() {return UseLocalStackAllocationBlock;}
/// setUseLocalStackAllocationBlock - Set whether the local allocation blob
@@ -388,30 +370,28 @@ public:
UseLocalStackAllocationBlock = v;
}
- /// isObjectPreAllocated - Return true if the object was pre-allocated into
- /// the local block.
+ /// Return true if the object was pre-allocated into the local block.
bool isObjectPreAllocated(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
return Objects[ObjectIdx+NumFixedObjects].PreAllocated;
}
- /// getObjectSize - Return the size of the specified object.
- ///
+ /// Return the size of the specified object.
int64_t getObjectSize(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
return Objects[ObjectIdx+NumFixedObjects].Size;
}
- /// setObjectSize - Change the size of the specified stack object.
+ /// Change the size of the specified stack object.
void setObjectSize(int ObjectIdx, int64_t Size) {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
Objects[ObjectIdx+NumFixedObjects].Size = Size;
}
- /// getObjectAlignment - Return the alignment of the specified stack object.
+ /// Return the alignment of the specified stack object.
unsigned getObjectAlignment(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
@@ -426,7 +406,7 @@ public:
ensureMaxAlignment(Align);
}
- /// getObjectAllocation - Return the underlying Alloca of the specified
+ /// Return the underlying Alloca of the specified
/// stack object if it exists. Returns 0 if none exists.
const AllocaInst* getObjectAllocation(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
@@ -434,9 +414,8 @@ public:
return Objects[ObjectIdx+NumFixedObjects].Alloca;
}
- /// getObjectOffset - Return the assigned stack offset of the specified object
+ /// Return the assigned stack offset of the specified object
/// from the incoming stack pointer.
- ///
int64_t getObjectOffset(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
@@ -445,9 +424,8 @@ public:
return Objects[ObjectIdx+NumFixedObjects].SPOffset;
}
- /// setObjectOffset - Set the stack frame offset of the specified object. The
+ /// Set the stack frame offset of the specified object. The
/// offset is relative to the stack pointer on entry to the function.
- ///
void setObjectOffset(int ObjectIdx, int64_t SPOffset) {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
@@ -456,44 +434,37 @@ public:
Objects[ObjectIdx+NumFixedObjects].SPOffset = SPOffset;
}
- /// getStackSize - Return the number of bytes that must be allocated to hold
+ /// Return the number of bytes that must be allocated to hold
/// all of the fixed size frame objects. This is only valid after
/// Prolog/Epilog code insertion has finalized the stack frame layout.
- ///
uint64_t getStackSize() const { return StackSize; }
- /// setStackSize - Set the size of the stack...
- ///
+ /// Set the size of the stack.
void setStackSize(uint64_t Size) { StackSize = Size; }
/// Estimate and return the size of the stack frame.
unsigned estimateStackSize(const MachineFunction &MF) const;
- /// getOffsetAdjustment - Return the correction for frame offsets.
- ///
+ /// Return the correction for frame offsets.
int getOffsetAdjustment() const { return OffsetAdjustment; }
- /// setOffsetAdjustment - Set the correction for frame offsets.
- ///
+ /// Set the correction for frame offsets.
void setOffsetAdjustment(int Adj) { OffsetAdjustment = Adj; }
- /// getMaxAlignment - Return the alignment in bytes that this function must be
- /// aligned to, which is greater than the default stack alignment provided by
- /// the target.
- ///
+ /// Return the alignment in bytes that this function must be aligned to,
+ /// which is greater than the default stack alignment provided by the target.
unsigned getMaxAlignment() const { return MaxAlignment; }
- /// ensureMaxAlignment - Make sure the function is at least Align bytes
- /// aligned.
+ /// Make sure the function is at least Align bytes aligned.
void ensureMaxAlignment(unsigned Align);
- /// AdjustsStack - Return true if this function adjusts the stack -- e.g.,
+ /// Return true if this function adjusts the stack -- e.g.,
/// when calling another function. This is only valid during and after
/// prolog/epilog code insertion.
bool adjustsStack() const { return AdjustsStack; }
void setAdjustsStack(bool V) { AdjustsStack = V; }
- /// hasCalls - Return true if the current function has any function calls.
+ /// Return true if the current function has any function calls.
bool hasCalls() const { return HasCalls; }
void setHasCalls(bool V) { HasCalls = V; }
@@ -513,7 +484,7 @@ public:
bool hasTailCall() const { return HasTailCall; }
void setHasTailCall() { HasTailCall = true; }
- /// getMaxCallFrameSize - Return the maximum size of a call frame that must be
+ /// Return the maximum size of a call frame that must be
/// allocated for an outgoing function call. This is only available if
/// CallFrameSetup/Destroy pseudo instructions are used by the target, and
/// then only during or after prolog/epilog code insertion.
@@ -521,25 +492,23 @@ public:
unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; }
void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
- /// CreateFixedObject - Create a new object at a fixed location on the stack.
+ /// Create a new object at a fixed location on the stack.
/// All fixed objects should be created before other objects are created for
/// efficiency. By default, fixed objects are not pointed to by LLVM IR
/// values. This returns an index with a negative value.
- ///
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable,
bool isAliased = false);
- /// CreateFixedSpillStackObject - Create a spill slot at a fixed location
- /// on the stack. Returns an index with a negative value.
+ /// Create a spill slot at a fixed location on the stack.
+ /// Returns an index with a negative value.
int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset);
- /// isFixedObjectIndex - Returns true if the specified index corresponds to a
- /// fixed stack object.
+ /// Returns true if the specified index corresponds to a fixed stack object.
bool isFixedObjectIndex(int ObjectIdx) const {
return ObjectIdx < 0 && (ObjectIdx >= -(int)NumFixedObjects);
}
- /// isAliasedObjectIndex - Returns true if the specified index corresponds
+ /// Returns true if the specified index corresponds
/// to an object that might be pointed to by an LLVM IR value.
bool isAliasedObjectIndex(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
@@ -558,61 +527,52 @@ public:
return Objects[ObjectIdx+NumFixedObjects].isImmutable;
}
- /// isSpillSlotObjectIndex - Returns true if the specified index corresponds
- /// to a spill slot..
+ /// Returns true if the specified index corresponds to a spill slot.
bool isSpillSlotObjectIndex(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
return Objects[ObjectIdx+NumFixedObjects].isSpillSlot;
}
- /// isDeadObjectIndex - Returns true if the specified index corresponds to
- /// a dead object.
+ /// Returns true if the specified index corresponds to a dead object.
bool isDeadObjectIndex(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
return Objects[ObjectIdx+NumFixedObjects].Size == ~0ULL;
}
- /// CreateStackObject - Create a new statically sized stack object, returning
+ /// Create a new statically sized stack object, returning
/// a nonnegative identifier to represent it.
- ///
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS,
const AllocaInst *Alloca = nullptr);
- /// CreateSpillStackObject - Create a new statically sized stack object that
- /// represents a spill slot, returning a nonnegative identifier to represent
- /// it.
- ///
+ /// Create a new statically sized stack object that represents a spill slot,
+ /// returning a nonnegative identifier to represent it.
int CreateSpillStackObject(uint64_t Size, unsigned Alignment);
- /// RemoveStackObject - Remove or mark dead a statically sized stack object.
- ///
+ /// Remove or mark dead a statically sized stack object.
void RemoveStackObject(int ObjectIdx) {
// Mark it dead.
Objects[ObjectIdx+NumFixedObjects].Size = ~0ULL;
}
- /// CreateVariableSizedObject - Notify the MachineFrameInfo object that a
- /// variable sized object has been created. This must be created whenever a
- /// variable sized object is created, whether or not the index returned is
- /// actually used.
- ///
+ /// Notify the MachineFrameInfo object that a variable sized object has been
+ /// created. This must be created whenever a variable sized object is
+ /// created, whether or not the index returned is actually used.
int CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca);
- /// getCalleeSavedInfo - Returns a reference to call saved info vector for the
- /// current function.
+ /// Returns a reference to the callee saved info vector for the current function.
const std::vector<CalleeSavedInfo> &getCalleeSavedInfo() const {
return CSInfo;
}
- /// setCalleeSavedInfo - Used by prolog/epilog inserter to set the function's
- /// callee saved information.
+ /// Used by prolog/epilog inserter to set the function's callee saved
+ /// information.
void setCalleeSavedInfo(const std::vector<CalleeSavedInfo> &CSI) {
CSInfo = CSI;
}
- /// isCalleeSavedInfoValid - Has the callee saved info been calculated yet?
+ /// Has the callee saved info been calculated yet?
bool isCalleeSavedInfoValid() const { return CSIValid; }
void setCalleeSavedInfoValid(bool v) { CSIValid = v; }
@@ -632,15 +592,14 @@ public:
/// method always returns an empty set.
BitVector getPristineRegs(const MachineFunction &MF) const;
- /// print - Used by the MachineFunction printer to print information about
- /// stack objects. Implemented in MachineFunction.cpp
- ///
+ /// Used by the MachineFunction printer to print information about
+ /// stack objects. Implemented in MachineFunction.cpp.
void print(const MachineFunction &MF, raw_ostream &OS) const;
/// dump - Print the function to stderr.
void dump(const MachineFunction &MF) const;
};
-} // End llvm namespace
+} // namespace llvm
#endif
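A hedged sketch of the stack-object API whose comments were reflowed above; the wrapper is hypothetical and assumes the MachineFunction comes from the surrounding pass:

    #include "llvm/CodeGen/MachineFrameInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"
    #include <cassert>

    // Create an 8-byte, 8-aligned spill slot and read its properties back.
    // Ordinary objects get non-negative indices; CreateFixedObject returns
    // negative indices instead.
    int makeSpillSlot(llvm::MachineFunction &MF) {
      llvm::MachineFrameInfo *MFI = MF.getFrameInfo();
      int FI = MFI->CreateSpillStackObject(/*Size=*/8, /*Alignment=*/8);
      assert(MFI->isSpillSlotObjectIndex(FI) && MFI->getObjectSize(FI) == 8);
      return FI;
    }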
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFunction.h b/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
index 94610ca..d838cad 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -546,6 +546,6 @@ template <> struct GraphTraits<Inverse<const MachineFunction*> > :
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h b/contrib/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
index 023eeb1..576e72b 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
@@ -19,6 +19,7 @@
namespace llvm {
class MachineFunction;
+class MachineFunctionInitializer;
class TargetMachine;
/// MachineFunctionAnalysis - This class is a Pass that manages a
@@ -28,9 +29,12 @@ private:
const TargetMachine &TM;
MachineFunction *MF;
unsigned NextFnNum;
+ MachineFunctionInitializer *MFInitializer;
+
public:
static char ID;
- explicit MachineFunctionAnalysis(const TargetMachine &tm);
+ explicit MachineFunctionAnalysis(const TargetMachine &tm,
+ MachineFunctionInitializer *MFInitializer);
~MachineFunctionAnalysis() override;
MachineFunction &getMF() const { return *MF; }
@@ -46,6 +50,6 @@ private:
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFunctionInitializer.h b/contrib/llvm/include/llvm/CodeGen/MachineFunctionInitializer.h
new file mode 100644
index 0000000..ff4c29c
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/MachineFunctionInitializer.h
@@ -0,0 +1,38 @@
+//===- MachineFunctionInitializer.h - machine function initializer --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares an interface that allows custom machine function
+// initialization.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEFUNCTIONINITIALIZER_H
+#define LLVM_CODEGEN_MACHINEFUNCTIONINITIALIZER_H
+
+namespace llvm {
+
+class MachineFunction;
+
+/// This interface provides a way to initialize machine functions after they are
+/// created by the machine function analysis pass.
+class MachineFunctionInitializer {
+ virtual void anchor();
+
+public:
+ virtual ~MachineFunctionInitializer() {}
+
+ /// Initialize the machine function.
+ ///
+ /// Return true if an error occurred.
+ virtual bool initializeMachineFunction(MachineFunction &MF) = 0;
+};
+
+} // end namespace llvm
+
+#endif
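A minimal sketch of implementing the new hook; this no-op subclass is purely illustrative (the MIRParser change earlier in this patch is the real in-tree user):

    #include "llvm/CodeGen/MachineFunctionInitializer.h"

    namespace {
    // A real initializer would populate MF's properties, frame info and blocks
    // here; returning false signals "no error" per the interface comment.
    class NoopMFInitializer : public llvm::MachineFunctionInitializer {
    public:
      bool initializeMachineFunction(llvm::MachineFunction &MF) override {
        (void)MF;
        return false;
      }
    };
    } // namespace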
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFunctionPass.h b/contrib/llvm/include/llvm/CodeGen/MachineFunctionPass.h
index 50a1f6e..0e09c90 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineFunctionPass.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineFunctionPass.h
@@ -54,6 +54,6 @@ private:
bool runOnFunction(Function &F) override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
index edda03f..0313e93 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -1235,6 +1235,6 @@ inline raw_ostream& operator<<(raw_ostream &OS, const MachineInstr &MI) {
return OS;
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
index 47397c6..0778ff4 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -40,7 +40,7 @@ namespace RegState {
ImplicitDefine = Implicit | Define,
ImplicitKill = Implicit | Kill
};
-}
+} // namespace RegState
class MachineInstrBuilder {
MachineFunction *MF;
@@ -502,6 +502,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h b/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h
index 1220224..edebfa6 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h
@@ -247,6 +247,6 @@ public:
const MachineOperand *operator->() const { return &deref(); }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h
index adcd1d0..b59b585 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -125,6 +125,6 @@ public:
void dump() const;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineLoopInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineLoopInfo.h
index 438ef2e..8c245ae 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineLoopInfo.h
@@ -186,6 +186,6 @@ template <> struct GraphTraits<MachineLoop*> {
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h b/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h
index a16c294..9962ff9 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h
@@ -221,6 +221,6 @@ public:
raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MRO);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h
index ce45c16..5faf8de 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h
@@ -157,11 +157,6 @@ class MachineModuleInfo : public ImmutablePass {
/// emit common EH frames.
std::vector<const Function *> Personalities;
- /// UsedFunctions - The functions in the @llvm.used list in a more easily
- /// searchable format. This does not include the functions in
- /// llvm.compiler.used.
- SmallPtrSet<const Function *, 32> UsedFunctions;
-
/// AddrLabelSymbols - This map keeps track of which symbol is being used for
/// the specified basic block's address of label.
MMIAddrLabelMap *AddrLabelSymbols;
@@ -246,10 +241,6 @@ public:
return const_cast<MachineModuleInfo*>(this)->getObjFileInfo<Ty>();
}
- /// AnalyzeModule - Scan the module for global debug information.
- ///
- void AnalyzeModule(const Module &M);
-
/// hasDebugInfo - Returns true if valid debug info is present.
///
bool hasDebugInfo() const { return DbgInfoAvailable; }
@@ -339,13 +330,6 @@ public:
return Personalities;
}
- /// isUsedFunction - Return true if the functions in the llvm.used list. This
- /// does not return true for things in llvm.compiler.used unless they are also
- /// in llvm.used.
- bool isUsedFunction(const Function *F) const {
- return UsedFunctions.count(F);
- }
-
/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
///
void addCatchTypeInfo(MachineBasicBlock *LandingPad,
@@ -457,6 +441,6 @@ public:
}; // End class MachineModuleInfo
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineOperand.h b/contrib/llvm/include/llvm/CodeGen/MachineOperand.h
index ddffdca..8c8ce71 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineOperand.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineOperand.h
@@ -741,6 +741,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const MachineOperand& MO) {
// See friend declaration above. This additional declaration is required in
// order to compile LLVM with IBM xlC compiler.
hash_code hash_value(const MachineOperand &MO);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineRegionInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineRegionInfo.h
index cf49c29..794f1d6 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineRegionInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineRegionInfo.h
@@ -176,6 +176,6 @@ EXTERN_TEMPLATE_INSTANTIATION(class RegionBase<RegionTraits<MachineFunction>>);
EXTERN_TEMPLATE_INSTANTIATION(class RegionNodeBase<RegionTraits<MachineFunction>>);
EXTERN_TEMPLATE_INSTANTIATION(class RegionInfoBase<RegionTraits<MachineFunction>>);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index e5b837a..c17ad38 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -1036,6 +1036,6 @@ getPressureSets(unsigned RegUnit) const {
return PSetIterator(RegUnit, this);
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineSSAUpdater.h b/contrib/llvm/include/llvm/CodeGen/MachineSSAUpdater.h
index 5f988ad..dad0c46 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineSSAUpdater.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineSSAUpdater.h
@@ -111,6 +111,6 @@ private:
MachineSSAUpdater(const MachineSSAUpdater&) = delete;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineValueType.h b/contrib/llvm/include/llvm/CodeGen/MachineValueType.h
index a728df3..a3eea5b 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineValueType.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineValueType.h
@@ -644,6 +644,6 @@ class MVT {
/// @}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/PBQPRAConstraint.h b/contrib/llvm/include/llvm/CodeGen/PBQPRAConstraint.h
index 833b9ba..832c043 100644
--- a/contrib/llvm/include/llvm/CodeGen/PBQPRAConstraint.h
+++ b/contrib/llvm/include/llvm/CodeGen/PBQPRAConstraint.h
@@ -64,6 +64,6 @@ private:
void anchor() override;
};
-}
+} // namespace llvm
#endif /* LLVM_CODEGEN_PBQPRACONSTRAINT_H */
diff --git a/contrib/llvm/include/llvm/CodeGen/Passes.h b/contrib/llvm/include/llvm/CodeGen/Passes.h
index 9c7e7b4..3aeec2a 100644
--- a/contrib/llvm/include/llvm/CodeGen/Passes.h
+++ b/contrib/llvm/include/llvm/CodeGen/Passes.h
@@ -552,6 +552,10 @@ namespace llvm {
/// MachineCSE - This pass performs global CSE on machine instructions.
extern char &MachineCSEID;
+ /// ImplicitNullChecks - This pass folds null pointer checks into nearby
+ /// memory operations.
+ extern char &ImplicitNullChecksID;
+
/// MachineLICM - This pass performs LICM on machine instructions.
extern char &MachineLICMID;
@@ -633,7 +637,7 @@ namespace llvm {
/// createForwardControlFlowIntegrityPass - This pass adds control-flow
/// integrity.
ModulePass *createForwardControlFlowIntegrityPass();
-} // End llvm namespace
+} // namespace llvm
/// Target machine pass initializer for passes with dependencies. Use with
/// INITIALIZE_TM_PASS_END.
diff --git a/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h b/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h
index cc3e25a..e0ec72f 100644
--- a/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h
@@ -106,6 +106,6 @@ namespace llvm {
int getFrameIndex() const { return FI; }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h b/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h
index df3fd34..b2e31fa 100644
--- a/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h
+++ b/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h
@@ -184,6 +184,6 @@ private:
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h b/contrib/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h
index 0097e04..d1ea9ff 100644
--- a/contrib/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h
+++ b/contrib/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h
@@ -131,6 +131,6 @@ private:
unsigned numberRCValPredInSU (SUnit *SU, unsigned RCId);
unsigned numberRCValSuccInSU (SUnit *SU, unsigned RCId);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/RuntimeLibcalls.h b/contrib/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
index 2be5de6..34adde5 100644
--- a/contrib/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/contrib/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -429,7 +429,7 @@ namespace RTLIB {
/// Return the SYNC_FETCH_AND_* value for the given opcode and type, or
/// UNKNOWN_LIBCALL if there is none.
Libcall getATOMIC(unsigned Opc, MVT VT);
-}
-}
+} // namespace RTLIB
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h
index 8391314..9b5d59c 100644
--- a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h
+++ b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h
@@ -748,6 +748,6 @@ namespace llvm {
reverse_iterator rend() { return Index2Node.rend(); }
const_reverse_iterator rend() const { return Index2Node.rend(); }
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/contrib/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h
index 8a40e72..ef872a2 100644
--- a/contrib/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h
+++ b/contrib/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h
@@ -106,6 +106,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/contrib/llvm/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
index ab14c2d..5911cfb 100644
--- a/contrib/llvm/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
+++ b/contrib/llvm/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
@@ -121,6 +121,6 @@ public:
void RecedeCycle() override;
};
-}
+} // namespace llvm
#endif //!LLVM_CODEGEN_SCOREBOARDHAZARDRECOGNIZER_H
diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h
index 78fdd04..aa50dea 100644
--- a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -669,7 +669,7 @@ public:
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT);
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N);
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2,
- bool nuw = false, bool nsw = false, bool exact = false);
+ const SDNodeFlags *Flags = nullptr);
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2,
SDValue N3);
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2,
@@ -990,8 +990,7 @@ public:
/// Get the specified node if it's already available, or else return NULL.
SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTs, ArrayRef<SDValue> Ops,
- bool nuw = false, bool nsw = false,
- bool exact = false);
+ const SDNodeFlags *Flags = nullptr);
/// Creates a SDDbgValue node.
SDDbgValue *getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R,
@@ -1253,8 +1252,8 @@ private:
void allnodes_clear();
BinarySDNode *GetBinarySDNode(unsigned Opcode, SDLoc DL, SDVTList VTs,
- SDValue N1, SDValue N2, bool nuw, bool nsw,
- bool exact);
+ SDValue N1, SDValue N2,
+ const SDNodeFlags *Flags = nullptr);
/// Look up the node specified by ID in CSEMap. If it exists, return it. If
/// not, return the insertion token that will make insertion faster. This
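A hedged sketch of the new getNode signature; the helper and its operands are hypothetical, and only the SDNodeFlags-pointer parameter comes from the header change above:

    #include "llvm/CodeGen/SelectionDAG.h"

    // Build an integer ADD carrying "no signed wrap" through an SDNodeFlags
    // object instead of the removed nuw/nsw/exact boolean parameters.
    llvm::SDValue buildNSWAdd(llvm::SelectionDAG &DAG, llvm::SDLoc DL,
                              llvm::SDValue LHS, llvm::SDValue RHS) {
      llvm::SDNodeFlags Flags;
      Flags.setNoSignedWrap(true);
      return DAG.getNode(llvm::ISD::ADD, DL, LHS.getValueType(), LHS, RHS, &Flags);
    }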
diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index a011e4c..dc4fa2b 100644
--- a/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -301,6 +301,6 @@ private:
};
-}
+} // namespace llvm
#endif /* LLVM_CODEGEN_SELECTIONDAGISEL_H */
diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index daa1943..4b65eaa 100644
--- a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -89,7 +89,7 @@ namespace ISD {
/// Return true if the node has at least one operand
/// and all operands of the specified node are ISD::UNDEF.
bool allOperandsUndef(const SDNode *N);
-} // end llvm:ISD namespace
+} // namespace ISD
//===----------------------------------------------------------------------===//
/// Unlike LLVM values, Selection DAG nodes may return multiple
@@ -1017,6 +1017,11 @@ static bool isBinOpWithFlags(unsigned Opcode) {
case ISD::ADD:
case ISD::SUB:
case ISD::SHL:
+ case ISD::FADD:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FREM:
+ case ISD::FSUB:
return true;
default:
return false;
@@ -1029,8 +1034,8 @@ class BinaryWithFlagsSDNode : public BinarySDNode {
public:
SDNodeFlags Flags;
BinaryWithFlagsSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
- SDValue X, SDValue Y)
- : BinarySDNode(Opc, Order, dl, VTs, X, Y), Flags() {}
+ SDValue X, SDValue Y, const SDNodeFlags &NodeFlags)
+ : BinarySDNode(Opc, Order, dl, VTs, X, Y), Flags(NodeFlags) {}
static bool classof(const SDNode *N) {
return isBinOpWithFlags(N->getOpcode());
}
@@ -2263,8 +2268,8 @@ namespace ISD {
return isa<StoreSDNode>(N) &&
cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
}
-}
+} // namespace ISD
-} // end llvm namespace
+} // namespace llvm
#endif
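With the FP opcodes added to isBinOpWithFlags, flags can now be attached to and read back from floating-point nodes as well; a hedged sketch of the read side (N is an assumed SDNode*, and the hasUnsafeAlgebra accessor is assumed from SDNodeFlags):

    if (const auto *BN = dyn_cast<BinaryWithFlagsSDNode>(N))
      if (BN->Flags.hasUnsafeAlgebra())
        ;  // relaxed FP transforms (e.g. reassociation) may be allowed here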
diff --git a/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h b/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h
index 9d6d6f5..5f21397 100644
--- a/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h
+++ b/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h
@@ -705,6 +705,6 @@ namespace llvm {
struct IntervalMapInfo<SlotIndex> : IntervalMapHalfOpenInfo<SlotIndex> {
};
-}
+} // namespace llvm
#endif // LLVM_CODEGEN_SLOTINDEXES_H
diff --git a/contrib/llvm/include/llvm/CodeGen/StackMaps.h b/contrib/llvm/include/llvm/CodeGen/StackMaps.h
index 46a773f..ba27404 100644
--- a/contrib/llvm/include/llvm/CodeGen/StackMaps.h
+++ b/contrib/llvm/include/llvm/CodeGen/StackMaps.h
@@ -255,6 +255,6 @@ private:
void debug() { print(dbgs()); }
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/ValueTypes.h b/contrib/llvm/include/llvm/CodeGen/ValueTypes.h
index e1a9fd3..e02d7db 100644
--- a/contrib/llvm/include/llvm/CodeGen/ValueTypes.h
+++ b/contrib/llvm/include/llvm/CodeGen/ValueTypes.h
@@ -361,6 +361,6 @@ namespace llvm {
unsigned getExtendedSizeInBits() const;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/VirtRegMap.h b/contrib/llvm/include/llvm/CodeGen/VirtRegMap.h
index d7e9209..5b771d0 100644
--- a/contrib/llvm/include/llvm/CodeGen/VirtRegMap.h
+++ b/contrib/llvm/include/llvm/CodeGen/VirtRegMap.h
@@ -185,6 +185,6 @@ namespace llvm {
VRM.print(OS);
return OS;
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h b/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h
index 1cff320..5c1b3df 100644
--- a/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h
@@ -144,14 +144,15 @@ struct WinEHFuncInfo {
SmallVector<WinEHUnwindMapEntry, 4> UnwindMap;
SmallVector<WinEHTryBlockMapEntry, 4> TryBlockMap;
SmallVector<std::pair<MCSymbol *, int>, 4> IPToStateList;
- int UnwindHelpFrameIdx;
- int UnwindHelpFrameOffset;
+ int UnwindHelpFrameIdx = INT_MAX;
+ int UnwindHelpFrameOffset = -1;
- unsigned NumIPToStateFuncsVisited;
+ unsigned NumIPToStateFuncsVisited = 0;
+ /// frameescape index of the 32-bit EH registration node. Set by
+ /// WinEHStatePass and used indirectly by SEH filter functions of the parent.
+ int EHRegNodeEscapeIndex = INT_MAX;
- WinEHFuncInfo()
- : UnwindHelpFrameIdx(INT_MAX), UnwindHelpFrameOffset(-1),
- NumIPToStateFuncsVisited(0) {}
+ WinEHFuncInfo() {}
};
/// Analyze the IR in ParentFn and its handlers to build WinEHFuncInfo, which
@@ -160,5 +161,5 @@ struct WinEHFuncInfo {
void calculateWinCXXEHStateNumbers(const Function *ParentFn,
WinEHFuncInfo &FuncInfo);
-}
+} // namespace llvm
#endif // LLVM_CODEGEN_WINEHFUNCINFO_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/DIContext.h b/contrib/llvm/include/llvm/DebugInfo/DIContext.h
index 871e60c..8e5794d 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DIContext.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DIContext.h
@@ -172,6 +172,6 @@ public:
virtual std::unique_ptr<LoadedObjectInfo> clone() const = 0;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
index 6ab5d5c..72f304a 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
@@ -57,6 +57,6 @@ private:
AttributeSpecVector AttributeSpecs;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
index 47dbf5f..f891438 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
@@ -49,6 +49,6 @@ public:
void dump(raw_ostream &OS) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
index 743f9c6..9f7527f 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
@@ -26,6 +26,6 @@ public:
~DWARFCompileUnit() override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 423c0d3..0e29ad6 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -295,6 +295,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
index 2114208..88519ce 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
@@ -58,6 +58,6 @@ private:
void clear();
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
index 837a8e6..15850b2 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
@@ -65,6 +65,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
index 791f010..58359fa 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
@@ -82,6 +82,6 @@ private:
DenseSet<uint32_t> ParsedCUOffsets;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
index f29d5fe..3cbae41 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
@@ -155,6 +155,6 @@ struct DWARFDebugInfoEntryInlinedChain {
const DWARFUnit *U;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 93e7c79..e728d59 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -248,6 +248,6 @@ private:
LineTableMapTy LineTableMap;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
index bd44c2e..6a3f2ad 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
@@ -76,6 +76,6 @@ public:
void parse(DataExtractor data);
void dump(raw_ostream &OS) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index 7ddcc0d..2d6bb0e 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -91,6 +91,6 @@ private:
void dumpString(raw_ostream &OS, const DWARFUnit *U) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
index f24e278..de853c3 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
@@ -32,7 +32,7 @@ protected:
bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 5604b93..54209cf 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -279,6 +279,6 @@ private:
const DWARFDebugInfoEntryMinimal *getSubprogramForAddress(uint64_t Address);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h b/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
index b5fa8c3..8a06d55 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
@@ -54,6 +54,6 @@ private:
std::unique_ptr<IPDBEnumSymbols> Enumerator;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h
index 808a0f3..429cd7e 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h
@@ -32,6 +32,6 @@ public:
virtual void reset() = 0;
virtual IPDBDataStream *clone() const = 0;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
index 645ac96..5001a95 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
@@ -28,6 +28,6 @@ public:
virtual void reset() = 0;
virtual MyType *clone() const = 0;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBLineNumber.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBLineNumber.h
index 92cd58d..30036df 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBLineNumber.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBLineNumber.h
@@ -31,6 +31,6 @@ public:
virtual uint32_t getCompilandId() const = 0;
virtual bool isStatement() const = 0;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h
index a130a38..1dca911 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h
@@ -56,6 +56,6 @@ public:
virtual std::unique_ptr<IPDBEnumDataStreams> getDebugStreams() const = 0;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSourceFile.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSourceFile.h
index 55000ef..8081ea5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSourceFile.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSourceFile.h
@@ -32,6 +32,6 @@ public:
virtual PDB_Checksum getChecksumType() const = 0;
virtual std::unique_ptr<IPDBEnumSymbols> getCompilands() const = 0;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBContext.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBContext.h
index 2bb9746..3b4a77e 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBContext.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBContext.h
@@ -55,6 +55,6 @@ private:
std::string getFunctionName(uint64_t Address, DINameKind NameKind) const;
std::unique_ptr<IPDBSession> Session;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
index 48ce1c1..64f9694 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
@@ -33,6 +33,6 @@ raw_ostream &operator<<(raw_ostream &OS, const PDB_UniqueId &Id);
raw_ostream &operator<<(raw_ostream &OS, const Variant &Value);
raw_ostream &operator<<(raw_ostream &OS, const VersionInfo &Version);
raw_ostream &operator<<(raw_ostream &OS, const TagStats &Stats);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymDumper.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymDumper.h
index 65110f3..ffd31a5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymDumper.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymDumper.h
@@ -56,6 +56,6 @@ public:
private:
bool RequireImpl;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
index c055dd7..bd85e60 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
@@ -34,6 +34,6 @@ public:
// FORWARD_SYMBOL_METHOD(getValue)
FORWARD_SYMBOL_METHOD(getVirtualAddress)
};
-}
+} // namespace llvm
#endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLANNOTATION_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
index 2ca1250..6782179 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
@@ -36,6 +36,6 @@ public:
FORWARD_SYMBOL_METHOD(getSymIndexId)
FORWARD_SYMBOL_METHOD(getVirtualAddress)
};
-}
+} // namespace llvm
#endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLBLOCK_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
index f8c796a..d92830f 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
@@ -33,6 +33,6 @@ public:
FORWARD_SYMBOL_METHOD(getSourceFileName)
FORWARD_SYMBOL_METHOD(getSymIndexId)
};
-}
+} // namespace llvm
#endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLCOMPILAND_H
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h b/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
index 4b2add8..f86490b 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -197,15 +197,20 @@ public:
/// M is found.
virtual bool removeModule(Module *M);
- /// FindFunctionNamed - Search all of the active modules to find the one that
+ /// FindFunctionNamed - Search all of the active modules to find the function that
/// defines FnName. This is a very slow operation and shouldn't be used for
/// general code.
virtual Function *FindFunctionNamed(const char *FnName);
+ /// FindGlobalVariableNamed - Search all of the active modules to find the global variable
+ /// that defines Name. This is a very slow operation and shouldn't be used for
+ /// general code.
+ virtual GlobalVariable *FindGlobalVariableNamed(const char *Name, bool AllowInternal = false);
+
/// runFunction - Execute the specified function with the specified arguments,
/// and return the result.
virtual GenericValue runFunction(Function *F,
- const std::vector<GenericValue> &ArgValues) = 0;
+ ArrayRef<GenericValue> ArgValues) = 0;
/// getPointerToNamedFunction - This method returns the address of the
/// specified function by using the dlsym function call. As such it is only
@@ -629,6 +634,6 @@ public:
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ExecutionEngine, LLVMExecutionEngineRef)
-} // End llvm namespace
+} // namespace llvm
#endif
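A minimal usage sketch of the two interface changes above (EE is an assumed ExecutionEngine*, F an assumed Function*, and "my_global" is a hypothetical symbol name):

    std::vector<GenericValue> Args;   // still works: converts implicitly to ArrayRef<GenericValue>
    GenericValue Result = EE->runFunction(F, Args);
    GlobalVariable *GV = EE->FindGlobalVariableNamed("my_global");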
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/GenericValue.h b/contrib/llvm/include/llvm/ExecutionEngine/GenericValue.h
index 0e92f79..ea5ddfc 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/GenericValue.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/GenericValue.h
@@ -49,5 +49,5 @@ struct GenericValue {
inline GenericValue PTOGV(void *P) { return GenericValue(P); }
inline void* GVTOP(const GenericValue &GV) { return GV.PointerVal; }
-} // End llvm namespace.
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/MCJIT.h b/contrib/llvm/include/llvm/ExecutionEngine/MCJIT.h
index 66ddb7c..294f11d 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/MCJIT.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/MCJIT.h
@@ -33,6 +33,6 @@ namespace {
LLVMLinkInMCJIT();
}
} ForceMCJITLinking;
-}
+} // namespace
#endif
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/ObjectCache.h b/contrib/llvm/include/llvm/ExecutionEngine/ObjectCache.h
index cc01a4e..1084de8 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/ObjectCache.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/ObjectCache.h
@@ -35,6 +35,6 @@ public:
virtual std::unique_ptr<MemoryBuffer> getObject(const Module* M) = 0;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index 074d55e..4c515db 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -15,9 +15,9 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_COMPILEONDEMANDLAYER_H
#define LLVM_EXECUTIONENGINE_ORC_COMPILEONDEMANDLAYER_H
-//#include "CloneSubModule.h"
#include "IndirectionUtils.h"
#include "LambdaResolver.h"
+#include "LogicalDylib.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -36,7 +36,9 @@ namespace orc {
/// added to the layer below. When a stub is called it triggers the extraction
/// of the function body from the original module. The extracted body is then
/// compiled and executed.
-template <typename BaseLayerT, typename CompileCallbackMgrT>
+template <typename BaseLayerT, typename CompileCallbackMgrT,
+ typename PartitioningFtor =
+ std::function<std::set<Function*>(Function&)>>
class CompileOnDemandLayer {
private:
@@ -44,333 +46,63 @@ private:
// variables.
class GlobalDeclMaterializer : public ValueMaterializer {
public:
- GlobalDeclMaterializer(Module &Dst) : Dst(Dst) {}
+ typedef std::set<const Function*> StubSet;
+
+ GlobalDeclMaterializer(Module &Dst, const StubSet *StubsToClone = nullptr)
+ : Dst(Dst), StubsToClone(StubsToClone) {}
+
Value* materializeValueFor(Value *V) final {
if (auto *GV = dyn_cast<GlobalVariable>(V))
return cloneGlobalVariableDecl(Dst, *GV);
- else if (auto *F = dyn_cast<Function>(V))
- return cloneFunctionDecl(Dst, *F);
+ else if (auto *F = dyn_cast<Function>(V)) {
+ auto *ClonedF = cloneFunctionDecl(Dst, *F);
+ if (StubsToClone && StubsToClone->count(F)) {
+ GlobalVariable *FnBodyPtr =
+ createImplPointer(*ClonedF->getType(), *ClonedF->getParent(),
+ ClonedF->getName() + "$orc_addr", nullptr);
+ makeStub(*ClonedF, *FnBodyPtr);
+ ClonedF->setLinkage(GlobalValue::AvailableExternallyLinkage);
+ ClonedF->addFnAttr(Attribute::AlwaysInline);
+ }
+ return ClonedF;
+ }
// Else.
return nullptr;
}
private:
Module &Dst;
+ const StubSet *StubsToClone;
};
typedef typename BaseLayerT::ModuleSetHandleT BaseLayerModuleSetHandleT;
- class UncompiledPartition;
-
- // Logical module.
- //
- // This struct contains the handles for the global values and stubs (which
- // cover the external symbols of the original module), plus the handes for
- // each of the extracted partitions. These handleds are used for lookup (only
- // the globals/stubs module is searched) and memory management. The actual
- // searching and resource management are handled by the LogicalDylib that owns
- // the LogicalModule.
- struct LogicalModule {
- LogicalModule() {}
-
- LogicalModule(LogicalModule &&Other)
- : SrcM(std::move(Other.SrcM)),
- GVsAndStubsHandle(std::move(Other.GVsAndStubsHandle)),
- ImplHandles(std::move(Other.ImplHandles)) {}
-
- std::unique_ptr<Module> SrcM;
- BaseLayerModuleSetHandleT GVsAndStubsHandle;
- std::vector<BaseLayerModuleSetHandleT> ImplHandles;
- };
-
- // Logical dylib.
- //
- // This class handles symbol resolution and resource management for a set of
- // modules that were added together as a logical dylib.
- //
- // A logical dylib contains one-or-more LogicalModules plus a set of
- // UncompiledPartitions. LogicalModules support symbol resolution and resource
- // management for for code that has already been emitted. UncompiledPartitions
- // represent code that has not yet been compiled.
- class LogicalDylib {
- private:
- friend class UncompiledPartition;
- typedef std::list<LogicalModule> LogicalModuleList;
- public:
-
- typedef unsigned UncompiledPartitionID;
- typedef typename LogicalModuleList::iterator LMHandle;
-
- // Construct a logical dylib.
- LogicalDylib(CompileOnDemandLayer &CODLayer) : CODLayer(CODLayer) { }
-
- // Delete this logical dylib, release logical module resources.
- virtual ~LogicalDylib() {
- releaseLogicalModuleResources();
- }
-
- // Get a reference to the containing layer.
- CompileOnDemandLayer& getCODLayer() { return CODLayer; }
-
- // Get a reference to the base layer.
- BaseLayerT& getBaseLayer() { return CODLayer.BaseLayer; }
-
- // Start a new context for a single logical module.
- LMHandle createLogicalModule() {
- LogicalModules.push_back(LogicalModule());
- return std::prev(LogicalModules.end());
- }
-
- // Set the global-values-and-stubs module handle for this logical module.
- void setGVsAndStubsHandle(LMHandle LMH, BaseLayerModuleSetHandleT H) {
- LMH->GVsAndStubsHandle = H;
- }
-
- // Return the global-values-and-stubs module handle for this logical module.
- BaseLayerModuleSetHandleT getGVsAndStubsHandle(LMHandle LMH) {
- return LMH->GVsAndStubsHandle;
- }
-
- // Add a handle to a module containing lazy function bodies to the given
- // logical module.
- void addToLogicalModule(LMHandle LMH, BaseLayerModuleSetHandleT H) {
- LMH->ImplHandles.push_back(H);
- }
-
- // Create an UncompiledPartition attached to this LogicalDylib.
- UncompiledPartition& createUncompiledPartition(LMHandle LMH,
- std::shared_ptr<Module> SrcM);
-
- // Take ownership of the given UncompiledPartition from the logical dylib.
- std::unique_ptr<UncompiledPartition>
- takeUPOwnership(UncompiledPartitionID ID);
-
- // Look up a symbol in this context.
- JITSymbol findSymbolInternally(LMHandle LMH, const std::string &Name) {
- if (auto Symbol = getBaseLayer().findSymbolIn(LMH->GVsAndStubsHandle,
- Name, false))
- return Symbol;
- for (auto I = LogicalModules.begin(), E = LogicalModules.end(); I != E;
- ++I)
- if (I != LMH)
- if (auto Symbol = getBaseLayer().findSymbolIn(I->GVsAndStubsHandle,
- Name, false))
- return Symbol;
-
- return nullptr;
- }
-
- JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
- for (auto &LM : LogicalModules)
- if (auto Symbol = getBaseLayer().findSymbolIn(LM.GVsAndStubsHandle,
- Name,
- ExportedSymbolsOnly))
- return Symbol;
- return nullptr;
- }
-
- // Find an external symbol (via the user supplied SymbolResolver).
- virtual RuntimeDyld::SymbolInfo
- findSymbolExternally(const std::string &Name) const = 0;
-
- private:
-
- void releaseLogicalModuleResources() {
- for (auto I = LogicalModules.begin(), E = LogicalModules.end(); I != E;
- ++I) {
- getBaseLayer().removeModuleSet(I->GVsAndStubsHandle);
- for (auto H : I->ImplHandles)
- getBaseLayer().removeModuleSet(H);
- }
- }
-
- CompileOnDemandLayer &CODLayer;
- LogicalModuleList LogicalModules;
- std::vector<std::unique_ptr<UncompiledPartition>> UncompiledPartitions;
+ struct LogicalModuleResources {
+ std::shared_ptr<Module> SourceModule;
+ std::set<const Function*> StubsToClone;
};
- template <typename ResolverPtrT>
- class LogicalDylibImpl : public LogicalDylib {
- public:
- LogicalDylibImpl(CompileOnDemandLayer &CODLayer, ResolverPtrT Resolver)
- : LogicalDylib(CODLayer), Resolver(std::move(Resolver)) {}
-
- RuntimeDyld::SymbolInfo
- findSymbolExternally(const std::string &Name) const override {
- return Resolver->findSymbol(Name);
- }
-
- private:
- ResolverPtrT Resolver;
+ struct LogicalDylibResources {
+ typedef std::function<RuntimeDyld::SymbolInfo(const std::string&)>
+ SymbolResolverFtor;
+ SymbolResolverFtor ExternalSymbolResolver;
+ PartitioningFtor Partitioner;
};
- template <typename ResolverPtrT>
- static std::unique_ptr<LogicalDylib>
- createLogicalDylib(CompileOnDemandLayer &CODLayer,
- ResolverPtrT Resolver) {
- typedef LogicalDylibImpl<ResolverPtrT> Impl;
- return llvm::make_unique<Impl>(CODLayer, std::move(Resolver));
- }
+ typedef LogicalDylib<BaseLayerT, LogicalModuleResources,
+ LogicalDylibResources> CODLogicalDylib;
- // Uncompiled partition.
- //
- // Represents one as-yet uncompiled portion of a module.
- class UncompiledPartition {
- public:
-
- struct PartitionEntry {
- PartitionEntry(Function *F, TargetAddress CallbackID)
- : F(F), CallbackID(CallbackID) {}
- Function *F;
- TargetAddress CallbackID;
- };
-
- typedef std::vector<PartitionEntry> PartitionEntryList;
-
- // Creates an uncompiled partition with the list of functions that make up
- // this partition.
- UncompiledPartition(LogicalDylib &LD, typename LogicalDylib::LMHandle LMH,
- std::shared_ptr<Module> SrcM)
- : LD(LD), LMH(LMH), SrcM(std::move(SrcM)), ID(~0U) {}
-
- ~UncompiledPartition() {
- // FIXME: When we want to support threaded lazy compilation we'll need to
- // lock the callback manager here.
- auto &CCMgr = LD.getCODLayer().CompileCallbackMgr;
- for (auto PEntry : PartitionEntries)
- CCMgr.releaseCompileCallback(PEntry.CallbackID);
- }
-
- // Set the ID for this partition.
- void setID(typename LogicalDylib::UncompiledPartitionID ID) {
- this->ID = ID;
- }
-
- // Set the function set and callbacks for this partition.
- void setPartitionEntries(PartitionEntryList PartitionEntries) {
- this->PartitionEntries = std::move(PartitionEntries);
- }
-
- // Handle a compile callback for the function at index FnIdx.
- TargetAddress compile(unsigned FnIdx) {
- // Take ownership of self. This will ensure we delete the partition and
- // free all its resources once we're done compiling.
- std::unique_ptr<UncompiledPartition> This = LD.takeUPOwnership(ID);
-
- // Release all other compile callbacks for this partition.
- // We skip the callback for this function because that's the one that
- // called us, and the callback manager will already have removed it.
- auto &CCMgr = LD.getCODLayer().CompileCallbackMgr;
- for (unsigned I = 0; I < PartitionEntries.size(); ++I)
- if (I != FnIdx)
- CCMgr.releaseCompileCallback(PartitionEntries[I].CallbackID);
-
- // Grab the name of the function being called here.
- Function *F = PartitionEntries[FnIdx].F;
- std::string CalledFnName = Mangle(F->getName(), SrcM->getDataLayout());
-
- // Extract the function and add it to the base layer.
- auto PartitionImplH = emitPartition();
- LD.addToLogicalModule(LMH, PartitionImplH);
-
- // Update body pointers.
- // FIXME: When we start supporting remote lazy jitting this will need to
- // be replaced with a user-supplied callback for updating the
- // remote pointers.
- TargetAddress CalledAddr = 0;
- for (unsigned I = 0; I < PartitionEntries.size(); ++I) {
- auto F = PartitionEntries[I].F;
- std::string FName(F->getName());
- auto FnBodySym =
- LD.getBaseLayer().findSymbolIn(PartitionImplH,
- Mangle(FName, SrcM->getDataLayout()),
- false);
- auto FnPtrSym =
- LD.getBaseLayer().findSymbolIn(LD.getGVsAndStubsHandle(LMH),
- Mangle(FName + "$orc_addr",
- SrcM->getDataLayout()),
- false);
- assert(FnBodySym && "Couldn't find function body.");
- assert(FnPtrSym && "Couldn't find function body pointer.");
-
- auto FnBodyAddr = FnBodySym.getAddress();
- void *FnPtrAddr = reinterpret_cast<void*>(
- static_cast<uintptr_t>(FnPtrSym.getAddress()));
-
- // If this is the function we're calling record the address so we can
- // return it from this function.
- if (I == FnIdx)
- CalledAddr = FnBodyAddr;
-
- memcpy(FnPtrAddr, &FnBodyAddr, sizeof(uintptr_t));
- }
-
- // Finally, clear the partition structure so we don't try to
- // double-release the callbacks in the UncompiledPartition destructor.
- PartitionEntries.clear();
-
- return CalledAddr;
- }
-
- private:
-
- BaseLayerModuleSetHandleT emitPartition() {
- // Create the module.
- std::string NewName(SrcM->getName());
- for (auto &PEntry : PartitionEntries) {
- NewName += ".";
- NewName += PEntry.F->getName();
- }
- auto PM = llvm::make_unique<Module>(NewName, SrcM->getContext());
- PM->setDataLayout(SrcM->getDataLayout());
- ValueToValueMapTy VMap;
- GlobalDeclMaterializer GDM(*PM);
-
- // Create decls in the new module.
- for (auto &PEntry : PartitionEntries)
- cloneFunctionDecl(*PM, *PEntry.F, &VMap);
-
- // Move the function bodies.
- for (auto &PEntry : PartitionEntries)
- moveFunctionBody(*PEntry.F, VMap);
-
- // Create memory manager and symbol resolver.
- auto MemMgr = llvm::make_unique<SectionMemoryManager>();
- auto Resolver = createLambdaResolver(
- [this](const std::string &Name) {
- if (auto Symbol = LD.findSymbolInternally(LMH, Name))
- return RuntimeDyld::SymbolInfo(Symbol.getAddress(),
- Symbol.getFlags());
- return LD.findSymbolExternally(Name);
- },
- [this](const std::string &Name) {
- if (auto Symbol = LD.findSymbolInternally(LMH, Name))
- return RuntimeDyld::SymbolInfo(Symbol.getAddress(),
- Symbol.getFlags());
- return RuntimeDyld::SymbolInfo(nullptr);
- });
- std::vector<std::unique_ptr<Module>> PartMSet;
- PartMSet.push_back(std::move(PM));
- return LD.getBaseLayer().addModuleSet(std::move(PartMSet),
- std::move(MemMgr),
- std::move(Resolver));
- }
-
- LogicalDylib &LD;
- typename LogicalDylib::LMHandle LMH;
- std::shared_ptr<Module> SrcM;
- typename LogicalDylib::UncompiledPartitionID ID;
- PartitionEntryList PartitionEntries;
- };
-
- typedef std::list<std::unique_ptr<LogicalDylib>> LogicalDylibList;
+ typedef typename CODLogicalDylib::LogicalModuleHandle LogicalModuleHandle;
+ typedef std::list<CODLogicalDylib> LogicalDylibList;
public:
/// @brief Handle to a set of loaded modules.
typedef typename LogicalDylibList::iterator ModuleSetHandleT;
/// @brief Construct a compile-on-demand layer instance.
- CompileOnDemandLayer(BaseLayerT &BaseLayer, CompileCallbackMgrT &CallbackMgr)
- : BaseLayer(BaseLayer), CompileCallbackMgr(CallbackMgr) {}
+ CompileOnDemandLayer(BaseLayerT &BaseLayer, CompileCallbackMgrT &CallbackMgr,
+ bool CloneStubsIntoPartitions)
+ : BaseLayer(BaseLayer), CompileCallbackMgr(CallbackMgr),
+ CloneStubsIntoPartitions(CloneStubsIntoPartitions) {}
/// @brief Add a module to the compile-on-demand layer.
template <typename ModuleSetT, typename MemoryManagerPtrT,
@@ -382,20 +114,25 @@ public:
assert(MemMgr == nullptr &&
"User supplied memory managers not supported with COD yet.");
- LogicalDylibs.push_back(createLogicalDylib(*this, std::move(Resolver)));
+ LogicalDylibs.push_back(CODLogicalDylib(BaseLayer));
+ auto &LDResources = LogicalDylibs.back().getDylibResources();
+
+ LDResources.ExternalSymbolResolver =
+ [Resolver](const std::string &Name) {
+ return Resolver->findSymbol(Name);
+ };
+
+ LDResources.Partitioner =
+ [](Function &F) {
+ std::set<Function*> Partition;
+ Partition.insert(&F);
+ return Partition;
+ };
// Process each of the modules in this module set.
- for (auto &M : Ms) {
- std::vector<std::vector<Function*>> Partitioning;
- for (auto &F : *M) {
- if (F.isDeclaration())
- continue;
- Partitioning.emplace_back(1, &F);
- }
- addLogicalModule(*LogicalDylibs.back(),
- std::shared_ptr<Module>(std::move(M)),
- std::move(Partitioning));
- }
+ for (auto &M : Ms)
+ addLogicalModule(LogicalDylibs.back(),
+ std::shared_ptr<Module>(std::move(M)));
return std::prev(LogicalDylibs.end());
}
@@ -420,13 +157,12 @@ public:
/// below this one.
JITSymbol findSymbolIn(ModuleSetHandleT H, const std::string &Name,
bool ExportedSymbolsOnly) {
- return (*H)->findSymbol(Name, ExportedSymbolsOnly);
+ return H->findSymbol(Name, ExportedSymbolsOnly);
}
private:
- void addLogicalModule(LogicalDylib &LD, std::shared_ptr<Module> SrcM,
- std::vector<std::vector<Function*>> Partitions) {
+ void addLogicalModule(CODLogicalDylib &LD, std::shared_ptr<Module> SrcM) {
// Bump the linkage and rename any anonymous/private members in SrcM to
// ensure that everything will resolve properly after we partition SrcM.
@@ -434,6 +170,8 @@ private:
// Create a logical module handle for SrcM within the logical dylib.
auto LMH = LD.createLogicalModule();
+ auto &LMResources = LD.getLogicalModuleResources(LMH);
+ LMResources.SourceModule = SrcM;
// Create the GVs-and-stubs module.
auto GVsAndStubsM = llvm::make_unique<Module>(
@@ -442,31 +180,35 @@ private:
GVsAndStubsM->setDataLayout(SrcM->getDataLayout());
ValueToValueMapTy VMap;
- // Process partitions and create stubs.
+ // Process module and create stubs.
// We create the stubs before copying the global variables as we know the
// stubs won't refer to any globals (they only refer to their implementation
// pointer), so there are no ordering/value-mapping issues.
- for (auto& Partition : Partitions) {
- auto &UP = LD.createUncompiledPartition(LMH, SrcM);
- typename UncompiledPartition::PartitionEntryList PartitionEntries;
- for (auto &F : Partition) {
- assert(!F->isDeclaration() &&
- "Partition should only contain definitions");
- unsigned FnIdx = PartitionEntries.size();
- auto CCI = CompileCallbackMgr.getCompileCallback(SrcM->getContext());
- PartitionEntries.push_back(
- typename UncompiledPartition::PartitionEntry(F, CCI.getAddress()));
- Function *StubF = cloneFunctionDecl(*GVsAndStubsM, *F, &VMap);
- GlobalVariable *FnBodyPtr =
- createImplPointer(*StubF->getType(), *StubF->getParent(),
- StubF->getName() + "$orc_addr",
- createIRTypedAddress(*StubF->getFunctionType(),
- CCI.getAddress()));
- makeStub(*StubF, *FnBodyPtr);
- CCI.setCompileAction([&UP, FnIdx]() { return UP.compile(FnIdx); });
- }
-
- UP.setPartitionEntries(std::move(PartitionEntries));
+ for (auto &F : *SrcM) {
+
+ // Skip declarations.
+ if (F.isDeclaration())
+ continue;
+
+ // Record all functions defined by this module.
+ if (CloneStubsIntoPartitions)
+ LMResources.StubsToClone.insert(&F);
+
+ // For each definition: create a callback, a stub, and a function body
+ // pointer. Initialize the function body pointer to point at the callback,
+ // and set the callback to compile the function body.
+ auto CCInfo = CompileCallbackMgr.getCompileCallback(SrcM->getContext());
+ Function *StubF = cloneFunctionDecl(*GVsAndStubsM, F, &VMap);
+ GlobalVariable *FnBodyPtr =
+ createImplPointer(*StubF->getType(), *StubF->getParent(),
+ StubF->getName() + "$orc_addr",
+ createIRTypedAddress(*StubF->getFunctionType(),
+ CCInfo.getAddress()));
+ makeStub(*StubF, *FnBodyPtr);
+ CCInfo.setCompileAction(
+ [this, &LD, LMH, &F]() {
+ return this->extractAndCompile(LD, LMH, F);
+ });
}
// Now clone the global variable declarations.
@@ -483,12 +225,9 @@ private:
// Build a resolver for the stubs module and add it to the base layer.
auto GVsAndStubsResolver = createLambdaResolver(
[&LD](const std::string &Name) {
- if (auto Symbol = LD.findSymbol(Name, false))
- return RuntimeDyld::SymbolInfo(Symbol.getAddress(),
- Symbol.getFlags());
- return LD.findSymbolExternally(Name);
+ return LD.getDylibResources().ExternalSymbolResolver(Name);
},
- [&LD](const std::string &Name) {
+ [](const std::string &Name) {
return RuntimeDyld::SymbolInfo(nullptr);
});
@@ -498,7 +237,7 @@ private:
BaseLayer.addModuleSet(std::move(GVsAndStubsMSet),
llvm::make_unique<SectionMemoryManager>(),
std::move(GVsAndStubsResolver));
- LD.setGVsAndStubsHandle(LMH, GVsAndStubsH);
+ LD.addToLogicalModule(LMH, GVsAndStubsH);
}
static std::string Mangle(StringRef Name, const DataLayout &DL) {
@@ -511,35 +250,104 @@ private:
return MangledName;
}
+ TargetAddress extractAndCompile(CODLogicalDylib &LD,
+ LogicalModuleHandle LMH,
+ Function &F) {
+ Module &SrcM = *LD.getLogicalModuleResources(LMH).SourceModule;
+
+ // If F is a declaration we must already have compiled it.
+ if (F.isDeclaration())
+ return 0;
+
+ // Grab the name of the function being called here.
+ std::string CalledFnName = Mangle(F.getName(), SrcM.getDataLayout());
+
+ auto Partition = LD.getDylibResources().Partitioner(F);
+ auto PartitionH = emitPartition(LD, LMH, Partition);
+
+ TargetAddress CalledAddr = 0;
+ for (auto *SubF : Partition) {
+ std::string FName = SubF->getName();
+ auto FnBodySym =
+ BaseLayer.findSymbolIn(PartitionH, Mangle(FName, SrcM.getDataLayout()),
+ false);
+ auto FnPtrSym =
+ BaseLayer.findSymbolIn(*LD.moduleHandlesBegin(LMH),
+ Mangle(FName + "$orc_addr",
+ SrcM.getDataLayout()),
+ false);
+ assert(FnBodySym && "Couldn't find function body.");
+ assert(FnPtrSym && "Couldn't find function body pointer.");
+
+ TargetAddress FnBodyAddr = FnBodySym.getAddress();
+ void *FnPtrAddr = reinterpret_cast<void*>(
+ static_cast<uintptr_t>(FnPtrSym.getAddress()));
+
+ // If this is the function we're calling record the address so we can
+ // return it from this function.
+ if (SubF == &F)
+ CalledAddr = FnBodyAddr;
+
+ memcpy(FnPtrAddr, &FnBodyAddr, sizeof(uintptr_t));
+ }
+
+ return CalledAddr;
+ }
+
+ template <typename PartitionT>
+ BaseLayerModuleSetHandleT emitPartition(CODLogicalDylib &LD,
+ LogicalModuleHandle LMH,
+ const PartitionT &Partition) {
+ auto &LMResources = LD.getLogicalModuleResources(LMH);
+ Module &SrcM = *LMResources.SourceModule;
+
+ // Create the module.
+ std::string NewName = SrcM.getName();
+ for (auto *F : Partition) {
+ NewName += ".";
+ NewName += F->getName();
+ }
+
+ auto M = llvm::make_unique<Module>(NewName, SrcM.getContext());
+ M->setDataLayout(SrcM.getDataLayout());
+ ValueToValueMapTy VMap;
+ GlobalDeclMaterializer GDM(*M, &LMResources.StubsToClone);
+
+ // Create decls in the new module.
+ for (auto *F : Partition)
+ cloneFunctionDecl(*M, *F, &VMap);
+
+ // Move the function bodies.
+ for (auto *F : Partition)
+ moveFunctionBody(*F, VMap, &GDM);
+
+ // Create memory manager and symbol resolver.
+ auto MemMgr = llvm::make_unique<SectionMemoryManager>();
+ auto Resolver = createLambdaResolver(
+ [this, &LD, LMH](const std::string &Name) {
+ if (auto Symbol = LD.findSymbolInternally(LMH, Name))
+ return RuntimeDyld::SymbolInfo(Symbol.getAddress(),
+ Symbol.getFlags());
+ return LD.getDylibResources().ExternalSymbolResolver(Name);
+ },
+ [this, &LD, LMH](const std::string &Name) {
+ if (auto Symbol = LD.findSymbolInternally(LMH, Name))
+ return RuntimeDyld::SymbolInfo(Symbol.getAddress(),
+ Symbol.getFlags());
+ return RuntimeDyld::SymbolInfo(nullptr);
+ });
+ std::vector<std::unique_ptr<Module>> PartMSet;
+ PartMSet.push_back(std::move(M));
+ return BaseLayer.addModuleSet(std::move(PartMSet), std::move(MemMgr),
+ std::move(Resolver));
+ }
+
BaseLayerT &BaseLayer;
CompileCallbackMgrT &CompileCallbackMgr;
LogicalDylibList LogicalDylibs;
+ bool CloneStubsIntoPartitions;
};
-template <typename BaseLayerT, typename CompileCallbackMgrT>
-typename CompileOnDemandLayer<BaseLayerT, CompileCallbackMgrT>::
- UncompiledPartition&
-CompileOnDemandLayer<BaseLayerT, CompileCallbackMgrT>::LogicalDylib::
- createUncompiledPartition(LMHandle LMH, std::shared_ptr<Module> SrcM) {
- UncompiledPartitions.push_back(
- llvm::make_unique<UncompiledPartition>(*this, LMH, std::move(SrcM)));
- UncompiledPartitions.back()->setID(UncompiledPartitions.size() - 1);
- return *UncompiledPartitions.back();
-}
-
-template <typename BaseLayerT, typename CompileCallbackMgrT>
-std::unique_ptr<typename CompileOnDemandLayer<BaseLayerT, CompileCallbackMgrT>::
- UncompiledPartition>
-CompileOnDemandLayer<BaseLayerT, CompileCallbackMgrT>::LogicalDylib::
- takeUPOwnership(UncompiledPartitionID ID) {
-
- std::swap(UncompiledPartitions[ID], UncompiledPartitions.back());
- UncompiledPartitions[ID]->setID(ID);
- auto UP = std::move(UncompiledPartitions.back());
- UncompiledPartitions.pop_back();
- return UP;
-}
-
} // End namespace orc.
} // End namespace llvm.
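Construction sketch for the extra CloneStubsIntoPartitions parameter (CompileLayer and CCMgr are assumed lower-layer and compile-callback-manager objects, not defined in this diff):

    CompileOnDemandLayer<decltype(CompileLayer), decltype(CCMgr)>
        CODLayer(CompileLayer, CCMgr, /*CloneStubsIntoPartitions=*/true);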
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LogicalDylib.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LogicalDylib.h
new file mode 100644
index 0000000..28700ef
--- /dev/null
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LogicalDylib.h
@@ -0,0 +1,115 @@
+//===--- LogicalDylib.h - Simulates dylib-style symbol lookup ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Simulates symbol resolution inside a dylib.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_LOGICALDYLIB_H
+#define LLVM_EXECUTIONENGINE_ORC_LOGICALDYLIB_H
+
+namespace llvm {
+namespace orc {
+
+template <typename BaseLayerT,
+ typename LogicalModuleResources,
+ typename LogicalDylibResources>
+class LogicalDylib {
+public:
+ typedef typename BaseLayerT::ModuleSetHandleT BaseLayerModuleSetHandleT;
+private:
+
+ typedef std::vector<BaseLayerModuleSetHandleT> BaseLayerHandleList;
+
+ struct LogicalModule {
+ LogicalModuleResources Resources;
+ BaseLayerHandleList BaseLayerHandles;
+ };
+ typedef std::vector<LogicalModule> LogicalModuleList;
+
+public:
+
+ typedef typename BaseLayerHandleList::iterator BaseLayerHandleIterator;
+ typedef typename LogicalModuleList::iterator LogicalModuleHandle;
+
+ LogicalDylib(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {}
+
+ ~LogicalDylib() {
+ for (auto &LM : LogicalModules)
+ for (auto BLH : LM.BaseLayerHandles)
+ BaseLayer.removeModuleSet(BLH);
+ }
+
+ LogicalModuleHandle createLogicalModule() {
+ LogicalModules.push_back(LogicalModule());
+ return std::prev(LogicalModules.end());
+ }
+
+ void addToLogicalModule(LogicalModuleHandle LMH,
+ BaseLayerModuleSetHandleT BaseLayerHandle) {
+ LMH->BaseLayerHandles.push_back(BaseLayerHandle);
+ }
+
+ LogicalModuleResources& getLogicalModuleResources(LogicalModuleHandle LMH) {
+ return LMH->Resources;
+ }
+
+ BaseLayerHandleIterator moduleHandlesBegin(LogicalModuleHandle LMH) {
+ return LMH->BaseLayerHandles.begin();
+ }
+
+ BaseLayerHandleIterator moduleHandlesEnd(LogicalModuleHandle LMH) {
+ return LMH->BaseLayerHandles.end();
+ }
+
+ JITSymbol findSymbolInLogicalModule(LogicalModuleHandle LMH,
+ const std::string &Name) {
+ for (auto BLH : LMH->BaseLayerHandles)
+ if (auto Symbol = BaseLayer.findSymbolIn(BLH, Name, false))
+ return Symbol;
+ return nullptr;
+ }
+
+ JITSymbol findSymbolInternally(LogicalModuleHandle LMH,
+ const std::string &Name) {
+ if (auto Symbol = findSymbolInLogicalModule(LMH, Name))
+ return Symbol;
+
+ for (auto LMI = LogicalModules.begin(), LME = LogicalModules.end();
+ LMI != LME; ++LMI) {
+ if (LMI != LMH)
+ if (auto Symbol = findSymbolInLogicalModule(LMI, Name))
+ return Symbol;
+ }
+
+ return nullptr;
+ }
+
+ JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
+ for (auto &LM : LogicalModules)
+ for (auto BLH : LM.BaseLayerHandles)
+ if (auto Symbol =
+ BaseLayer.findSymbolIn(BLH, Name, ExportedSymbolsOnly))
+ return Symbol;
+ return nullptr;
+ }
+
+ LogicalDylibResources& getDylibResources() { return DylibResources; }
+
+protected:
+ BaseLayerT BaseLayer;
+ LogicalModuleList LogicalModules;
+ LogicalDylibResources DylibResources;
+
+};
+
+} // End namespace orc.
+} // End namespace llvm.
+
+#endif // LLVM_EXECUTIONENGINE_ORC_LOGICALDYLIB_H
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h b/contrib/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h
index 0b0dcb0..bbf9968 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h
@@ -100,7 +100,7 @@ private:
MemoryGroup RODataMem;
};
-}
+} // namespace llvm
#endif // LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H
diff --git a/contrib/llvm/include/llvm/IR/Argument.h b/contrib/llvm/include/llvm/IR/Argument.h
index fc04fe7..12c8df5 100644
--- a/contrib/llvm/include/llvm/IR/Argument.h
+++ b/contrib/llvm/include/llvm/IR/Argument.h
@@ -131,6 +131,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/AssemblyAnnotationWriter.h b/contrib/llvm/include/llvm/IR/AssemblyAnnotationWriter.h
index 19e32a2..1ae3018 100644
--- a/contrib/llvm/include/llvm/IR/AssemblyAnnotationWriter.h
+++ b/contrib/llvm/include/llvm/IR/AssemblyAnnotationWriter.h
@@ -58,6 +58,6 @@ public:
virtual void printInfoComment(const Value &, formatted_raw_ostream &) {}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Attributes.h b/contrib/llvm/include/llvm/IR/Attributes.h
index e2a0a7e..1d92d18 100644
--- a/contrib/llvm/include/llvm/IR/Attributes.h
+++ b/contrib/llvm/include/llvm/IR/Attributes.h
@@ -108,6 +108,7 @@ public:
StackProtect, ///< Stack protection.
StackProtectReq, ///< Stack protection required.
StackProtectStrong, ///< Strong Stack protection.
+ SafeStack, ///< Safe Stack protection.
StructRet, ///< Hidden pointer to structure to return
SanitizeAddress, ///< AddressSanitizer is on.
SanitizeThread, ///< ThreadSanitizer is on.
@@ -574,6 +575,6 @@ AttrBuilder typeIncompatible(const Type *Ty);
} // end AttributeFuncs namespace
-} // end llvm namespace
+} // namespace llvm
#endif
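A hedged sketch of tagging a function with the new enumerator (F is an assumed Function*):

    F->addFnAttr(Attribute::SafeStack);
    bool UsesSafeStack = F->hasFnAttribute(Attribute::SafeStack);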
diff --git a/contrib/llvm/include/llvm/IR/AutoUpgrade.h b/contrib/llvm/include/llvm/IR/AutoUpgrade.h
index a4b3c41..9ecabec6 100644
--- a/contrib/llvm/include/llvm/IR/AutoUpgrade.h
+++ b/contrib/llvm/include/llvm/IR/AutoUpgrade.h
@@ -66,6 +66,6 @@ namespace llvm {
/// Upgrade a metadata string constant in place.
void UpgradeMDStringConstant(std::string &String);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/BasicBlock.h b/contrib/llvm/include/llvm/IR/BasicBlock.h
index 66581bf..b0fad4f 100644
--- a/contrib/llvm/include/llvm/IR/BasicBlock.h
+++ b/contrib/llvm/include/llvm/IR/BasicBlock.h
@@ -346,6 +346,6 @@ inline BasicBlock *ilist_traits<BasicBlock>::createSentinel() const {
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(BasicBlock, LLVMBasicBlockRef)
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/CFG.h b/contrib/llvm/include/llvm/IR/CFG.h
index f78220a..e6e21b4 100644
--- a/contrib/llvm/include/llvm/IR/CFG.h
+++ b/contrib/llvm/include/llvm/IR/CFG.h
@@ -396,6 +396,6 @@ template <> struct GraphTraits<Inverse<const Function*> > :
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/CallSite.h b/contrib/llvm/include/llvm/IR/CallSite.h
index 170d263..0270caa 100644
--- a/contrib/llvm/include/llvm/IR/CallSite.h
+++ b/contrib/llvm/include/llvm/IR/CallSite.h
@@ -27,6 +27,7 @@
#define LLVM_IR_CALLSITE_H
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instructions.h"
@@ -150,6 +151,9 @@ public:
}
IterTy arg_end() const { return (*this)->op_end() - getArgumentEndOffset(); }
+ iterator_range<IterTy> args() const {
+ return iterator_range<IterTy>(arg_begin(), arg_end());
+ }
bool arg_empty() const { return arg_end() == arg_begin(); }
unsigned arg_size() const { return unsigned(arg_end() - arg_begin()); }
@@ -393,6 +397,6 @@ public:
ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {}
};
-} // End llvm namespace
+} // namespace llvm
#endif
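Usage sketch for the new args() range (I is an assumed call or invoke Instruction*; NumPtrArgs is a hypothetical counter):

    ImmutableCallSite CS(I);
    unsigned NumPtrArgs = 0;
    for (const Use &U : CS.args())
      if (U->getType()->isPointerTy())
        ++NumPtrArgs;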
diff --git a/contrib/llvm/include/llvm/IR/CallingConv.h b/contrib/llvm/include/llvm/IR/CallingConv.h
index 9872e6e..846e58c 100644
--- a/contrib/llvm/include/llvm/IR/CallingConv.h
+++ b/contrib/llvm/include/llvm/IR/CallingConv.h
@@ -146,8 +146,8 @@ namespace CallingConv {
/// in SSE registers.
X86_VectorCall = 80
};
-} // End CallingConv namespace
+} // namespace CallingConv
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Comdat.h b/contrib/llvm/include/llvm/IR/Comdat.h
index 4d4c15f..50b11be 100644
--- a/contrib/llvm/include/llvm/IR/Comdat.h
+++ b/contrib/llvm/include/llvm/IR/Comdat.h
@@ -61,6 +61,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Comdat &C) {
return OS;
}
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Constant.h b/contrib/llvm/include/llvm/IR/Constant.h
index 75499e0..7db09d0 100644
--- a/contrib/llvm/include/llvm/IR/Constant.h
+++ b/contrib/llvm/include/llvm/IR/Constant.h
@@ -187,6 +187,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/ConstantFolder.h b/contrib/llvm/include/llvm/IR/ConstantFolder.h
index fb6ca3b..4e87cd0 100644
--- a/contrib/llvm/include/llvm/IR/ConstantFolder.h
+++ b/contrib/llvm/include/llvm/IR/ConstantFolder.h
@@ -240,6 +240,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/ConstantRange.h b/contrib/llvm/include/llvm/IR/ConstantRange.h
index 9ded3ca..8a7488e 100644
--- a/contrib/llvm/include/llvm/IR/ConstantRange.h
+++ b/contrib/llvm/include/llvm/IR/ConstantRange.h
@@ -273,6 +273,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ConstantRange &CR) {
return OS;
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Constants.h b/contrib/llvm/include/llvm/IR/Constants.h
index e97bda5..b2ef77b 100644
--- a/contrib/llvm/include/llvm/IR/Constants.h
+++ b/contrib/llvm/include/llvm/IR/Constants.h
@@ -1232,6 +1232,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/DataLayout.h b/contrib/llvm/include/llvm/IR/DataLayout.h
index 3e1f974..81cf665 100644
--- a/contrib/llvm/include/llvm/IR/DataLayout.h
+++ b/contrib/llvm/include/llvm/IR/DataLayout.h
@@ -542,6 +542,6 @@ inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const {
}
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h b/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h
index 0125de5..03dd901 100644
--- a/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -2092,7 +2092,7 @@ class DIObjCProperty : public DINode {
static DIObjCProperty *
getImpl(LLVMContext &Context, StringRef Name, DIFile *File, unsigned Line,
StringRef GetterName, StringRef SetterName, unsigned Attributes,
- DIType *Type, StorageType Storage, bool ShouldCreate = true) {
+ DITypeRef Type, StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, getCanonicalMDString(Context, Name), File, Line,
getCanonicalMDString(Context, GetterName),
getCanonicalMDString(Context, SetterName), Attributes, Type,
@@ -2114,7 +2114,7 @@ public:
DEFINE_MDNODE_GET(DIObjCProperty,
(StringRef Name, DIFile *File, unsigned Line,
StringRef GetterName, StringRef SetterName,
- unsigned Attributes, DIType *Type),
+ unsigned Attributes, DITypeRef Type),
(Name, File, Line, GetterName, SetterName, Attributes,
Type))
DEFINE_MDNODE_GET(DIObjCProperty,
@@ -2132,12 +2132,7 @@ public:
DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
StringRef getGetterName() const { return getStringOperand(2); }
StringRef getSetterName() const { return getStringOperand(3); }
-
- /// \brief Get the type.
- ///
- /// \note Objective-C doesn't have an ODR, so there is no benefit in storing
- /// a type ref here.
- DIType *getType() const { return cast_or_null<DIType>(getRawType()); }
+ DITypeRef getType() const { return DITypeRef(getRawType()); }
StringRef getFilename() const {
if (auto *F = getFile())
diff --git a/contrib/llvm/include/llvm/IR/DerivedTypes.h b/contrib/llvm/include/llvm/IR/DerivedTypes.h
index 38f1af0..9f2671a 100644
--- a/contrib/llvm/include/llvm/IR/DerivedTypes.h
+++ b/contrib/llvm/include/llvm/IR/DerivedTypes.h
@@ -140,7 +140,8 @@ public:
return T->getTypeID() == FunctionTyID;
}
};
-
+static_assert(AlignOf<FunctionType>::Alignment >= AlignOf<Type *>::Alignment,
+ "Alignment sufficient for objects appended to FunctionType");
/// CompositeType - Common super class of ArrayType, StructType, PointerType
/// and VectorType.
@@ -476,6 +477,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/DiagnosticInfo.h b/contrib/llvm/include/llvm/IR/DiagnosticInfo.h
index 6db5a40..f38313f 100644
--- a/contrib/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/contrib/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -32,6 +32,7 @@ class LLVMContextImpl;
class Twine;
class Value;
class DebugLoc;
+class SMDiagnostic;
/// \brief Defines the different supported severity of a diagnostic.
enum DiagnosticSeverity {
@@ -56,6 +57,7 @@ enum DiagnosticKind {
DK_OptimizationRemarkMissed,
DK_OptimizationRemarkAnalysis,
DK_OptimizationFailure,
+ DK_MIRParser,
DK_FirstPluginKind
};
@@ -386,6 +388,24 @@ public:
bool isEnabled() const override;
};
+/// Diagnostic information for machine IR parser.
+class DiagnosticInfoMIRParser : public DiagnosticInfo {
+ const SMDiagnostic &Diagnostic;
+
+public:
+ DiagnosticInfoMIRParser(DiagnosticSeverity Severity,
+ const SMDiagnostic &Diagnostic)
+ : DiagnosticInfo(DK_MIRParser, Severity), Diagnostic(Diagnostic) {}
+
+ const SMDiagnostic &getDiagnostic() const { return Diagnostic; }
+
+ void print(DiagnosticPrinter &DP) const override;
+
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == DK_MIRParser;
+ }
+};
+
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(DiagnosticInfo, LLVMDiagnosticInfoRef)
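Emission sketch for the new diagnostic kind (Ctx is an assumed LLVMContext& and SMDiag an assumed SMDiagnostic produced by the MIR parser):

    Ctx.diagnose(DiagnosticInfoMIRParser(DS_Error, SMDiag));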
diff --git a/contrib/llvm/include/llvm/IR/DiagnosticPrinter.h b/contrib/llvm/include/llvm/IR/DiagnosticPrinter.h
index db5779a..735e3ad 100644
--- a/contrib/llvm/include/llvm/IR/DiagnosticPrinter.h
+++ b/contrib/llvm/include/llvm/IR/DiagnosticPrinter.h
@@ -22,6 +22,7 @@ namespace llvm {
// Forward declarations.
class Module;
class raw_ostream;
+class SMDiagnostic;
class StringRef;
class Twine;
class Value;
@@ -51,6 +52,9 @@ public:
// IR related types.
virtual DiagnosticPrinter &operator<<(const Value &V) = 0;
virtual DiagnosticPrinter &operator<<(const Module &M) = 0;
+
+ // Other types.
+ virtual DiagnosticPrinter &operator<<(const SMDiagnostic &Diag) = 0;
};
/// \brief Basic diagnostic printer that uses an underlying raw_ostream.
@@ -81,6 +85,9 @@ public:
// IR related types.
DiagnosticPrinter &operator<<(const Value &V) override;
DiagnosticPrinter &operator<<(const Module &M) override;
+
+ // Other types.
+ DiagnosticPrinter &operator<<(const SMDiagnostic &Diag) override;
};
} // End namespace llvm
diff --git a/contrib/llvm/include/llvm/IR/Dominators.h b/contrib/llvm/include/llvm/IR/Dominators.h
index c1f208e..93f5ede 100644
--- a/contrib/llvm/include/llvm/IR/Dominators.h
+++ b/contrib/llvm/include/llvm/IR/Dominators.h
@@ -230,6 +230,6 @@ public:
void print(raw_ostream &OS, const Module *M = nullptr) const override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Function.h b/contrib/llvm/include/llvm/IR/Function.h
index 6c228ea..f66ac0b 100644
--- a/contrib/llvm/include/llvm/IR/Function.h
+++ b/contrib/llvm/include/llvm/IR/Function.h
@@ -25,6 +25,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/OperandTraits.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -119,11 +120,22 @@ private:
public:
static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
const Twine &N = "", Module *M = nullptr) {
- return new(0) Function(Ty, Linkage, N, M);
+ return new(1) Function(Ty, Linkage, N, M);
}
~Function() override;
+ /// \brief Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// \brief Get the personality function associated with this function.
+ bool hasPersonalityFn() const { return getNumOperands() != 0; }
+ Constant *getPersonalityFn() const {
+ assert(hasPersonalityFn());
+ return cast<Constant>(Op<0>());
+ }
+ void setPersonalityFn(Constant *C);
+
Type *getReturnType() const; // Return the type of the ret val
FunctionType *getFunctionType() const; // Return the FunctionType for me
@@ -601,6 +613,11 @@ ilist_traits<Argument>::getSymTab(Function *F) {
return F ? &F->getValueSymbolTable() : nullptr;
}
-} // End llvm namespace
+template <>
+struct OperandTraits<Function> : public OptionalOperandTraits<Function> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(Function, Value)
+
+} // namespace llvm
#endif
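
With the personality function stored as an optional operand of Function, EH-aware code queries and sets it directly on the function. A small sketch, using the Itanium C++ personality symbol purely for illustration:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Attach a personality routine to F if it does not have one yet.
void ensurePersonality(Function &F) {
  if (F.hasPersonalityFn())
    return;
  Module *M = F.getParent();
  FunctionType *PersTy =
      FunctionType::get(Type::getInt32Ty(M->getContext()), /*isVarArg=*/true);
  Constant *Pers = M->getOrInsertFunction("__gxx_personality_v0", PersTy);
  F.setPersonalityFn(Pers);
}
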
diff --git a/contrib/llvm/include/llvm/IR/GVMaterializer.h b/contrib/llvm/include/llvm/IR/GVMaterializer.h
index 1d6c915..433de3f 100644
--- a/contrib/llvm/include/llvm/IR/GVMaterializer.h
+++ b/contrib/llvm/include/llvm/IR/GVMaterializer.h
@@ -59,6 +59,6 @@ public:
virtual std::vector<StructType *> getIdentifiedStructTypes() const = 0;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/GlobalAlias.h b/contrib/llvm/include/llvm/IR/GlobalAlias.h
index ce73b7a..2316749 100644
--- a/contrib/llvm/include/llvm/IR/GlobalAlias.h
+++ b/contrib/llvm/include/llvm/IR/GlobalAlias.h
@@ -118,6 +118,6 @@ struct OperandTraits<GlobalAlias> :
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalAlias, Constant)
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/GlobalObject.h b/contrib/llvm/include/llvm/IR/GlobalObject.h
index f055241..5f58c9c 100644
--- a/contrib/llvm/include/llvm/IR/GlobalObject.h
+++ b/contrib/llvm/include/llvm/IR/GlobalObject.h
@@ -71,6 +71,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/GlobalValue.h b/contrib/llvm/include/llvm/IR/GlobalValue.h
index 1dfe0c2..5e1c5ff 100644
--- a/contrib/llvm/include/llvm/IR/GlobalValue.h
+++ b/contrib/llvm/include/llvm/IR/GlobalValue.h
@@ -83,11 +83,12 @@ protected:
unsigned ThreadLocal : 3; // Is this symbol "Thread Local", if so, what is
// the desired model?
+ static const unsigned GlobalValueSubClassDataBits = 19;
private:
// Give subclasses access to what otherwise would be wasted padding.
// (19 + 3 + 2 + 1 + 2 + 5) == 32.
- unsigned SubClassData : 19;
+ unsigned SubClassData : GlobalValueSubClassDataBits;
protected:
/// \brief The intrinsic ID for this subclass (which must be a Function).
@@ -98,12 +99,11 @@ protected:
/// This is stored here to save space in Function on 64-bit hosts.
Intrinsic::ID IntID;
- static const unsigned GlobalValueSubClassDataBits = 19;
unsigned getGlobalValueSubClassData() const {
return SubClassData;
}
void setGlobalValueSubClassData(unsigned V) {
- assert(V < (1 << 19) && "It will not fit");
+ assert(V < (1 << GlobalValueSubClassDataBits) && "It will not fit");
SubClassData = V;
}
@@ -367,6 +367,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/GlobalVariable.h b/contrib/llvm/include/llvm/IR/GlobalVariable.h
index 9f57705..4269a70 100644
--- a/contrib/llvm/include/llvm/IR/GlobalVariable.h
+++ b/contrib/llvm/include/llvm/IR/GlobalVariable.h
@@ -67,7 +67,8 @@ public:
bool isExternallyInitialized = false);
~GlobalVariable() override {
- NumOperands = 1; // FIXME: needed by operator delete
+ // FIXME: needed by operator delete
+ setGlobalVariableNumOperands(1);
}
/// Provide fast operand accessors
@@ -182,6 +183,6 @@ struct OperandTraits<GlobalVariable> :
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalVariable, Value)
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/IRBuilder.h b/contrib/llvm/include/llvm/IR/IRBuilder.h
index adf6924..0472ec5 100644
--- a/contrib/llvm/include/llvm/IR/IRBuilder.h
+++ b/contrib/llvm/include/llvm/IR/IRBuilder.h
@@ -245,7 +245,8 @@ public:
/// filled in with the null terminated string value specified. The new global
/// variable will be marked mergable with any others of the same contents. If
/// Name is specified, it is the name of the global variable created.
- GlobalVariable *CreateGlobalString(StringRef Str, const Twine &Name = "");
+ GlobalVariable *CreateGlobalString(StringRef Str, const Twine &Name = "",
+ unsigned AddressSpace = 0);
/// \brief Get a constant value representing either true or false.
ConstantInt *getInt1(bool V) {
@@ -1191,8 +1192,9 @@ public:
/// \brief Same as CreateGlobalString, but return a pointer with "i8*" type
/// instead of a pointer to array of i8.
- Value *CreateGlobalStringPtr(StringRef Str, const Twine &Name = "") {
- GlobalVariable *gv = CreateGlobalString(Str, Name);
+ Value *CreateGlobalStringPtr(StringRef Str, const Twine &Name = "",
+ unsigned AddressSpace = 0) {
+ GlobalVariable *gv = CreateGlobalString(Str, Name, AddressSpace);
Value *zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
Value *Args[] = { zero, zero };
return CreateInBoundsGEP(gv->getValueType(), gv, Args, Name);
@@ -1556,9 +1558,9 @@ public:
return Insert(InsertValueInst::Create(Agg, Val, Idxs), Name);
}
- LandingPadInst *CreateLandingPad(Type *Ty, Value *PersFn, unsigned NumClauses,
+ LandingPadInst *CreateLandingPad(Type *Ty, unsigned NumClauses,
const Twine &Name = "") {
- return Insert(LandingPadInst::Create(Ty, PersFn, NumClauses), Name);
+ return Insert(LandingPadInst::Create(Ty, NumClauses), Name);
}
//===--------------------------------------------------------------------===//
@@ -1677,6 +1679,6 @@ public:
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IRBuilder<>, LLVMBuilderRef)
-}
+} // namespace llvm
#endif
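
Two caller-visible IRBuilder changes meet here: CreateGlobalString/CreateGlobalStringPtr take an optional address space, and CreateLandingPad no longer takes the personality value (it now lives on the enclosing Function). A sketch, where address space 1 and the landingpad struct layout are illustrative only:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

void emitSketch(IRBuilder<> &B, Function &F, Constant *Personality) {
  // String constants can now be placed outside address space 0.
  Value *Msg = B.CreateGlobalStringPtr("hello", "msg", /*AddressSpace=*/1);
  (void)Msg;

  // The personality belongs to the function, not to each landingpad.
  F.setPersonalityFn(Personality);
  Type *LPadTy = StructType::get(B.getInt8PtrTy(), B.getInt32Ty(), nullptr);
  LandingPadInst *LPad = B.CreateLandingPad(LPadTy, /*NumClauses=*/0, "lpad");
  LPad->setCleanup(true);
}
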
diff --git a/contrib/llvm/include/llvm/IR/IRPrintingPasses.h b/contrib/llvm/include/llvm/IR/IRPrintingPasses.h
index 5f1d56f..3969c83 100644
--- a/contrib/llvm/include/llvm/IR/IRPrintingPasses.h
+++ b/contrib/llvm/include/llvm/IR/IRPrintingPasses.h
@@ -83,6 +83,6 @@ public:
static StringRef name() { return "PrintFunctionPass"; }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/InlineAsm.h b/contrib/llvm/include/llvm/IR/InlineAsm.h
index 08b5102..b5174c8 100644
--- a/contrib/llvm/include/llvm/IR/InlineAsm.h
+++ b/contrib/llvm/include/llvm/IR/InlineAsm.h
@@ -358,6 +358,6 @@ public:
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/InstIterator.h b/contrib/llvm/include/llvm/IR/InstIterator.h
index f3ce649..a73d489 100644
--- a/contrib/llvm/include/llvm/IR/InstIterator.h
+++ b/contrib/llvm/include/llvm/IR/InstIterator.h
@@ -153,6 +153,6 @@ inline iterator_range<const_inst_iterator> inst_range(const Function &F) {
return iterator_range<const_inst_iterator>(inst_begin(F), inst_end(F));
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/InstVisitor.h b/contrib/llvm/include/llvm/IR/InstVisitor.h
index 581e860..0eb337e 100644
--- a/contrib/llvm/include/llvm/IR/InstVisitor.h
+++ b/contrib/llvm/include/llvm/IR/InstVisitor.h
@@ -284,6 +284,6 @@ private:
#undef DELEGATE
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/InstrTypes.h b/contrib/llvm/include/llvm/IR/InstrTypes.h
index 108b9eb..9df7043 100644
--- a/contrib/llvm/include/llvm/IR/InstrTypes.h
+++ b/contrib/llvm/include/llvm/IR/InstrTypes.h
@@ -894,6 +894,6 @@ struct OperandTraits<CmpInst> : public FixedNumOperandTraits<CmpInst, 2> {
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CmpInst, Value)
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Instruction.h b/contrib/llvm/include/llvm/IR/Instruction.h
index 752c3f0..6fea926 100644
--- a/contrib/llvm/include/llvm/IR/Instruction.h
+++ b/contrib/llvm/include/llvm/IR/Instruction.h
@@ -536,6 +536,6 @@ public:
enum { NumLowBitsAvailable = 2 };
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Instructions.h b/contrib/llvm/include/llvm/IR/Instructions.h
index 8d8c530..369b7db 100644
--- a/contrib/llvm/include/llvm/IR/Instructions.h
+++ b/contrib/llvm/include/llvm/IR/Instructions.h
@@ -2226,7 +2226,7 @@ class PHINode : public Instruction {
PHINode(const PHINode &PN);
// allocate space for exactly zero operands
void *operator new(size_t s) {
- return User::operator new(s, 0);
+ return User::operator new(s);
}
explicit PHINode(Type *Ty, unsigned NumReservedValues,
const Twine &NameStr = "",
@@ -2234,7 +2234,7 @@ class PHINode : public Instruction {
: Instruction(Ty, Instruction::PHI, nullptr, 0, InsertBefore),
ReservedSpace(NumReservedValues) {
setName(NameStr);
- OperandList = allocHungoffUses(ReservedSpace);
+ allocHungoffUses(ReservedSpace);
}
PHINode(Type *Ty, unsigned NumReservedValues, const Twine &NameStr,
@@ -2242,13 +2242,15 @@ class PHINode : public Instruction {
: Instruction(Ty, Instruction::PHI, nullptr, 0, InsertAtEnd),
ReservedSpace(NumReservedValues) {
setName(NameStr);
- OperandList = allocHungoffUses(ReservedSpace);
+ allocHungoffUses(ReservedSpace);
}
protected:
// allocHungoffUses - this is more complicated than the generic
// User::allocHungoffUses, because we have to allocate Uses for the incoming
// values and pointers to the incoming blocks, all in one allocation.
- Use *allocHungoffUses(unsigned) const;
+ void allocHungoffUses(unsigned N) {
+ User::allocHungoffUses(N, /* IsPhi */ true);
+ }
PHINode *clone_impl() const override;
public:
@@ -2263,7 +2265,6 @@ public:
const Twine &NameStr, BasicBlock *InsertAtEnd) {
return new PHINode(Ty, NumReservedValues, NameStr, InsertAtEnd);
}
- ~PHINode() override;
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -2349,12 +2350,12 @@ public:
assert(BB && "PHI node got a null basic block!");
assert(getType() == V->getType() &&
"All operands to PHI node must be the same type as the PHI node!");
- if (NumOperands == ReservedSpace)
+ if (getNumOperands() == ReservedSpace)
growOperands(); // Get more space!
// Initialize some new operands.
- ++NumOperands;
- setIncomingValue(NumOperands - 1, V);
- setIncomingBlock(NumOperands - 1, BB);
+ setNumHungOffUseOperands(getNumOperands() + 1);
+ setIncomingValue(getNumOperands() - 1, V);
+ setIncomingBlock(getNumOperands() - 1, BB);
}
/// removeIncomingValue - Remove an incoming value. This is useful if a
@@ -2433,38 +2434,30 @@ private:
void *operator new(size_t, unsigned) = delete;
// Allocate space for exactly zero operands.
void *operator new(size_t s) {
- return User::operator new(s, 0);
+ return User::operator new(s);
}
void growOperands(unsigned Size);
- void init(Value *PersFn, unsigned NumReservedValues, const Twine &NameStr);
+ void init(unsigned NumReservedValues, const Twine &NameStr);
+
+ explicit LandingPadInst(Type *RetTy, unsigned NumReservedValues,
+ const Twine &NameStr, Instruction *InsertBefore);
+ explicit LandingPadInst(Type *RetTy, unsigned NumReservedValues,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
- explicit LandingPadInst(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedValues, const Twine &NameStr,
- Instruction *InsertBefore);
- explicit LandingPadInst(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedValues, const Twine &NameStr,
- BasicBlock *InsertAtEnd);
protected:
LandingPadInst *clone_impl() const override;
public:
/// Constructors - NumReservedClauses is a hint for the number of incoming
/// clauses that this landingpad will have (use 0 if you really have no idea).
- static LandingPadInst *Create(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedClauses,
+ static LandingPadInst *Create(Type *RetTy, unsigned NumReservedClauses,
const Twine &NameStr = "",
Instruction *InsertBefore = nullptr);
- static LandingPadInst *Create(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedClauses,
+ static LandingPadInst *Create(Type *RetTy, unsigned NumReservedClauses,
const Twine &NameStr, BasicBlock *InsertAtEnd);
- ~LandingPadInst() override;
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
- /// getPersonalityFn - Get the personality function associated with this
- /// landing pad.
- Value *getPersonalityFn() const { return getOperand(0); }
-
/// isCleanup - Return 'true' if this landingpad instruction is a
/// cleanup. I.e., it should be run when unwinding even if its landing pad
/// doesn't catch the exception.
@@ -2482,21 +2475,21 @@ public:
/// Get the value of the clause at index Idx. Use isCatch/isFilter to
/// determine what type of clause this is.
Constant *getClause(unsigned Idx) const {
- return cast<Constant>(OperandList[Idx + 1]);
+ return cast<Constant>(getOperandList()[Idx]);
}
/// isCatch - Return 'true' if the clause and index Idx is a catch clause.
bool isCatch(unsigned Idx) const {
- return !isa<ArrayType>(OperandList[Idx + 1]->getType());
+ return !isa<ArrayType>(getOperandList()[Idx]->getType());
}
/// isFilter - Return 'true' if the clause and index Idx is a filter clause.
bool isFilter(unsigned Idx) const {
- return isa<ArrayType>(OperandList[Idx + 1]->getType());
+ return isa<ArrayType>(getOperandList()[Idx]->getType());
}
/// getNumClauses - Get the number of clauses for this landing pad.
- unsigned getNumClauses() const { return getNumOperands() - 1; }
+ unsigned getNumClauses() const { return getNumOperands(); }
/// reserveClauses - Grow the size of the operand list to accommodate the new
/// number of clauses.
@@ -2512,7 +2505,7 @@ public:
};
template <>
-struct OperandTraits<LandingPadInst> : public HungoffOperandTraits<2> {
+struct OperandTraits<LandingPadInst> : public HungoffOperandTraits<1> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(LandingPadInst, Value)
@@ -2708,7 +2701,7 @@ class SwitchInst : public TerminatorInst {
void growOperands();
// allocate space for exactly zero operands
void *operator new(size_t s) {
- return User::operator new(s, 0);
+ return User::operator new(s);
}
/// SwitchInst ctor - Create a new switch instruction, specifying a value to
/// switch on and a default destination. The number of additional cases can
@@ -2855,8 +2848,6 @@ public:
return new SwitchInst(Value, Default, NumCases, InsertAtEnd);
}
- ~SwitchInst() override;
-
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -3017,7 +3008,7 @@ class IndirectBrInst : public TerminatorInst {
void growOperands();
// allocate space for exactly zero operands
void *operator new(size_t s) {
- return User::operator new(s, 0);
+ return User::operator new(s);
}
/// IndirectBrInst ctor - Create a new indirectbr instruction, specifying an
/// Address to jump to. The number of expected destinations can be specified
@@ -3041,7 +3032,6 @@ public:
BasicBlock *InsertAtEnd) {
return new IndirectBrInst(Address, NumDests, InsertAtEnd);
}
- ~IndirectBrInst() override;
/// Provide fast operand accessors.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -3993,6 +3983,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
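
Because the personality operand is gone from the instruction, clause storage is now zero-based: getNumClauses() equals getNumOperands() and getClause(0) is the first real clause. A small traversal sketch:

#include "llvm/IR/Instructions.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void dumpClauses(const LandingPadInst &LPad) {
  for (unsigned i = 0, e = LPad.getNumClauses(); i != e; ++i) {
    if (LPad.isCatch(i))
      errs() << "catch:  " << *LPad.getClause(i) << "\n";
    else if (LPad.isFilter(i))
      errs() << "filter: " << *LPad.getClause(i) << "\n";
  }
}
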
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicInst.h b/contrib/llvm/include/llvm/IR/IntrinsicInst.h
index 2c8b6eb..102cbef 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/contrib/llvm/include/llvm/IR/IntrinsicInst.h
@@ -372,6 +372,6 @@ namespace llvm {
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Intrinsics.h b/contrib/llvm/include/llvm/IR/Intrinsics.h
index e12ccac..01781d5 100644
--- a/contrib/llvm/include/llvm/IR/Intrinsics.h
+++ b/contrib/llvm/include/llvm/IR/Intrinsics.h
@@ -52,6 +52,11 @@ namespace Intrinsic {
/// Returns true if the intrinsic can be overloaded.
bool isOverloaded(ID id);
+ /// Returns true if the intrinsic is a leaf, i.e. it does not make any calls
+ /// itself. Most intrinsics are leaves, the exceptions being the patchpoint
+ /// and statepoint intrinsics. These call (or invoke) their "target" argument.
+ bool isLeaf(ID id);
+
/// Return the attributes for an intrinsic.
AttributeSet getAttributes(LLVMContext &C, ID id);
@@ -121,8 +126,8 @@ namespace Intrinsic {
/// of IITDescriptors.
void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl<IITDescriptor> &T);
-} // End Intrinsic namespace
+} // namespace Intrinsic
-} // End llvm namespace
+} // namespace llvm
#endif
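
The new Intrinsic::isLeaf predicate lets analyses distinguish intrinsics that never transfer control elsewhere from patchpoint/statepoint, which call their target. A conservative check, sketched on the assumption that the call's callee is resolvable:

#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

// Returns true if CI might transfer control to other code.
bool mayCallOtherCode(const CallInst &CI) {
  const Function *Callee = CI.getCalledFunction();
  if (!Callee)
    return true; // indirect call
  Intrinsic::ID ID = Callee->getIntrinsicID();
  if (ID == Intrinsic::not_intrinsic)
    return true; // plain function call
  return !Intrinsic::isLeaf(ID);
}
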
diff --git a/contrib/llvm/include/llvm/IR/Intrinsics.td b/contrib/llvm/include/llvm/IR/Intrinsics.td
index beeffde..e6f6d0f 100644
--- a/contrib/llvm/include/llvm/IR/Intrinsics.td
+++ b/contrib/llvm/include/llvm/IR/Intrinsics.td
@@ -428,8 +428,7 @@ def int_eh_endcatch : Intrinsic<[], []>;
// Represents the list of actions to take when an exception is thrown.
def int_eh_actions : Intrinsic<[llvm_ptr_ty], [llvm_vararg_ty], []>;
-def int_eh_exceptioncode : Intrinsic<[llvm_i32_ty], []>;
-def int_eh_exceptioninfo : Intrinsic<[llvm_ptr_ty], []>;
+def int_eh_exceptioncode : Intrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
// __builtin_unwind_init is an undocumented GCC intrinsic that causes all
// callee-saved registers to be saved and restored (regardless of whether they
@@ -636,6 +635,6 @@ include "llvm/IR/IntrinsicsXCore.td"
include "llvm/IR/IntrinsicsHexagon.td"
include "llvm/IR/IntrinsicsNVVM.td"
include "llvm/IR/IntrinsicsMips.td"
-include "llvm/IR/IntrinsicsR600.td"
+include "llvm/IR/IntrinsicsAMDGPU.td"
include "llvm/IR/IntrinsicsBPF.td"
include "llvm/IR/IntrinsicsSystemZ.td"
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsR600.td b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 5055667..510e5ad 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsR600.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1,4 +1,4 @@
-//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===//
+//===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 7965469..d680085 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -608,6 +608,11 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vsel : GCCBuiltin<"__builtin_altivec_vsel_4si">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vgbbd : GCCBuiltin<"__builtin_altivec_vgbbd">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vbpermq : GCCBuiltin<"__builtin_altivec_vbpermq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
}
def int_ppc_altivec_vexptefp : PowerPC_Vec_FF_Intrinsic<"vexptefp">;
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
index 0826aa2..1bed31c 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -18,9 +18,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
}
//===----------------------------------------------------------------------===//
-// SEH LSDA for Windows
+// SEH intrinsics for Windows
let TargetPrefix = "x86" in {
def int_x86_seh_lsda : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrNoMem]>;
+ def int_x86_seh_exceptioninfo : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty, llvm_ptr_ty],
+ [IntrReadMem]>;
}
//===----------------------------------------------------------------------===//
@@ -1454,30 +1457,150 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_pmins_d : GCCBuiltin<"__builtin_ia32_pminsd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx512_mask_pmaxs_b_128 : GCCBuiltin<"__builtin_ia32_pmaxsb128_mask">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+ llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxs_b_256 : GCCBuiltin<"__builtin_ia32_pmaxsb256_mask">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
+ llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxs_b_512 : GCCBuiltin<"__builtin_ia32_pmaxsb512_mask">,
+ Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
+ llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxu_b_128 : GCCBuiltin<"__builtin_ia32_pmaxub128_mask">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+ llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxu_b_256 : GCCBuiltin<"__builtin_ia32_pmaxub256_mask">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
+ llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxu_b_512 : GCCBuiltin<"__builtin_ia32_pmaxub512_mask">,
+ Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
+ llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxs_w_128 : GCCBuiltin<"__builtin_ia32_pmaxsw128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxs_w_256 : GCCBuiltin<"__builtin_ia32_pmaxsw256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+ llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxs_w_512 : GCCBuiltin<"__builtin_ia32_pmaxsw512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
+ llvm_v32i16_ty, llvm_i32_ty],[IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxu_w_128 : GCCBuiltin<"__builtin_ia32_pmaxuw128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxu_w_256 : GCCBuiltin<"__builtin_ia32_pmaxuw256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+ llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxu_w_512 : GCCBuiltin<"__builtin_ia32_pmaxuw512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
+ llvm_v32i16_ty, llvm_i32_ty],[IntrNoMem]>;
+ def int_x86_avx512_mask_pmins_b_128 : GCCBuiltin<"__builtin_ia32_pminsb128_mask">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+ llvm_v16i8_ty,llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmins_b_256 : GCCBuiltin<"__builtin_ia32_pminsb256_mask">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
+ llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmins_b_512 : GCCBuiltin<"__builtin_ia32_pminsb512_mask">,
+ Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
+ llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pminu_b_128 : GCCBuiltin<"__builtin_ia32_pminub128_mask">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+ llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pminu_b_256 : GCCBuiltin<"__builtin_ia32_pminub256_mask">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
+ llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pminu_b_512 : GCCBuiltin<"__builtin_ia32_pminub512_mask">,
+ Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
+ llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmins_w_128 : GCCBuiltin<"__builtin_ia32_pminsw128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmins_w_256 : GCCBuiltin<"__builtin_ia32_pminsw256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+ llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmins_w_512 : GCCBuiltin<"__builtin_ia32_pminsw512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
+ llvm_v32i16_ty, llvm_i32_ty],[IntrNoMem]>;
+ def int_x86_avx512_mask_pminu_w_128 : GCCBuiltin<"__builtin_ia32_pminuw128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pminu_w_256 : GCCBuiltin<"__builtin_ia32_pminuw256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+ llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pminu_w_512 : GCCBuiltin<"__builtin_ia32_pminuw512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
+ llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmaxu_d_512 : GCCBuiltin<"__builtin_ia32_pmaxud512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxu_d_256 : GCCBuiltin<"__builtin_ia32_pmaxud256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxu_d_128 : GCCBuiltin<"__builtin_ia32_pmaxud128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmaxs_d_512 : GCCBuiltin<"__builtin_ia32_pmaxsd512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxs_d_256 : GCCBuiltin<"__builtin_ia32_pmaxsd256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxs_d_128 : GCCBuiltin<"__builtin_ia32_pmaxsd128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmaxu_q_512 : GCCBuiltin<"__builtin_ia32_pmaxuq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxu_q_256 : GCCBuiltin<"__builtin_ia32_pmaxuq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxu_q_128 : GCCBuiltin<"__builtin_ia32_pmaxuq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmaxs_q_512 : GCCBuiltin<"__builtin_ia32_pmaxsq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxs_q_256 : GCCBuiltin<"__builtin_ia32_pmaxsq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaxs_q_128 : GCCBuiltin<"__builtin_ia32_pmaxsq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pminu_d_512 : GCCBuiltin<"__builtin_ia32_pminud512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pminu_d_256 : GCCBuiltin<"__builtin_ia32_pminud256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pminu_d_128 : GCCBuiltin<"__builtin_ia32_pminud128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmins_d_512 : GCCBuiltin<"__builtin_ia32_pminsd512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmins_d_256 : GCCBuiltin<"__builtin_ia32_pminsd256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmins_d_128 : GCCBuiltin<"__builtin_ia32_pminsd128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pminu_q_512 : GCCBuiltin<"__builtin_ia32_pminuq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pminu_q_256 : GCCBuiltin<"__builtin_ia32_pminuq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pminu_q_128 : GCCBuiltin<"__builtin_ia32_pminuq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmins_q_512 : GCCBuiltin<"__builtin_ia32_pminsq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmins_q_256 : GCCBuiltin<"__builtin_ia32_pminsq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmins_q_128 : GCCBuiltin<"__builtin_ia32_pminsq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Integer shift ops.
@@ -2974,12 +3097,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_cvttss2usi64">,
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss">,
+ def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss32">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi642ss">,
+ llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss64">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_i64_ty], [IntrNoMem]>;
+ llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_cvtsd2usi : GCCBuiltin<"__builtin_ia32_cvtsd2usi">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
@@ -2989,12 +3112,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_cvttsd2usi64">,
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd">,
+ def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd32">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi642sd">,
+ def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd64">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_i64_ty], [IntrNoMem]>;
+ llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_cvtsi2ss32 : GCCBuiltin<"__builtin_ia32_cvtsi2ss32">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtsi2ss64 : GCCBuiltin<"__builtin_ia32_cvtsi2ss64">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtsi2sd32 : GCCBuiltin<"__builtin_ia32_cvtsi2sd32">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
}
// Pack ops.
@@ -3741,6 +3877,24 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_mask_pmull_q_512 : GCCBuiltin<"__builtin_ia32_pmullq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pavg_b_512 : GCCBuiltin<"__builtin_ia32_pavgb512_mask">,
+ Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
+ llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pavg_w_512 : GCCBuiltin<"__builtin_ia32_pavgw512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
+ llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pavg_b_128 : GCCBuiltin<"__builtin_ia32_pavgb128_mask">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+ llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pavg_b_256 : GCCBuiltin<"__builtin_ia32_pavgb256_mask">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
+ llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pavg_w_128 : GCCBuiltin<"__builtin_ia32_pavgw128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pavg_w_256 : GCCBuiltin<"__builtin_ia32_pavgw256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+ llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
}
// Gather and Scatter ops
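
All of the masked min/max/avg additions above share one operand layout: two sources, a pass-through vector, and an integer mask whose width matches the element count. A sketch emitting one of them, using the usual enum spelling of the TableGen name:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Emit llvm.x86.avx512.mask.pmaxs.b.128: per-byte signed max of Src1 and
// Src2; lanes whose mask bit is clear take their value from PassThru.
Value *emitMaskedPMaxSB128(IRBuilder<> &B, Module &M, Value *Src1,
                           Value *Src2, Value *PassThru, Value *Mask) {
  Function *Fn =
      Intrinsic::getDeclaration(&M, Intrinsic::x86_avx512_mask_pmaxs_b_128);
  return B.CreateCall(Fn, {Src1, Src2, PassThru, Mask});
}
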
diff --git a/contrib/llvm/include/llvm/IR/LLVMContext.h b/contrib/llvm/include/llvm/IR/LLVMContext.h
index e6c2209..53c8b3a 100644
--- a/contrib/llvm/include/llvm/IR/LLVMContext.h
+++ b/contrib/llvm/include/llvm/IR/LLVMContext.h
@@ -209,6 +209,6 @@ inline LLVMContextRef *wrap(const LLVMContext **Tys) {
return reinterpret_cast<LLVMContextRef*>(const_cast<LLVMContext**>(Tys));
}
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/LegacyPassManager.h b/contrib/llvm/include/llvm/IR/LegacyPassManager.h
index 5257a0e..7c678fb 100644
--- a/contrib/llvm/include/llvm/IR/LegacyPassManager.h
+++ b/contrib/llvm/include/llvm/IR/LegacyPassManager.h
@@ -93,11 +93,11 @@ private:
Module *M;
};
-} // End legacy namespace
+} // namespace legacy
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_STDCXX_CONVERSION_FUNCTIONS(legacy::PassManagerBase, LLVMPassManagerRef)
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/LegacyPassManagers.h b/contrib/llvm/include/llvm/IR/LegacyPassManagers.h
index 7f7889a..e2f1ab4 100644
--- a/contrib/llvm/include/llvm/IR/LegacyPassManagers.h
+++ b/contrib/llvm/include/llvm/IR/LegacyPassManagers.h
@@ -474,6 +474,6 @@ public:
Timer *getPassTimer(Pass *);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/LegacyPassNameParser.h b/contrib/llvm/include/llvm/IR/LegacyPassNameParser.h
index 39ae80d..3f98e76 100644
--- a/contrib/llvm/include/llvm/IR/LegacyPassNameParser.h
+++ b/contrib/llvm/include/llvm/IR/LegacyPassNameParser.h
@@ -134,6 +134,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Mangler.h b/contrib/llvm/include/llvm/IR/Mangler.h
index 1e6b5b1..6bda319 100644
--- a/contrib/llvm/include/llvm/IR/Mangler.h
+++ b/contrib/llvm/include/llvm/IR/Mangler.h
@@ -64,6 +64,6 @@ public:
ManglerPrefixTy PrefixTy = Mangler::Default) const;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Metadata.h b/contrib/llvm/include/llvm/IR/Metadata.h
index 60718f5..bf4a030 100644
--- a/contrib/llvm/include/llvm/IR/Metadata.h
+++ b/contrib/llvm/include/llvm/IR/Metadata.h
@@ -1203,6 +1203,6 @@ public:
}
};
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Module.h b/contrib/llvm/include/llvm/IR/Module.h
index d8636df..598a58e 100644
--- a/contrib/llvm/include/llvm/IR/Module.h
+++ b/contrib/llvm/include/llvm/IR/Module.h
@@ -512,28 +512,28 @@ public:
const GlobalListType &getGlobalList() const { return GlobalList; }
/// Get the Module's list of global variables.
GlobalListType &getGlobalList() { return GlobalList; }
- static iplist<GlobalVariable> Module::*getSublistAccess(GlobalVariable*) {
+ static GlobalListType Module::*getSublistAccess(GlobalVariable*) {
return &Module::GlobalList;
}
/// Get the Module's list of functions (constant).
const FunctionListType &getFunctionList() const { return FunctionList; }
/// Get the Module's list of functions.
FunctionListType &getFunctionList() { return FunctionList; }
- static iplist<Function> Module::*getSublistAccess(Function*) {
+ static FunctionListType Module::*getSublistAccess(Function*) {
return &Module::FunctionList;
}
/// Get the Module's list of aliases (constant).
const AliasListType &getAliasList() const { return AliasList; }
/// Get the Module's list of aliases.
AliasListType &getAliasList() { return AliasList; }
- static iplist<GlobalAlias> Module::*getSublistAccess(GlobalAlias*) {
+ static AliasListType Module::*getSublistAccess(GlobalAlias*) {
return &Module::AliasList;
}
/// Get the Module's list of named metadata (constant).
const NamedMDListType &getNamedMDList() const { return NamedMDList; }
/// Get the Module's list of named metadata.
NamedMDListType &getNamedMDList() { return NamedMDList; }
- static ilist<NamedMDNode> Module::*getSublistAccess(NamedMDNode*) {
+ static NamedMDListType Module::*getSublistAccess(NamedMDNode*) {
return &Module::NamedMDList;
}
/// Get the symbol table of global variable and function identifiers
@@ -694,6 +694,6 @@ inline Module *unwrap(LLVMModuleProviderRef MP) {
return reinterpret_cast<Module*>(MP);
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/NoFolder.h b/contrib/llvm/include/llvm/IR/NoFolder.h
index 61f4817..55b6798 100644
--- a/contrib/llvm/include/llvm/IR/NoFolder.h
+++ b/contrib/llvm/include/llvm/IR/NoFolder.h
@@ -294,6 +294,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/OperandTraits.h b/contrib/llvm/include/llvm/IR/OperandTraits.h
index 0e4b195..91ec8d2 100644
--- a/contrib/llvm/include/llvm/IR/OperandTraits.h
+++ b/contrib/llvm/include/llvm/IR/OperandTraits.h
@@ -92,10 +92,10 @@ struct VariadicOperandTraits {
template <unsigned MINARITY = 1>
struct HungoffOperandTraits {
static Use *op_begin(User* U) {
- return U->OperandList;
+ return U->getOperandList();
}
static Use *op_end(User* U) {
- return U->OperandList + U->getNumOperands();
+ return U->getOperandList() + U->getNumOperands();
}
static unsigned operands(const User *U) {
return U->getNumOperands();
@@ -155,6 +155,6 @@ template <int Idx_nocapture> const Use &CLASS::Op() const { \
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Operator.h b/contrib/llvm/include/llvm/IR/Operator.h
index 1b9102e..82f516e 100644
--- a/contrib/llvm/include/llvm/IR/Operator.h
+++ b/contrib/llvm/include/llvm/IR/Operator.h
@@ -491,6 +491,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/PassManager.h b/contrib/llvm/include/llvm/IR/PassManager.h
index 4166bab..2ff1a6f 100644
--- a/contrib/llvm/include/llvm/IR/PassManager.h
+++ b/contrib/llvm/include/llvm/IR/PassManager.h
@@ -890,6 +890,6 @@ struct InvalidateAllAnalysesPass {
static StringRef name() { return "InvalidateAllAnalysesPass"; }
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/PassManagerInternal.h b/contrib/llvm/include/llvm/IR/PassManagerInternal.h
index 92de10b..7921b4f 100644
--- a/contrib/llvm/include/llvm/IR/PassManagerInternal.h
+++ b/contrib/llvm/include/llvm/IR/PassManagerInternal.h
@@ -345,6 +345,6 @@ struct AnalysisPassModel<IRUnitT, PassT, false> : AnalysisPassConcept<IRUnitT> {
};
} // End namespace detail
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Statepoint.h b/contrib/llvm/include/llvm/IR/Statepoint.h
index cd09618..8159cde 100644
--- a/contrib/llvm/include/llvm/IR/Statepoint.h
+++ b/contrib/llvm/include/llvm/IR/Statepoint.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef __LLVM_IR_STATEPOINT_H
-#define __LLVM_IR_STATEPOINT_H
+#ifndef LLVM_IR_STATEPOINT_H
+#define LLVM_IR_STATEPOINT_H
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/BasicBlock.h"
@@ -198,7 +198,7 @@ public:
/// May contain several relocations for the same base/derived pair.
/// For example this could happen due to relocations on unwinding
/// path of invoke.
- std::vector<GCRelocateOperands> getRelocates(ImmutableStatepoint &IS);
+ std::vector<GCRelocateOperands> getRelocates();
#ifndef NDEBUG
/// Asserts if this statepoint is malformed. Common cases for failure
@@ -315,12 +315,11 @@ public:
template <typename InstructionTy, typename ValueTy, typename CallSiteTy>
std::vector<GCRelocateOperands>
-StatepointBase<InstructionTy, ValueTy, CallSiteTy>::getRelocates(
- ImmutableStatepoint &IS) {
+StatepointBase<InstructionTy, ValueTy, CallSiteTy>::getRelocates() {
std::vector<GCRelocateOperands> Result;
- ImmutableCallSite StatepointCS = IS.getCallSite();
+ CallSiteTy StatepointCS = getCallSite();
// Search for relocated pointers. Note that working backwards from the
// gc_relocates ensures that we only get pairs which are actually relocated
@@ -349,6 +348,6 @@ StatepointBase<InstructionTy, ValueTy, CallSiteTy>::getRelocates(
}
return Result;
}
-}
+} // namespace llvm
#endif
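
getRelocates() is now a plain member query: the statepoint finds its own call site instead of being handed an ImmutableStatepoint. A sketch, assuming I is known to be a gc.statepoint call:

#include "llvm/IR/Statepoint.h"
#include <cstddef>
using namespace llvm;

// Count the gc.relocates hanging off one statepoint.
std::size_t countRelocates(const Instruction *I) {
  ImmutableStatepoint SP(I); // asserts that I really is a statepoint
  return SP.getRelocates().size();
}
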
diff --git a/contrib/llvm/include/llvm/IR/SymbolTableListTraits.h b/contrib/llvm/include/llvm/IR/SymbolTableListTraits.h
index 0a5149c..ef69498 100644
--- a/contrib/llvm/include/llvm/IR/SymbolTableListTraits.h
+++ b/contrib/llvm/include/llvm/IR/SymbolTableListTraits.h
@@ -73,6 +73,6 @@ public:
static ValueSymbolTable *toPtr(ValueSymbolTable &R) { return &R; }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Type.h b/contrib/llvm/include/llvm/IR/Type.h
index 6ab0bd0..a626046 100644
--- a/contrib/llvm/include/llvm/IR/Type.h
+++ b/contrib/llvm/include/llvm/IR/Type.h
@@ -484,6 +484,6 @@ inline LLVMTypeRef *wrap(Type **Tys) {
return reinterpret_cast<LLVMTypeRef*>(const_cast<Type**>(Tys));
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/TypeFinder.h b/contrib/llvm/include/llvm/IR/TypeFinder.h
index 73a63ad..aa50d0e 100644
--- a/contrib/llvm/include/llvm/IR/TypeFinder.h
+++ b/contrib/llvm/include/llvm/IR/TypeFinder.h
@@ -74,6 +74,6 @@ private:
void incorporateMDNode(const MDNode *V);
};
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Use.h b/contrib/llvm/include/llvm/IR/Use.h
index 160d71b..8f87df6 100644
--- a/contrib/llvm/include/llvm/IR/Use.h
+++ b/contrib/llvm/include/llvm/IR/Use.h
@@ -168,6 +168,6 @@ template <> struct simplify_type<const Use> {
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Use, LLVMUseRef)
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/User.h b/contrib/llvm/include/llvm/IR/User.h
index 4559005..41d5770 100644
--- a/contrib/llvm/include/llvm/IR/User.h
+++ b/contrib/llvm/include/llvm/IR/User.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Value.h"
+#include "llvm/Support/AlignOf.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
@@ -34,33 +35,45 @@ struct OperandTraits;
class User : public Value {
User(const User &) = delete;
- void *operator new(size_t) = delete;
template <unsigned>
friend struct HungoffOperandTraits;
virtual void anchor();
+
protected:
- /// \brief This is a pointer to the array of Uses for this User.
+ /// Allocate a User with an operand pointer co-allocated.
+ ///
+ /// This is used for subclasses which need to allocate a variable number
+ /// of operands, i.e., 'hung off uses'.
+ void *operator new(size_t Size);
+
+ /// Allocate a User with the operands co-allocated.
///
- /// For nodes of fixed arity (e.g. a binary operator) this array will live
- /// prefixed to some derived class instance. For nodes of resizable variable
- /// arity (e.g. PHINodes, SwitchInst etc.), this memory will be dynamically
- /// allocated and should be destroyed by the classes' virtual dtor.
- Use *OperandList;
+ /// This is used for subclasses which have a fixed number of operands.
+ void *operator new(size_t Size, unsigned Us);
- void *operator new(size_t s, unsigned Us);
User(Type *ty, unsigned vty, Use *OpList, unsigned NumOps)
- : Value(ty, vty), OperandList(OpList) {
- NumOperands = NumOps;
- }
- Use *allocHungoffUses(unsigned) const;
- void dropHungoffUses() {
- Use::zap(OperandList, OperandList + NumOperands, true);
- OperandList = nullptr;
- // Reset NumOperands so User::operator delete() does the right thing.
- NumOperands = 0;
+ : Value(ty, vty) {
+ assert(NumOps < (1u << NumUserOperandsBits) && "Too many operands");
+ NumUserOperands = NumOps;
+ // If we have hung off uses, then the operand list should initially be
+ // null.
+ assert((!HasHungOffUses || !getOperandList()) &&
+ "Error in initializing hung off uses for User");
}
+
+ /// \brief Allocate the array of Uses, followed by a pointer
+ /// (with bottom bit set) to the User.
+ /// \param IsPhi identifies callers which are phi nodes and which need
+ /// N BasicBlock* allocated along with the N Uses.
+ void allocHungoffUses(unsigned N, bool IsPhi = false);
+
+ /// \brief Grow the number of hung off uses. Note that allocHungoffUses
+ /// should be called if there are no uses.
+ void growHungoffUses(unsigned N, bool IsPhi = false);
+
public:
- ~User() override { Use::zap(OperandList, OperandList + NumOperands); }
+ ~User() override {
+ }
/// \brief Free memory allocated for User and Use objects.
void operator delete(void *Usr);
/// \brief Placement delete - required by std, but never called.
@@ -83,28 +96,81 @@ protected:
template <int Idx> const Use &Op() const {
return OpFrom<Idx>(this);
}
+private:
+ Use *&getHungOffOperands() { return *(reinterpret_cast<Use **>(this) - 1); }
+
+ Use *getIntrusiveOperands() {
+ return reinterpret_cast<Use *>(this) - NumUserOperands;
+ }
+
+ void setOperandList(Use *NewList) {
+ assert(HasHungOffUses &&
+ "Setting operand list only required for hung off uses");
+ getHungOffOperands() = NewList;
+ }
public:
+ Use *getOperandList() {
+ return HasHungOffUses ? getHungOffOperands() : getIntrusiveOperands();
+ }
+ const Use *getOperandList() const {
+ return const_cast<User *>(this)->getOperandList();
+ }
Value *getOperand(unsigned i) const {
- assert(i < NumOperands && "getOperand() out of range!");
- return OperandList[i];
+ assert(i < NumUserOperands && "getOperand() out of range!");
+ return getOperandList()[i];
}
void setOperand(unsigned i, Value *Val) {
- assert(i < NumOperands && "setOperand() out of range!");
+ assert(i < NumUserOperands && "setOperand() out of range!");
assert((!isa<Constant>((const Value*)this) ||
isa<GlobalValue>((const Value*)this)) &&
"Cannot mutate a constant with setOperand!");
- OperandList[i] = Val;
+ getOperandList()[i] = Val;
}
const Use &getOperandUse(unsigned i) const {
- assert(i < NumOperands && "getOperandUse() out of range!");
- return OperandList[i];
+ assert(i < NumUserOperands && "getOperandUse() out of range!");
+ return getOperandList()[i];
}
Use &getOperandUse(unsigned i) {
- assert(i < NumOperands && "getOperandUse() out of range!");
- return OperandList[i];
+ assert(i < NumUserOperands && "getOperandUse() out of range!");
+ return getOperandList()[i];
}
- unsigned getNumOperands() const { return NumOperands; }
+ unsigned getNumOperands() const { return NumUserOperands; }
+
+ /// Set the number of operands on a GlobalVariable.
+ ///
+ /// GlobalVariable always allocates space for a single operand, but
+ /// doesn't always use it.
+ ///
+ /// FIXME: Because the number of operands is used to find the start of
+ /// the allocated memory in operator delete, we must always report having
+ /// 1 operand before delete runs.
+ void setGlobalVariableNumOperands(unsigned NumOps) {
+ assert(NumOps <= 1 && "GlobalVariable can only have 0 or 1 operands");
+ NumUserOperands = NumOps;
+ }
+
+ /// Set the number of operands on a Function.
+ ///
+ /// Function always allocates space for a single operand, but
+ /// doesn't always use it.
+ ///
+ /// FIXME: Because the number of operands is used to find the start of
+ /// the allocated memory in operator delete, we must always report having
+ /// 1 operand before delete runs.
+ void setFunctionNumOperands(unsigned NumOps) {
+ assert(NumOps <= 1 && "Function can only have 0 or 1 operands");
+ NumUserOperands = NumOps;
+ }
+
+ /// \brief Subclasses with hung off uses need to manage the operand count
+ /// themselves. In these instances, the operand count isn't used to find the
+ /// OperandList, so there's no issue in having the operand count change.
+ void setNumHungOffUseOperands(unsigned NumOps) {
+ assert(HasHungOffUses && "Must have hung off uses to use this method");
+ assert(NumOps < (1u << NumUserOperandsBits) && "Too many operands");
+ NumUserOperands = NumOps;
+ }
// ---------------------------------------------------------------------------
// Operand Iterator interface...
@@ -114,14 +180,18 @@ public:
typedef iterator_range<op_iterator> op_range;
typedef iterator_range<const_op_iterator> const_op_range;
- inline op_iterator op_begin() { return OperandList; }
- inline const_op_iterator op_begin() const { return OperandList; }
- inline op_iterator op_end() { return OperandList+NumOperands; }
- inline const_op_iterator op_end() const { return OperandList+NumOperands; }
- inline op_range operands() {
+ op_iterator op_begin() { return getOperandList(); }
+ const_op_iterator op_begin() const { return getOperandList(); }
+ op_iterator op_end() {
+ return getOperandList() + NumUserOperands;
+ }
+ const_op_iterator op_end() const {
+ return getOperandList() + NumUserOperands;
+ }
+ op_range operands() {
return op_range(op_begin(), op_end());
}
- inline const_op_range operands() const {
+ const_op_range operands() const {
return const_op_range(op_begin(), op_end());
}
@@ -136,13 +206,13 @@ public:
Value *operator->() const { return operator*(); }
};
- inline value_op_iterator value_op_begin() {
+ value_op_iterator value_op_begin() {
return value_op_iterator(op_begin());
}
- inline value_op_iterator value_op_end() {
+ value_op_iterator value_op_end() {
return value_op_iterator(op_end());
}
- inline iterator_range<value_op_iterator> operand_values() {
+ iterator_range<value_op_iterator> operand_values() {
return iterator_range<value_op_iterator>(value_op_begin(), value_op_end());
}
@@ -170,6 +240,11 @@ public:
return isa<Instruction>(V) || isa<Constant>(V);
}
};
+// Either Use objects, or a Use pointer can be prepended to User.
+static_assert(AlignOf<Use>::Alignment >= AlignOf<User>::Alignment,
+ "Alignment is insufficient after objects prepended to User");
+static_assert(AlignOf<Use *>::Alignment >= AlignOf<User>::Alignment,
+ "Alignment is insufficient after objects prepended to User");
template<> struct simplify_type<User::op_iterator> {
typedef Value* SimpleType;
@@ -184,6 +259,6 @@ template<> struct simplify_type<User::const_op_iterator> {
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
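
The refactoring above replaces the stored OperandList pointer with two layouts that getOperandList() selects between at run time. For readers of the API nothing changes; a purely descriptive sketch of the layouts and of ordinary operand traversal:

#include "llvm/IR/User.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Intrusive layout (fixed arity, operator new(Size, Us)):
//   [Use 0][Use 1]...[Use N-1][User object]
//   getOperandList() == reinterpret_cast<Use*>(this) - NumUserOperands
//
// Hung-off layout (resizable arity, operator new(Size)):
//   [Use* pointer][User object]      separate array: [Use 0]...[Use N-1]
//   getOperandList() == *(reinterpret_cast<Use**>(this) - 1)
//
// Either way, the public accessors behave as before:
void printOperands(const User &U) {
  for (unsigned i = 0, e = U.getNumOperands(); i != e; ++i)
    errs() << i << ": " << *U.getOperand(i) << "\n";
}
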
diff --git a/contrib/llvm/include/llvm/IR/Value.h b/contrib/llvm/include/llvm/IR/Value.h
index 19a1d6c..6b36ba6 100644
--- a/contrib/llvm/include/llvm/IR/Value.h
+++ b/contrib/llvm/include/llvm/IR/Value.h
@@ -100,10 +100,15 @@ protected:
/// This is stored here to save space in User on 64-bit hosts. Since most
/// instances of Value have operands, 32-bit hosts aren't significantly
/// affected.
- unsigned NumOperands : 30;
+ ///
+ /// Note, this should *NOT* be used directly by any class other than User.
+ /// User uses this value to find the Use list.
+ static const unsigned NumUserOperandsBits = 29;
+ unsigned NumUserOperands : 29;
bool IsUsedByMD : 1;
bool HasName : 1;
+ bool HasHungOffUses : 1;
private:
template <typename UseT> // UseT == 'Use' or 'const Use'
@@ -711,6 +716,6 @@ inline LLVMValueRef *wrap(const Value **Vals) {
return reinterpret_cast<LLVMValueRef*>(const_cast<Value**>(Vals));
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/ValueHandle.h b/contrib/llvm/include/llvm/IR/ValueHandle.h
index 355748e..e92aed3 100644
--- a/contrib/llvm/include/llvm/IR/ValueHandle.h
+++ b/contrib/llvm/include/llvm/IR/ValueHandle.h
@@ -380,6 +380,6 @@ public:
virtual void allUsesReplacedWith(Value *) {}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/ValueSymbolTable.h b/contrib/llvm/include/llvm/IR/ValueSymbolTable.h
index bf1fade..8219f50 100644
--- a/contrib/llvm/include/llvm/IR/ValueSymbolTable.h
+++ b/contrib/llvm/include/llvm/IR/ValueSymbolTable.h
@@ -128,6 +128,6 @@ private:
/// @}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IR/Verifier.h b/contrib/llvm/include/llvm/IR/Verifier.h
index 89039d2..7da4d97 100644
--- a/contrib/llvm/include/llvm/IR/Verifier.h
+++ b/contrib/llvm/include/llvm/IR/Verifier.h
@@ -72,6 +72,6 @@ public:
static StringRef name() { return "VerifierPass"; }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/IRReader/IRReader.h b/contrib/llvm/include/llvm/IRReader/IRReader.h
index 2d9ace0..bdaea6d 100644
--- a/contrib/llvm/include/llvm/IRReader/IRReader.h
+++ b/contrib/llvm/include/llvm/IRReader/IRReader.h
@@ -43,6 +43,6 @@ std::unique_ptr<Module> parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err,
/// for it.
std::unique_ptr<Module> parseIRFile(StringRef Filename, SMDiagnostic &Err,
LLVMContext &Context);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/InitializePasses.h b/contrib/llvm/include/llvm/InitializePasses.h
index 4f95c88..33ffadb 100644
--- a/contrib/llvm/include/llvm/InitializePasses.h
+++ b/contrib/llvm/include/llvm/InitializePasses.h
@@ -187,6 +187,7 @@ void initializeMachineBlockPlacementPass(PassRegistry&);
void initializeMachineBlockPlacementStatsPass(PassRegistry&);
void initializeMachineBranchProbabilityInfoPass(PassRegistry&);
void initializeMachineCSEPass(PassRegistry&);
+void initializeImplicitNullChecksPass(PassRegistry&);
void initializeMachineDominatorTreePass(PassRegistry&);
void initializeMachineDominanceFrontierPass(PassRegistry&);
void initializeMachinePostDominatorTreePass(PassRegistry&);
@@ -241,6 +242,7 @@ void initializeRegionOnlyViewerPass(PassRegistry&);
void initializeRegionPrinterPass(PassRegistry&);
void initializeRegionViewerPass(PassRegistry&);
void initializeRewriteStatepointsForGCPass(PassRegistry&);
+void initializeSafeStackPass(PassRegistry&);
void initializeSCCPPass(PassRegistry&);
void initializeSROAPass(PassRegistry&);
void initializeSROA_DTPass(PassRegistry&);
@@ -300,6 +302,6 @@ void initializePlaceSafepointsPass(PassRegistry&);
void initializeDwarfEHPreparePass(PassRegistry&);
void initializeFloat2IntPass(PassRegistry&);
void initializeLoopDistributePass(PassRegistry&);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/LTO/LTOCodeGenerator.h b/contrib/llvm/include/llvm/LTO/LTOCodeGenerator.h
index 0c46fc0..c079f79 100644
--- a/contrib/llvm/include/llvm/LTO/LTOCodeGenerator.h
+++ b/contrib/llvm/include/llvm/LTO/LTOCodeGenerator.h
@@ -177,5 +177,5 @@ private:
bool ShouldInternalize = true;
bool ShouldEmbedUselists = false;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/LTO/LTOModule.h b/contrib/llvm/include/llvm/LTO/LTOModule.h
index 53c2b8e..c2eb362 100644
--- a/contrib/llvm/include/llvm/LTO/LTOModule.h
+++ b/contrib/llvm/include/llvm/LTO/LTOModule.h
@@ -143,6 +143,12 @@ public:
return nullptr;
}
+ const GlobalValue *getSymbolGV(uint32_t index) {
+ if (index < _symbols.size())
+ return _symbols[index].symbol;
+ return nullptr;
+ }
+
/// Get the number of dependent libraries
uint32_t getDependentLibraryCount() {
return _deplibs.size();
@@ -218,5 +224,5 @@ private:
static LTOModule *makeLTOModule(MemoryBufferRef Buffer, TargetOptions options,
std::string &errMsg, LLVMContext *Context);
};
-}
+} // namespace llvm
#endif
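
The new getSymbolGV accessor pairs with the existing per-index symbol queries. A sketch that lists symbols backed by a GlobalValue, assuming the pre-existing getSymbolCount/getSymbolName accessors of this class:

#include "llvm/LTO/LTOModule.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void listSymbolsWithGV(LTOModule &Mod) {
  for (uint32_t I = 0, E = Mod.getSymbolCount(); I != E; ++I)
    if (const GlobalValue *GV = Mod.getSymbolGV(I))
      errs() << Mod.getSymbolName(I) << " -> " << GV->getName() << "\n";
}
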
diff --git a/contrib/llvm/include/llvm/LibDriver/LibDriver.h b/contrib/llvm/include/llvm/LibDriver/LibDriver.h
new file mode 100644
index 0000000..99c783c
--- /dev/null
+++ b/contrib/llvm/include/llvm/LibDriver/LibDriver.h
@@ -0,0 +1,24 @@
+//===- llvm/LibDriver/LibDriver.h - lib.exe-compatible driver ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines an interface to a lib.exe-compatible driver that also understands
+// bitcode files. Used by llvm-lib and lld-link2 /lib.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBDRIVER_LIBDRIVER_H
+#define LLVM_LIBDRIVER_LIBDRIVER_H
+
+namespace llvm {
+
+int libDriverMain(int argc, const char **argv);
+
+}
+
+#endif
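
A lib.exe-style tool built on this header reduces to forwarding its arguments; a minimal sketch of such a main (the const_cast only adds const):

#include "llvm/LibDriver/LibDriver.h"

int main(int argc, char **argv) {
  return llvm::libDriverMain(argc, const_cast<const char **>(argv));
}
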
diff --git a/contrib/llvm/include/llvm/LineEditor/LineEditor.h b/contrib/llvm/include/llvm/LineEditor/LineEditor.h
index bb106f8..e644b19 100644
--- a/contrib/llvm/include/llvm/LineEditor/LineEditor.h
+++ b/contrib/llvm/include/llvm/LineEditor/LineEditor.h
@@ -148,6 +148,6 @@ private:
std::unique_ptr<const CompleterConcept> Completer;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/LinkAllPasses.h b/contrib/llvm/include/llvm/LinkAllPasses.h
index bf21678..8ac1b21 100644
--- a/contrib/llvm/include/llvm/LinkAllPasses.h
+++ b/contrib/llvm/include/llvm/LinkAllPasses.h
@@ -98,7 +98,7 @@ namespace {
(void) llvm::createLICMPass();
(void) llvm::createLazyValueInfoPass();
(void) llvm::createLoopExtractorPass();
- (void)llvm::createLoopInterchangePass();
+ (void) llvm::createLoopInterchangePass();
(void) llvm::createLoopSimplifyPass();
(void) llvm::createLoopStrengthReducePass();
(void) llvm::createLoopRerollPass();
@@ -131,6 +131,7 @@ namespace {
(void) llvm::createRegionPrinterPass();
(void) llvm::createRegionViewerPass();
(void) llvm::createSCCPPass();
+ (void) llvm::createSafeStackPass();
(void) llvm::createScalarReplAggregatesPass();
(void) llvm::createSingleLoopExtractorPass();
(void) llvm::createStripSymbolsPass();
diff --git a/contrib/llvm/include/llvm/Linker/Linker.h b/contrib/llvm/include/llvm/Linker/Linker.h
index c43b90e..de23acb 100644
--- a/contrib/llvm/include/llvm/Linker/Linker.h
+++ b/contrib/llvm/include/llvm/Linker/Linker.h
@@ -90,6 +90,6 @@ private:
DiagnosticHandlerFunction DiagnosticHandler;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCAsmBackend.h b/contrib/llvm/include/llvm/MC/MCAsmBackend.h
index 2bfad2d..07bba90 100644
--- a/contrib/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/contrib/llvm/include/llvm/MC/MCAsmBackend.h
@@ -138,6 +138,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCAsmInfo.h b/contrib/llvm/include/llvm/MC/MCAsmInfo.h
index 9bb0fa6..f72959a5 100644
--- a/contrib/llvm/include/llvm/MC/MCAsmInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCAsmInfo.h
@@ -39,7 +39,7 @@ enum class EncodingType {
X86, /// Windows x86, uses no CFI, just EH tables
MIPS = Alpha,
};
-}
+} // namespace WinEH
enum class ExceptionHandling {
None, /// No exception support
@@ -555,6 +555,6 @@ public:
bool shouldUseLogicalShr() const { return UseLogicalShr; }
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCAsmInfoCOFF.h b/contrib/llvm/include/llvm/MC/MCAsmInfoCOFF.h
index 56444f3..24f03e4 100644
--- a/contrib/llvm/include/llvm/MC/MCAsmInfoCOFF.h
+++ b/contrib/llvm/include/llvm/MC/MCAsmInfoCOFF.h
@@ -30,7 +30,7 @@ namespace llvm {
protected:
explicit MCAsmInfoGNUCOFF();
};
-}
+} // namespace llvm
#endif // LLVM_MC_MCASMINFOCOFF_H
diff --git a/contrib/llvm/include/llvm/MC/MCAssembler.h b/contrib/llvm/include/llvm/MC/MCAssembler.h
index a6178c2..0642af8 100644
--- a/contrib/llvm/include/llvm/MC/MCAssembler.h
+++ b/contrib/llvm/include/llvm/MC/MCAssembler.h
@@ -49,7 +49,7 @@ class MCFragment : public ilist_node<MCFragment> {
void operator=(const MCFragment &) = delete;
public:
- enum FragmentType {
+ enum FragmentType : uint8_t {
FT_Align,
FT_Data,
FT_CompactEncodedInst,
@@ -65,6 +65,18 @@ public:
private:
FragmentType Kind;
+protected:
+ bool HasInstructions;
+
+private:
+ /// \brief Should this fragment be aligned to the end of a bundle?
+ bool AlignToBundleEnd;
+
+ uint8_t BundlePadding;
+
+ /// LayoutOrder - The layout order of this fragment.
+ unsigned LayoutOrder;
+
/// The data for the section this fragment is in.
MCSection *Parent;
@@ -81,18 +93,25 @@ private:
/// initialized.
uint64_t Offset;
- /// LayoutOrder - The layout order of this fragment.
- unsigned LayoutOrder;
-
/// @}
protected:
- MCFragment(FragmentType Kind, MCSection *Parent = nullptr);
+ MCFragment(FragmentType Kind, bool HasInstructions,
+ uint8_t BundlePadding, MCSection *Parent = nullptr);
-public:
- // Only for sentinel.
+ ~MCFragment();
+private:
+
+  // This is a friend so that the sentinel can be created.
+ friend struct ilist_sentinel_traits<MCFragment>;
MCFragment();
- virtual ~MCFragment();
+
+public:
+ /// Destroys the current fragment.
+ ///
+ /// This must be used instead of delete as MCFragment is non-virtual.
+ /// This method will dispatch to the appropriate subclass.
+ void destroy();
FragmentType getKind() const { return Kind; }
@@ -107,22 +126,22 @@ public:
/// \brief Does this fragment have instructions emitted into it? By default
/// this is false, but specific fragment types may set it to true.
- virtual bool hasInstructions() const { return false; }
+ bool hasInstructions() const { return HasInstructions; }
/// \brief Should this fragment be placed at the end of an aligned bundle?
- virtual bool alignToBundleEnd() const { return false; }
- virtual void setAlignToBundleEnd(bool V) {}
+ bool alignToBundleEnd() const { return AlignToBundleEnd; }
+ void setAlignToBundleEnd(bool V) { AlignToBundleEnd = V; }
/// \brief Get the padding size that must be inserted before this fragment.
/// Used for bundling. By default, no padding is inserted.
/// Note that padding size is restricted to 8 bits. This is an optimization
/// to reduce the amount of space used for each fragment. In practice, larger
/// padding should never be required.
- virtual uint8_t getBundlePadding() const { return 0; }
+ uint8_t getBundlePadding() const { return BundlePadding; }
/// \brief Set the padding size for this fragment. By default it's a no-op,
/// and only some fragments have a meaningful implementation.
- virtual void setBundlePadding(uint8_t N) {}
+ void setBundlePadding(uint8_t N) { BundlePadding = N; }
void dump();
};
@@ -131,22 +150,12 @@ public:
/// data.
///
class MCEncodedFragment : public MCFragment {
- virtual void anchor();
-
- uint8_t BundlePadding;
+protected:
+ MCEncodedFragment(MCFragment::FragmentType FType, bool HasInstructions,
+ MCSection *Sec)
+ : MCFragment(FType, HasInstructions, 0, Sec) {}
public:
- MCEncodedFragment(MCFragment::FragmentType FType, MCSection *Sec = nullptr)
- : MCFragment(FType, Sec), BundlePadding(0) {}
- ~MCEncodedFragment() override;
-
- virtual SmallVectorImpl<char> &getContents() = 0;
- virtual const SmallVectorImpl<char> &getContents() const = 0;
-
- uint8_t getBundlePadding() const override { return BundlePadding; }
-
- void setBundlePadding(uint8_t N) override { BundlePadding = N; }
-
static bool classof(const MCFragment *F) {
MCFragment::FragmentType Kind = F->getKind();
switch (Kind) {
@@ -161,28 +170,52 @@ public:
};
/// Interface implemented by fragments that contain encoded instructions and/or
-/// data and also have fixups registered.
+/// data.
///
-class MCEncodedFragmentWithFixups : public MCEncodedFragment {
- void anchor() override;
+template<unsigned ContentsSize>
+class MCEncodedFragmentWithContents : public MCEncodedFragment {
+ SmallVector<char, ContentsSize> Contents;
+
+protected:
+ MCEncodedFragmentWithContents(MCFragment::FragmentType FType,
+ bool HasInstructions,
+ MCSection *Sec)
+ : MCEncodedFragment(FType, HasInstructions, Sec) {}
public:
- MCEncodedFragmentWithFixups(MCFragment::FragmentType FType,
- MCSection *Sec = nullptr)
- : MCEncodedFragment(FType, Sec) {}
+ SmallVectorImpl<char> &getContents() { return Contents; }
+ const SmallVectorImpl<char> &getContents() const { return Contents; }
+};
- ~MCEncodedFragmentWithFixups() override;
+/// Interface implemented by fragments that contain encoded instructions and/or
+/// data and also have fixups registered.
+///
+template<unsigned ContentsSize, unsigned FixupsSize>
+class MCEncodedFragmentWithFixups :
+ public MCEncodedFragmentWithContents<ContentsSize> {
+ /// Fixups - The list of fixups in this fragment.
+ SmallVector<MCFixup, FixupsSize> Fixups;
+
+protected:
+ MCEncodedFragmentWithFixups(MCFragment::FragmentType FType,
+ bool HasInstructions,
+ MCSection *Sec)
+ : MCEncodedFragmentWithContents<ContentsSize>(FType, HasInstructions,
+ Sec) {}
+
+public:
typedef SmallVectorImpl<MCFixup>::const_iterator const_fixup_iterator;
typedef SmallVectorImpl<MCFixup>::iterator fixup_iterator;
- virtual SmallVectorImpl<MCFixup> &getFixups() = 0;
- virtual const SmallVectorImpl<MCFixup> &getFixups() const = 0;
+ SmallVectorImpl<MCFixup> &getFixups() { return Fixups; }
+ const SmallVectorImpl<MCFixup> &getFixups() const { return Fixups; }
- virtual fixup_iterator fixup_begin() = 0;
- virtual const_fixup_iterator fixup_begin() const = 0;
- virtual fixup_iterator fixup_end() = 0;
- virtual const_fixup_iterator fixup_end() const = 0;
+ fixup_iterator fixup_begin() { return Fixups.begin(); }
+ const_fixup_iterator fixup_begin() const { return Fixups.begin(); }
+
+ fixup_iterator fixup_end() { return Fixups.end(); }
+ const_fixup_iterator fixup_end() const { return Fixups.end(); }
static bool classof(const MCFragment *F) {
MCFragment::FragmentType Kind = F->getKind();
@@ -192,43 +225,12 @@ public:
/// Fragment for data and encoded instructions.
///
-class MCDataFragment : public MCEncodedFragmentWithFixups {
- void anchor() override;
-
- /// \brief Does this fragment contain encoded instructions anywhere in it?
- bool HasInstructions;
-
- /// \brief Should this fragment be aligned to the end of a bundle?
- bool AlignToBundleEnd;
-
- SmallVector<char, 32> Contents;
-
- /// Fixups - The list of fixups in this fragment.
- SmallVector<MCFixup, 4> Fixups;
-
+class MCDataFragment : public MCEncodedFragmentWithFixups<32, 4> {
public:
MCDataFragment(MCSection *Sec = nullptr)
- : MCEncodedFragmentWithFixups(FT_Data, Sec), HasInstructions(false),
- AlignToBundleEnd(false) {}
-
- SmallVectorImpl<char> &getContents() override { return Contents; }
- const SmallVectorImpl<char> &getContents() const override { return Contents; }
-
- SmallVectorImpl<MCFixup> &getFixups() override { return Fixups; }
-
- const SmallVectorImpl<MCFixup> &getFixups() const override { return Fixups; }
+ : MCEncodedFragmentWithFixups<32, 4>(FT_Data, false, Sec) {}
- bool hasInstructions() const override { return HasInstructions; }
- virtual void setHasInstructions(bool V) { HasInstructions = V; }
-
- bool alignToBundleEnd() const override { return AlignToBundleEnd; }
- void setAlignToBundleEnd(bool V) override { AlignToBundleEnd = V; }
-
- fixup_iterator fixup_begin() override { return Fixups.begin(); }
- const_fixup_iterator fixup_begin() const override { return Fixups.begin(); }
-
- fixup_iterator fixup_end() override { return Fixups.end(); }
- const_fixup_iterator fixup_end() const override { return Fixups.end(); }
+ void setHasInstructions(bool V) { HasInstructions = V; }
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Data;
@@ -240,27 +242,12 @@ public:
/// it can be used instead of MCDataFragment and lead to lower memory
/// consumption.
///
-class MCCompactEncodedInstFragment : public MCEncodedFragment {
- void anchor() override;
-
- /// \brief Should this fragment be aligned to the end of a bundle?
- bool AlignToBundleEnd;
-
- SmallVector<char, 4> Contents;
-
+class MCCompactEncodedInstFragment : public MCEncodedFragmentWithContents<4> {
public:
MCCompactEncodedInstFragment(MCSection *Sec = nullptr)
- : MCEncodedFragment(FT_CompactEncodedInst, Sec), AlignToBundleEnd(false) {
+ : MCEncodedFragmentWithContents(FT_CompactEncodedInst, true, Sec) {
}
- bool hasInstructions() const override { return true; }
-
- SmallVectorImpl<char> &getContents() override { return Contents; }
- const SmallVectorImpl<char> &getContents() const override { return Contents; }
-
- bool alignToBundleEnd() const override { return AlignToBundleEnd; }
- void setAlignToBundleEnd(bool V) override { AlignToBundleEnd = V; }
-
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_CompactEncodedInst;
}
@@ -269,8 +256,7 @@ public:
/// A relaxable fragment holds on to its MCInst, since it may need to be
/// relaxed during the assembler layout and relaxation stage.
///
-class MCRelaxableFragment : public MCEncodedFragmentWithFixups {
- void anchor() override;
+class MCRelaxableFragment : public MCEncodedFragmentWithFixups<8, 1> {
/// Inst - The instruction this is a fragment for.
MCInst Inst;
@@ -280,48 +266,32 @@ class MCRelaxableFragment : public MCEncodedFragmentWithFixups {
/// in the assembler are not seen here.
const MCSubtargetInfo STI;
- /// Contents - Binary data for the currently encoded instruction.
- SmallVector<char, 8> Contents;
-
- /// Fixups - The list of fixups in this fragment.
- SmallVector<MCFixup, 1> Fixups;
-
public:
MCRelaxableFragment(const MCInst &Inst, const MCSubtargetInfo &STI,
MCSection *Sec = nullptr)
- : MCEncodedFragmentWithFixups(FT_Relaxable, Sec), Inst(Inst), STI(STI) {}
-
- SmallVectorImpl<char> &getContents() override { return Contents; }
- const SmallVectorImpl<char> &getContents() const override { return Contents; }
+ : MCEncodedFragmentWithFixups(FT_Relaxable, true, Sec),
+ Inst(Inst), STI(STI) {}
const MCInst &getInst() const { return Inst; }
void setInst(const MCInst &Value) { Inst = Value; }
const MCSubtargetInfo &getSubtargetInfo() { return STI; }
- SmallVectorImpl<MCFixup> &getFixups() override { return Fixups; }
-
- const SmallVectorImpl<MCFixup> &getFixups() const override { return Fixups; }
-
- bool hasInstructions() const override { return true; }
-
- fixup_iterator fixup_begin() override { return Fixups.begin(); }
- const_fixup_iterator fixup_begin() const override { return Fixups.begin(); }
-
- fixup_iterator fixup_end() override { return Fixups.end(); }
- const_fixup_iterator fixup_end() const override { return Fixups.end(); }
-
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Relaxable;
}
};
class MCAlignFragment : public MCFragment {
- virtual void anchor();
/// Alignment - The alignment to ensure, in bytes.
unsigned Alignment;
+ /// EmitNops - Flag to indicate that (optimal) NOPs should be emitted instead
+ /// of using the provided value. The exact interpretation of this flag is
+ /// target dependent.
+ bool EmitNops : 1;
+
/// Value - Value to use for filling padding bytes.
int64_t Value;
@@ -332,16 +302,12 @@ class MCAlignFragment : public MCFragment {
/// cannot be satisfied in this width then this fragment is ignored.
unsigned MaxBytesToEmit;
- /// EmitNops - Flag to indicate that (optimal) NOPs should be emitted instead
- /// of using the provided value. The exact interpretation of this flag is
- /// target dependent.
- bool EmitNops : 1;
-
public:
MCAlignFragment(unsigned Alignment, int64_t Value, unsigned ValueSize,
unsigned MaxBytesToEmit, MCSection *Sec = nullptr)
- : MCFragment(FT_Align, Sec), Alignment(Alignment), Value(Value),
- ValueSize(ValueSize), MaxBytesToEmit(MaxBytesToEmit), EmitNops(false) {}
+ : MCFragment(FT_Align, false, 0, Sec), Alignment(Alignment),
+ EmitNops(false), Value(Value),
+ ValueSize(ValueSize), MaxBytesToEmit(MaxBytesToEmit) {}
/// \name Accessors
/// @{
@@ -365,7 +331,6 @@ public:
};
class MCFillFragment : public MCFragment {
- virtual void anchor();
/// Value - Value to use for filling bytes.
int64_t Value;
@@ -380,7 +345,7 @@ class MCFillFragment : public MCFragment {
public:
MCFillFragment(int64_t Value, unsigned ValueSize, uint64_t Size,
MCSection *Sec = nullptr)
- : MCFragment(FT_Fill, Sec), Value(Value), ValueSize(ValueSize),
+ : MCFragment(FT_Fill, false, 0, Sec), Value(Value), ValueSize(ValueSize),
Size(Size) {
assert((!ValueSize || (Size % ValueSize) == 0) &&
"Fill size must be a multiple of the value size!");
@@ -403,7 +368,6 @@ public:
};
class MCOrgFragment : public MCFragment {
- virtual void anchor();
/// Offset - The offset this fragment should start at.
const MCExpr *Offset;
@@ -413,7 +377,7 @@ class MCOrgFragment : public MCFragment {
public:
MCOrgFragment(const MCExpr &Offset, int8_t Value, MCSection *Sec = nullptr)
- : MCFragment(FT_Org, Sec), Offset(&Offset), Value(Value) {}
+ : MCFragment(FT_Org, false, 0, Sec), Offset(&Offset), Value(Value) {}
/// \name Accessors
/// @{
@@ -430,7 +394,6 @@ public:
};
class MCLEBFragment : public MCFragment {
- virtual void anchor();
/// Value - The value this fragment should contain.
const MCExpr *Value;
@@ -442,7 +405,7 @@ class MCLEBFragment : public MCFragment {
public:
MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSection *Sec = nullptr)
- : MCFragment(FT_LEB, Sec), Value(&Value_), IsSigned(IsSigned_) {
+ : MCFragment(FT_LEB, false, 0, Sec), Value(&Value_), IsSigned(IsSigned_) {
Contents.push_back(0);
}
@@ -464,7 +427,6 @@ public:
};
class MCDwarfLineAddrFragment : public MCFragment {
- virtual void anchor();
/// LineDelta - the value of the difference between the two line numbers
/// between two .loc dwarf directives.
@@ -479,7 +441,8 @@ class MCDwarfLineAddrFragment : public MCFragment {
public:
MCDwarfLineAddrFragment(int64_t LineDelta, const MCExpr &AddrDelta,
MCSection *Sec = nullptr)
- : MCFragment(FT_Dwarf, Sec), LineDelta(LineDelta), AddrDelta(&AddrDelta) {
+ : MCFragment(FT_Dwarf, false, 0, Sec), LineDelta(LineDelta),
+ AddrDelta(&AddrDelta) {
Contents.push_back(0);
}
@@ -501,7 +464,6 @@ public:
};
class MCDwarfCallFrameFragment : public MCFragment {
- virtual void anchor();
/// AddrDelta - The expression for the difference of the two symbols that
/// make up the address delta between two .cfi_* dwarf directives.
@@ -511,7 +473,7 @@ class MCDwarfCallFrameFragment : public MCFragment {
public:
MCDwarfCallFrameFragment(const MCExpr &AddrDelta, MCSection *Sec = nullptr)
- : MCFragment(FT_DwarfFrame, Sec), AddrDelta(&AddrDelta) {
+ : MCFragment(FT_DwarfFrame, false, 0, Sec), AddrDelta(&AddrDelta) {
Contents.push_back(0);
}
@@ -531,13 +493,11 @@ public:
};
class MCSafeSEHFragment : public MCFragment {
- virtual void anchor();
-
const MCSymbol *Sym;
public:
MCSafeSEHFragment(const MCSymbol *Sym, MCSection *Sec = nullptr)
- : MCFragment(FT_SafeSEH, Sec), Sym(Sym) {}
+ : MCFragment(FT_SafeSEH, false, 0, Sec), Sym(Sym) {}
/// \name Accessors
/// @{
@@ -621,8 +581,6 @@ private:
SymbolDataListType Symbols;
- DenseSet<const MCSymbol *> LocalsUsedInReloc;
-
std::vector<IndirectSymbolData> IndirectSymbols;
std::vector<DataRegionData> DataRegions;
@@ -713,9 +671,6 @@ private:
MCFragment &F, const MCFixup &Fixup);
public:
- void addLocalUsedInReloc(const MCSymbol &Sym);
- bool isLocalUsedInReloc(const MCSymbol &Sym) const;
-
/// Compute the effective fragment size assuming it is laid out at the given
/// \p SectionAddress and \p FragmentOffset.
uint64_t computeFragmentSize(const MCAsmLayout &Layout,
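
A sketch, based only on the declarations above, of what the de-virtualized fragment hierarchy means for callers: contents and fixups now come from the MCEncodedFragmentWithContents/WithFixups template bases, and a fragment held through an MCFragment* must be torn down with destroy(). The function name is illustrative.

    #include "llvm/MC/MCAssembler.h"
    using namespace llvm;

    void buildScratchFragment(const char *Bytes, size_t N) {
      MCDataFragment *DF = new MCDataFragment();  // no parent section attached
      DF->getContents().append(Bytes, Bytes + N); // storage lives in the template base
      DF->setHasInstructions(false);              // flag now lives on MCFragment itself
      MCFragment *F = DF;
      // delete F;   // ill-formed: ~MCFragment() is protected and non-virtual
      F->destroy();  // dispatches on getKind() to the right subclass destructor
    }
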
diff --git a/contrib/llvm/include/llvm/MC/MCCodeEmitter.h b/contrib/llvm/include/llvm/MC/MCCodeEmitter.h
index b6c1915..b4445d1 100644
--- a/contrib/llvm/include/llvm/MC/MCCodeEmitter.h
+++ b/contrib/llvm/include/llvm/MC/MCCodeEmitter.h
@@ -41,6 +41,6 @@ public:
const MCSubtargetInfo &STI) const = 0;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCContext.h b/contrib/llvm/include/llvm/MC/MCContext.h
index 1790905..52017fd 100644
--- a/contrib/llvm/include/llvm/MC/MCContext.h
+++ b/contrib/llvm/include/llvm/MC/MCContext.h
@@ -207,7 +207,7 @@ namespace llvm {
bool AutoReset;
MCSymbol *createSymbolImpl(const StringMapEntry<bool> *Name,
- bool IsTemporary);
+ bool CanBeUnnamed);
MCSymbol *createSymbol(StringRef Name, bool AlwaysAddSuffix,
bool IsTemporary);
@@ -249,9 +249,10 @@ namespace llvm {
/// Create and return a new assembler temporary symbol with a unique but
/// unspecified name.
- MCSymbol *createTempSymbol();
+ MCSymbol *createTempSymbol(bool CanBeUnnamed = true);
- MCSymbol *createTempSymbol(const Twine &Name, bool AlwaysAddSuffix);
+ MCSymbol *createTempSymbol(const Twine &Name, bool AlwaysAddSuffix,
+ bool CanBeUnnamed = true);
/// Create the definition of a directional local symbol for numbered label
/// (used for "1:" definitions).
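
A usage sketch for the new CanBeUnnamed parameter (function and label names are hypothetical): the default still produces an ordinary assembler temporary, while passing false keeps a real name for symbols that must survive into the object file.

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCSymbol.h"
    using namespace llvm;

    MCSymbol *makeFaultLabel(MCContext &Ctx) {
      MCSymbol *Scratch = Ctx.createTempSymbol(); // purely internal, may be unnamed
      (void)Scratch;
      // This label must keep a name in the emitted object.
      return Ctx.createTempSymbol("fault", /*AlwaysAddSuffix=*/true,
                                  /*CanBeUnnamed=*/false);
    }
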
diff --git a/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h b/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h
index 01f694d..855013a 100644
--- a/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h
+++ b/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h
@@ -132,6 +132,6 @@ public:
MCObjectWriter *createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
raw_pwrite_stream &OS,
bool IsLittleEndian);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCELFStreamer.h b/contrib/llvm/include/llvm/MC/MCELFStreamer.h
index 241db0dc..a5b257f 100644
--- a/contrib/llvm/include/llvm/MC/MCELFStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCELFStreamer.h
@@ -93,7 +93,7 @@ private:
void fixSymbolsInTLSFixups(const MCExpr *expr);
/// \brief Merge the content of the fragment \p EF into the fragment \p DF.
- void mergeFragment(MCDataFragment *, MCEncodedFragmentWithFixups *);
+ void mergeFragment(MCDataFragment *, MCDataFragment *);
bool SeenIdent;
diff --git a/contrib/llvm/include/llvm/MC/MCExternalSymbolizer.h b/contrib/llvm/include/llvm/MC/MCExternalSymbolizer.h
index 2c7d237..a88b32e 100644
--- a/contrib/llvm/include/llvm/MC/MCExternalSymbolizer.h
+++ b/contrib/llvm/include/llvm/MC/MCExternalSymbolizer.h
@@ -53,6 +53,6 @@ public:
uint64_t Address) override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCFixedLenDisassembler.h b/contrib/llvm/include/llvm/MC/MCFixedLenDisassembler.h
index ad99943..9fbdf9c 100644
--- a/contrib/llvm/include/llvm/MC/MCFixedLenDisassembler.h
+++ b/contrib/llvm/include/llvm/MC/MCFixedLenDisassembler.h
@@ -26,7 +26,7 @@ enum DecoderOps {
OPC_Fail // OPC_Fail()
};
-} // namespace MCDecode
+} // namespace MCD
} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCFixup.h b/contrib/llvm/include/llvm/MC/MCFixup.h
index 8ab477c..c09f55a 100644
--- a/contrib/llvm/include/llvm/MC/MCFixup.h
+++ b/contrib/llvm/include/llvm/MC/MCFixup.h
@@ -108,6 +108,6 @@ public:
SMLoc getLoc() const { return Loc; }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCFixupKindInfo.h b/contrib/llvm/include/llvm/MC/MCFixupKindInfo.h
index 58183bd..b779781 100644
--- a/contrib/llvm/include/llvm/MC/MCFixupKindInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCFixupKindInfo.h
@@ -38,6 +38,6 @@ struct MCFixupKindInfo {
unsigned Flags;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h b/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h
index 8f5159e..a0a6810 100644
--- a/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -66,6 +66,6 @@ public:
uint64_t &Target) const;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCInstrDesc.h b/contrib/llvm/include/llvm/MC/MCInstrDesc.h
index 3209a2c..fe67e44 100644
--- a/contrib/llvm/include/llvm/MC/MCInstrDesc.h
+++ b/contrib/llvm/include/llvm/MC/MCInstrDesc.h
@@ -49,7 +49,7 @@ enum OperandType {
OPERAND_PCREL = 4,
OPERAND_FIRST_TARGET = 5
};
-}
+} // namespace MCOI
/// \brief This holds information about one operand of a machine instruction,
/// indicating the register class for register operands, etc.
@@ -128,7 +128,7 @@ enum Flag {
InsertSubreg,
Convergent
};
-}
+} // namespace MCID
/// \brief Describe properties that are true of each instruction in the target
/// description file. This captures information about side effects, register
diff --git a/contrib/llvm/include/llvm/MC/MCInstrInfo.h b/contrib/llvm/include/llvm/MC/MCInstrInfo.h
index 70c8658..d75c4ca 100644
--- a/contrib/llvm/include/llvm/MC/MCInstrInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCInstrInfo.h
@@ -54,6 +54,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCInstrItineraries.h b/contrib/llvm/include/llvm/MC/MCInstrItineraries.h
index 161705d..a58bd7b 100644
--- a/contrib/llvm/include/llvm/MC/MCInstrItineraries.h
+++ b/contrib/llvm/include/llvm/MC/MCInstrItineraries.h
@@ -234,6 +234,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCMachObjectWriter.h b/contrib/llvm/include/llvm/MC/MCMachObjectWriter.h
index 175d73e..10b7905 100644
--- a/contrib/llvm/include/llvm/MC/MCMachObjectWriter.h
+++ b/contrib/llvm/include/llvm/MC/MCMachObjectWriter.h
@@ -264,6 +264,6 @@ MCObjectWriter *createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
raw_pwrite_stream &OS,
bool IsLittleEndian);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h
index 0515f1c..99e3f92 100644
--- a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -20,7 +20,6 @@
namespace llvm {
class MCContext;
class MCSection;
-class StringRef;
class MCObjectFileInfo {
protected:
@@ -139,6 +138,9 @@ protected:
/// StackMap section.
MCSection *StackMapSection;
+ /// FaultMap section.
+ MCSection *FaultMapSection;
+
/// EH frame section.
///
/// It is initialized on demand so it can be overwritten (with uniquing).
@@ -185,8 +187,12 @@ protected:
MCSection *SXDataSection;
public:
- void InitMCObjectFileInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM,
- MCContext &ctx);
+ void InitMCObjectFileInfo(const Triple &TT, Reloc::Model RM,
+ CodeModel::Model CM, MCContext &ctx);
+ LLVM_ATTRIBUTE_DEPRECATED(
+ void InitMCObjectFileInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM, MCContext &ctx),
+ "StringRef GNU Triple argument replaced by a llvm::Triple object");
bool getSupportsWeakOmittedEHFrame() const {
return SupportsWeakOmittedEHFrame;
@@ -262,6 +268,7 @@ public:
MCSection *getTLSBSSSection() const { return TLSBSSSection; }
MCSection *getStackMapSection() const { return StackMapSection; }
+ MCSection *getFaultMapSection() const { return FaultMapSection; }
// ELF specific sections.
MCSection *getDataRelSection() const { return DataRelSection; }
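
A sketch of the updated initialization call, assuming the usual Reloc::Default and CodeModel::Default enumerators from llvm/Support/CodeGen.h; the StringRef overload remains only as a deprecated shim.

    #include "llvm/ADT/Triple.h"
    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCObjectFileInfo.h"
    #include "llvm/Support/CodeGen.h"
    using namespace llvm;

    void initObjectFileInfo(MCObjectFileInfo &MOFI, MCContext &Ctx,
                            StringRef TripleStr) {
      Triple TT(TripleStr); // parse once, then pass the Triple object around
      MOFI.InitMCObjectFileInfo(TT, Reloc::Default, CodeModel::Default, Ctx);
    }
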
diff --git a/contrib/llvm/include/llvm/MC/MCObjectStreamer.h b/contrib/llvm/include/llvm/MC/MCObjectStreamer.h
index 462b3b4..ce1fc80 100644
--- a/contrib/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -135,8 +135,7 @@ public:
/// data fragment. Otherwise, do nothing and return \c false.
///
/// \pre Offset of \c Hi is greater than the offset \c Lo.
- /// \return true on success.
- bool emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo,
+ void emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo,
unsigned Size) override;
bool mayHaveInstructions(MCSection &Sec) const override;
diff --git a/contrib/llvm/include/llvm/MC/MCObjectWriter.h b/contrib/llvm/include/llvm/MC/MCObjectWriter.h
index 2211673..ca7fba5 100644
--- a/contrib/llvm/include/llvm/MC/MCObjectWriter.h
+++ b/contrib/llvm/include/llvm/MC/MCObjectWriter.h
@@ -188,6 +188,6 @@ public:
/// @}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
index 71f15b3..0bf8aa6 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -190,6 +190,6 @@ public:
void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index 0538b94..c840958 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -203,6 +203,6 @@ public:
MCAsmParser *createMCAsmParser(SourceMgr &, MCContext &, MCStreamer &,
const MCAsmInfo &);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h
index 077fd21..46f716e 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h
@@ -84,6 +84,6 @@ public:
/// @}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCRegisterInfo.h b/contrib/llvm/include/llvm/MC/MCRegisterInfo.h
index 8e25ee1..7a41abc 100644
--- a/contrib/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -686,6 +686,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCRelocationInfo.h b/contrib/llvm/include/llvm/MC/MCRelocationInfo.h
index 40e0217..8fc5c9f 100644
--- a/contrib/llvm/include/llvm/MC/MCRelocationInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCRelocationInfo.h
@@ -50,6 +50,6 @@ public:
unsigned VariantKind);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCSchedule.h b/contrib/llvm/include/llvm/MC/MCSchedule.h
index 1adfedd..635eab9 100644
--- a/contrib/llvm/include/llvm/MC/MCSchedule.h
+++ b/contrib/llvm/include/llvm/MC/MCSchedule.h
@@ -245,6 +245,6 @@ struct MCSchedModel {
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCSection.h b/contrib/llvm/include/llvm/MC/MCSection.h
index 5f6e8ec..2d0d4df 100644
--- a/contrib/llvm/include/llvm/MC/MCSection.h
+++ b/contrib/llvm/include/llvm/MC/MCSection.h
@@ -31,6 +31,18 @@ class MCSection;
class MCSymbol;
class raw_ostream;
+template<>
+struct ilist_node_traits<MCFragment> {
+ MCFragment *createNode(const MCFragment &V);
+ static void deleteNode(MCFragment *V);
+
+ void addNodeToList(MCFragment *) {}
+ void removeNodeFromList(MCFragment *) {}
+ void transferNodesFromList(ilist_node_traits & /*SrcTraits*/,
+ ilist_iterator<MCFragment> /*first*/,
+ ilist_iterator<MCFragment> /*last*/) {}
+};
+
/// Instances of this class represent a uniqued identifier for a section in the
/// current translation unit. The MCContext class uniques and creates these.
class MCSection {
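
A sketch of what this trait specialization buys users of the fragment list (getFragmentList() is assumed from MCSection as declared later in this header): erasing through the intrusive list routes destruction via deleteNode(), which is what keeps the non-virtual MCFragment::destroy() scheme safe.

    #include "llvm/MC/MCSection.h"
    using namespace llvm;

    // Drop the first fragment of a section; deletion goes through
    // ilist_node_traits<MCFragment>::deleteNode(), not a virtual destructor.
    void dropFirstFragment(MCSection &Sec) {
      if (!Sec.getFragmentList().empty())
        Sec.getFragmentList().erase(Sec.getFragmentList().begin());
    }
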
diff --git a/contrib/llvm/include/llvm/MC/MCStreamer.h b/contrib/llvm/include/llvm/MC/MCStreamer.h
index 628fb76..50d8d31 100644
--- a/contrib/llvm/include/llvm/MC/MCStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCStreamer.h
@@ -85,6 +85,9 @@ public:
// Allow a target to add behavior to the emitAssignment of MCStreamer.
virtual void emitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS,
+ const MCInst &Inst, const MCSubtargetInfo &STI);
+
virtual void finish();
};
@@ -636,14 +639,11 @@ public:
unsigned Isa, unsigned Discriminator,
StringRef FileName);
- /// Emit the absolute difference between two symbols if possible.
+ /// Emit the absolute difference between two symbols.
///
/// \pre Offset of \c Hi is greater than the offset \c Lo.
- /// \return true on success.
- virtual bool emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo,
- unsigned Size) {
- return false;
- }
+ virtual void emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo,
+ unsigned Size);
virtual MCSymbol *getDwarfLineTableSymbol(unsigned CUID);
virtual void EmitCFISections(bool EH, bool Debug);
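
A sketch of a caller after this change (names hypothetical): since emitAbsoluteSymbolDiff() no longer reports success, callers drop their fallback path and rely on the streamer to emit the difference, via a fixup if it cannot be folded.

    #include "llvm/MC/MCStreamer.h"
    #include "llvm/MC/MCSymbol.h"
    using namespace llvm;

    void emitRangeLength(MCStreamer &S, const MCSymbol *Begin, const MCSymbol *End) {
      // Previously: if (!S.emitAbsoluteSymbolDiff(End, Begin, 4)) { /* fallback */ }
      S.emitAbsoluteSymbolDiff(End, Begin, /*Size=*/4);
    }
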
diff --git a/contrib/llvm/include/llvm/MC/MCSubtargetInfo.h b/contrib/llvm/include/llvm/MC/MCSubtargetInfo.h
index ee5d563..0a23306 100644
--- a/contrib/llvm/include/llvm/MC/MCSubtargetInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCSubtargetInfo.h
@@ -27,7 +27,7 @@ class StringRef;
/// MCSubtargetInfo - Generic base class for all target subtargets.
///
class MCSubtargetInfo {
- std::string TargetTriple; // Target triple
+ Triple TargetTriple; // Target triple
std::string CPU; // CPU being targeted.
ArrayRef<SubtargetFeatureKV> ProcFeatures; // Processor feature list
ArrayRef<SubtargetFeatureKV> ProcDesc; // Processor descriptions
@@ -45,20 +45,17 @@ class MCSubtargetInfo {
FeatureBitset FeatureBits; // Feature bits for current CPU + FS
public:
- void InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS,
+ void InitMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS,
ArrayRef<SubtargetFeatureKV> PF,
ArrayRef<SubtargetFeatureKV> PD,
const SubtargetInfoKV *ProcSched,
const MCWriteProcResEntry *WPR,
const MCWriteLatencyEntry *WL,
- const MCReadAdvanceEntry *RA,
- const InstrStage *IS,
+ const MCReadAdvanceEntry *RA, const InstrStage *IS,
const unsigned *OC, const unsigned *FP);
/// getTargetTriple - Return the target triple string.
- StringRef getTargetTriple() const {
- return TargetTriple;
- }
+ const Triple &getTargetTriple() const { return TargetTriple; }
/// getCPU - Return the CPU string.
StringRef getCPU() const {
@@ -163,6 +160,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
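
A sketch of a caller updated for the new return type (helper name hypothetical): the triple is parsed once by the subtarget and queried directly instead of being re-parsed from a string.

    #include "llvm/ADT/Triple.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    using namespace llvm;

    bool targetsWindowsCOFF(const MCSubtargetInfo &STI) {
      const Triple &TT = STI.getTargetTriple(); // was a StringRef before this change
      return TT.isOSWindows() && TT.isOSBinFormatCOFF();
    }
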
diff --git a/contrib/llvm/include/llvm/MC/MCSymbol.h b/contrib/llvm/include/llvm/MC/MCSymbol.h
index 078f3d7..0acf6e5 100644
--- a/contrib/llvm/include/llvm/MC/MCSymbol.h
+++ b/contrib/llvm/include/llvm/MC/MCSymbol.h
@@ -17,7 +17,6 @@
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -52,10 +51,6 @@ protected:
// FIXME: Use a PointerInt wrapper for this?
static MCSection *AbsolutePseudoSection;
- /// Name - The name of the symbol. The referred-to string data is actually
- /// held by the StringMap that lives in MCContext.
- const StringMapEntry<bool> *Name;
-
/// If a symbol has a Fragment, the section is implied, so we only need
/// one pointer.
/// FIXME: We might be able to simplify this by having the asm streamer create
@@ -91,10 +86,18 @@ protected:
/// This symbol is private extern.
mutable unsigned IsPrivateExtern : 1;
+ /// True if this symbol is named.
+ /// A named symbol will have a pointer to the name allocated in the bytes
+ /// immediately prior to the MCSymbol.
+ unsigned HasName : 1;
+
/// LLVM RTTI discriminator. This is actually a SymbolKind enumerator, but is
/// unsigned to avoid sign extension and achieve better bitpacking with MSVC.
unsigned Kind : 2;
+ /// True if we have created a relocation that uses this symbol.
+ mutable unsigned IsUsedInReloc : 1;
+
/// Index field, for use by the object file implementation.
mutable uint32_t Index = 0;
@@ -118,15 +121,43 @@ protected:
protected: // MCContext creates and uniques these.
friend class MCExpr;
friend class MCContext;
+
+ /// \brief The name for a symbol.
+ /// MCSymbol contains a uint64_t so is probably aligned to 8. On a 32-bit
+ /// system, the name is a pointer so isn't going to satisfy the 8 byte
+ /// alignment of uint64_t. Account for that here.
+ typedef union {
+ const StringMapEntry<bool> *NameEntry;
+ uint64_t AlignmentPadding;
+ } NameEntryStorageTy;
+
MCSymbol(SymbolKind Kind, const StringMapEntry<bool> *Name, bool isTemporary)
- : Name(Name), Value(nullptr), IsTemporary(isTemporary),
- IsRedefinable(false), IsUsed(false), IsRegistered(false),
- IsExternal(false), IsPrivateExtern(false),
- Kind(Kind) {
+ : Value(nullptr), IsTemporary(isTemporary), IsRedefinable(false),
+ IsUsed(false), IsRegistered(false), IsExternal(false),
+ IsPrivateExtern(false), HasName(!!Name), Kind(Kind),
+ IsUsedInReloc(false) {
Offset = 0;
+ if (Name)
+ getNameEntryPtr() = Name;
}
+ // Provide custom new/delete as we will only allocate space for a name
+ // if we need one.
+ void *operator new(size_t s, const StringMapEntry<bool> *Name,
+ MCContext &Ctx);
+
private:
+
+ void operator delete(void *);
+ /// \brief Placement delete - required by std, but never called.
+ void operator delete(void*, unsigned) {
+ llvm_unreachable("Constructor throws?");
+ }
+ /// \brief Placement delete - required by std, but never called.
+ void operator delete(void*, unsigned, bool) {
+ llvm_unreachable("Constructor throws?");
+ }
+
MCSymbol(const MCSymbol &) = delete;
void operator=(const MCSymbol &) = delete;
MCSection *getSectionPtr() const {
@@ -139,13 +170,31 @@ private:
return Section = Value->findAssociatedSection();
}
+ /// \brief Get a reference to the name field. Requires that we have a name
+ const StringMapEntry<bool> *&getNameEntryPtr() {
+ assert(HasName && "Name is required");
+ NameEntryStorageTy *Name = reinterpret_cast<NameEntryStorageTy *>(this);
+ return (*(Name - 1)).NameEntry;
+ }
+ const StringMapEntry<bool> *&getNameEntryPtr() const {
+ return const_cast<MCSymbol*>(this)->getNameEntryPtr();
+ }
+
public:
/// getName - Get the symbol name.
- StringRef getName() const { return Name ? Name->first() : ""; }
+ StringRef getName() const {
+ if (!HasName)
+ return StringRef();
+
+ return getNameEntryPtr()->first();
+ }
bool isRegistered() const { return IsRegistered; }
void setIsRegistered(bool Value) const { IsRegistered = Value; }
+ void setUsedInReloc() const { IsUsedInReloc = true; }
+ bool isUsedInReloc() const { return IsUsedInReloc; }
+
/// \name Accessors
/// @{
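
A sketch of the relocation-tracking flag this patch hoists from MCSymbolELF onto MCSymbol (caller name hypothetical); the setter is usable on a const symbol because the bit is declared mutable.

    #include "llvm/MC/MCSymbol.h"
    #include <cassert>

    void noteRelocationTarget(const llvm::MCSymbol &Sym) {
      Sym.setUsedInReloc(); // const method: IsUsedInReloc is mutable
      assert(Sym.isUsedInReloc() && "flag should stick");
    }
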
diff --git a/contrib/llvm/include/llvm/MC/MCSymbolCOFF.h b/contrib/llvm/include/llvm/MC/MCSymbolCOFF.h
index 2172c67..3b853f7 100644
--- a/contrib/llvm/include/llvm/MC/MCSymbolCOFF.h
+++ b/contrib/llvm/include/llvm/MC/MCSymbolCOFF.h
@@ -59,6 +59,6 @@ public:
static bool classof(const MCSymbol *S) { return S->isCOFF(); }
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCSymbolELF.h b/contrib/llvm/include/llvm/MC/MCSymbolELF.h
index 0cc1115..b0ce3fe 100644
--- a/contrib/llvm/include/llvm/MC/MCSymbolELF.h
+++ b/contrib/llvm/include/llvm/MC/MCSymbolELF.h
@@ -38,9 +38,6 @@ public:
bool isBindingSet() const;
- void setUsedInReloc() const;
- bool isUsedInReloc() const;
-
void setIsWeakrefUsedInReloc() const;
bool isWeakrefUsedInReloc() const;
@@ -52,6 +49,6 @@ public:
private:
void setIsBindingSet() const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCSymbolMachO.h b/contrib/llvm/include/llvm/MC/MCSymbolMachO.h
index 166ae9e..a162080 100644
--- a/contrib/llvm/include/llvm/MC/MCSymbolMachO.h
+++ b/contrib/llvm/include/llvm/MC/MCSymbolMachO.h
@@ -118,6 +118,6 @@ public:
static bool classof(const MCSymbol *S) { return S->isMachO(); }
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCSymbolizer.h b/contrib/llvm/include/llvm/MC/MCSymbolizer.h
index 2ef1767..41c1b0d 100644
--- a/contrib/llvm/include/llvm/MC/MCSymbolizer.h
+++ b/contrib/llvm/include/llvm/MC/MCSymbolizer.h
@@ -80,6 +80,6 @@ public:
uint64_t Address) = 0;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCTargetAsmParser.h b/contrib/llvm/include/llvm/MC/MCTargetAsmParser.h
index 36db391..4ee53ad 100644
--- a/contrib/llvm/include/llvm/MC/MCTargetAsmParser.h
+++ b/contrib/llvm/include/llvm/MC/MCTargetAsmParser.h
@@ -201,6 +201,6 @@ public:
virtual void onLabelParsed(MCSymbol *Symbol) { };
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCWin64EH.h b/contrib/llvm/include/llvm/MC/MCWin64EH.h
index 0e81a19..f2211d7 100644
--- a/contrib/llvm/include/llvm/MC/MCWin64EH.h
+++ b/contrib/llvm/include/llvm/MC/MCWin64EH.h
@@ -57,7 +57,7 @@ public:
void Emit(MCStreamer &Streamer) const override;
void EmitUnwindInfo(MCStreamer &Streamer, WinEH::FrameInfo *FI) const override;
};
-}
+} // namespace Win64EH
} // end namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h b/contrib/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h
index e2e95c7..edf87f5 100644
--- a/contrib/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h
+++ b/contrib/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h
@@ -42,6 +42,6 @@ class raw_pwrite_stream;
/// \returns The constructed object writer.
MCObjectWriter *createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
raw_pwrite_stream &OS);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCWinCOFFStreamer.h b/contrib/llvm/include/llvm/MC/MCWinCOFFStreamer.h
index 6fbc754..fcca838 100644
--- a/contrib/llvm/include/llvm/MC/MCWinCOFFStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCWinCOFFStreamer.h
@@ -75,7 +75,7 @@ protected:
private:
LLVM_ATTRIBUTE_NORETURN void FatalError(const Twine &Msg) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCWinEH.h b/contrib/llvm/include/llvm/MC/MCWinEH.h
index 723d7a3..d22791e 100644
--- a/contrib/llvm/include/llvm/MC/MCWinEH.h
+++ b/contrib/llvm/include/llvm/MC/MCWinEH.h
@@ -78,7 +78,7 @@ public:
virtual void Emit(MCStreamer &Streamer) const = 0;
virtual void EmitUnwindInfo(MCStreamer &Streamer, FrameInfo *FI) const = 0;
};
-}
-}
+} // namespace WinEH
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MachineLocation.h b/contrib/llvm/include/llvm/MC/MachineLocation.h
index 2a18615..1c42182 100644
--- a/contrib/llvm/include/llvm/MC/MachineLocation.h
+++ b/contrib/llvm/include/llvm/MC/MachineLocation.h
@@ -78,6 +78,6 @@ inline bool operator!=(const MachineLocation &LHS, const MachineLocation &RHS) {
return !(LHS == RHS);
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/StringTableBuilder.h b/contrib/llvm/include/llvm/MC/StringTableBuilder.h
index 897d449..700a8a6 100644
--- a/contrib/llvm/include/llvm/MC/StringTableBuilder.h
+++ b/contrib/llvm/include/llvm/MC/StringTableBuilder.h
@@ -62,6 +62,6 @@ private:
}
};
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/YAML.h b/contrib/llvm/include/llvm/MC/YAML.h
index 383cdc6..ae83298 100644
--- a/contrib/llvm/include/llvm/MC/YAML.h
+++ b/contrib/llvm/include/llvm/MC/YAML.h
@@ -89,6 +89,6 @@ template <> struct ScalarTraits<BinaryRef> {
static StringRef input(StringRef, void *, BinaryRef &);
static bool mustQuote(StringRef S) { return needsQuotes(S); }
};
-}
-}
+} // namespace yaml
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Object/Archive.h b/contrib/llvm/include/llvm/Object/Archive.h
index 8da6919..3a52a9d 100644
--- a/contrib/llvm/include/llvm/Object/Archive.h
+++ b/contrib/llvm/include/llvm/Object/Archive.h
@@ -217,7 +217,7 @@ private:
unsigned IsThin : 1;
};
-}
-}
+} // namespace object
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Object/ArchiveWriter.h b/contrib/llvm/include/llvm/Object/ArchiveWriter.h
index 1616e46..8a394fa 100644
--- a/contrib/llvm/include/llvm/Object/ArchiveWriter.h
+++ b/contrib/llvm/include/llvm/Object/ArchiveWriter.h
@@ -46,6 +46,6 @@ std::pair<StringRef, std::error_code>
writeArchive(StringRef ArcName, std::vector<NewArchiveIterator> &NewMembers,
bool WriteSymtab);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Object/Binary.h b/contrib/llvm/include/llvm/Object/Binary.h
index a3d6d0d..949edf8 100644
--- a/contrib/llvm/include/llvm/Object/Binary.h
+++ b/contrib/llvm/include/llvm/Object/Binary.h
@@ -178,7 +178,7 @@ template <typename T> const T* OwningBinary<T>::getBinary() const {
}
ErrorOr<OwningBinary<Binary>> createBinary(StringRef Path);
-}
-}
+} // namespace object
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Object/COFF.h b/contrib/llvm/include/llvm/Object/COFF.h
index 564eb7a..ad657b5 100644
--- a/contrib/llvm/include/llvm/Object/COFF.h
+++ b/contrib/llvm/include/llvm/Object/COFF.h
@@ -641,7 +641,6 @@ protected:
std::error_code getRelocationOffset(DataRefImpl Rel,
uint64_t &Res) const override;
symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override;
- section_iterator getRelocationSection(DataRefImpl Rel) const override;
std::error_code getRelocationType(DataRefImpl Rel,
uint64_t &Res) const override;
std::error_code
diff --git a/contrib/llvm/include/llvm/Object/COFFYAML.h b/contrib/llvm/include/llvm/Object/COFFYAML.h
index 12a2522..5ba3db3 100644
--- a/contrib/llvm/include/llvm/Object/COFFYAML.h
+++ b/contrib/llvm/include/llvm/Object/COFFYAML.h
@@ -37,7 +37,7 @@ inline DLLCharacteristics operator|(DLLCharacteristics a,
uint16_t Ret = static_cast<uint16_t>(a) | static_cast<uint16_t>(b);
return static_cast<DLLCharacteristics>(Ret);
}
-}
+} // namespace COFF
// The structure of the yaml files is not an exact 1:1 match to COFF. In order
// to use yaml::IO, we use these structures which are closer to the source.
@@ -87,8 +87,8 @@ namespace COFFYAML {
std::vector<Symbol> Symbols;
Object();
};
-}
-}
+} // namespace COFFYAML
+} // namespace llvm
LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Section)
LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Symbol)
diff --git a/contrib/llvm/include/llvm/Object/ELFObjectFile.h b/contrib/llvm/include/llvm/Object/ELFObjectFile.h
index 78d77be..7fc56ad 100644
--- a/contrib/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/contrib/llvm/include/llvm/Object/ELFObjectFile.h
@@ -40,14 +40,15 @@ protected:
ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source);
public:
- virtual std::error_code getRelocationAddend(DataRefImpl Rel,
- int64_t &Res) const = 0;
+ virtual ErrorOr<int64_t> getRelocationAddend(DataRefImpl Rel) const = 0;
+
+  // FIXME: This is a bit of a hack. Every caller should know if it is
+  // expecting an addend or not.
+ virtual bool hasRelocationAddend(DataRefImpl Rel) const = 0;
+
virtual std::pair<symbol_iterator, symbol_iterator>
getELFDynamicSymbolIterators() const = 0;
- virtual std::error_code getSymbolVersion(SymbolRef Symb, StringRef &Version,
- bool &IsDefault) const = 0;
-
virtual uint64_t getSectionFlags(SectionRef Sec) const = 0;
virtual uint32_t getSectionType(SectionRef Sec) const = 0;
@@ -112,7 +113,6 @@ protected:
std::error_code getRelocationOffset(DataRefImpl Rel,
uint64_t &Res) const override;
symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override;
- section_iterator getRelocationSection(DataRefImpl Rel) const override;
std::error_code getRelocationType(DataRefImpl Rel,
uint64_t &Res) const override;
std::error_code
@@ -208,10 +208,8 @@ public:
section_iterator section_begin() const override;
section_iterator section_end() const override;
- std::error_code getRelocationAddend(DataRefImpl Rel,
- int64_t &Res) const override;
- std::error_code getSymbolVersion(SymbolRef Symb, StringRef &Version,
- bool &IsDefault) const override;
+ ErrorOr<int64_t> getRelocationAddend(DataRefImpl Rel) const override;
+ bool hasRelocationAddend(DataRefImpl Rel) const override;
uint64_t getSectionFlags(SectionRef Sec) const override;
uint32_t getSectionType(SectionRef Sec) const override;
@@ -261,20 +259,6 @@ std::error_code ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Symb,
}
template <class ELFT>
-std::error_code ELFObjectFile<ELFT>::getSymbolVersion(SymbolRef SymRef,
- StringRef &Version,
- bool &IsDefault) const {
- DataRefImpl Symb = SymRef.getRawDataRefImpl();
- const Elf_Sym *symb = getSymbol(Symb);
- ErrorOr<StringRef> Ver =
- EF.getSymbolVersion(EF.getSection(Symb.d.b), symb, IsDefault);
- if (!Ver)
- return Ver.getError();
- Version = *Ver;
- return std::error_code();
-}
-
-template <class ELFT>
uint64_t ELFObjectFile<ELFT>::getSectionFlags(SectionRef Sec) const {
DataRefImpl DRI = Sec.getRawDataRefImpl();
return toELFShdrIter(DRI)->sh_flags;
@@ -584,20 +568,6 @@ ELFObjectFile<ELFT>::getRelocationSymbol(DataRefImpl Rel) const {
return symbol_iterator(SymbolRef(SymbolData, this));
}
-// ELF relocations can target sections, by targetting a symbol of type
-// STT_SECTION
-template <class ELFT>
-section_iterator
-ELFObjectFile<ELFT>::getRelocationSection(DataRefImpl Rel) const {
- symbol_iterator Sym = getRelocationSymbol(Rel);
- if (Sym == symbol_end())
- return section_end();
- const Elf_Sym *ESym = getSymbol(Sym->getRawDataRefImpl());
- if (ESym->getType() != ELF::STT_SECTION)
- return section_end();
- return getSymbolSection(ESym);
-}
-
template <class ELFT>
std::error_code
ELFObjectFile<ELFT>::getRelocationAddress(DataRefImpl Rel,
@@ -686,22 +656,16 @@ std::error_code ELFObjectFile<ELFT>::getRelocationTypeName(
}
template <class ELFT>
-std::error_code
-ELFObjectFile<ELFT>::getRelocationAddend(DataRefImpl Rel,
- int64_t &Result) const {
- const Elf_Shdr *sec = getRelSection(Rel);
- switch (sec->sh_type) {
- default:
- report_fatal_error("Invalid section type in Rel!");
- case ELF::SHT_REL: {
- Result = 0;
- return std::error_code();
- }
- case ELF::SHT_RELA: {
- Result = getRela(Rel)->r_addend;
- return std::error_code();
- }
- }
+ErrorOr<int64_t>
+ELFObjectFile<ELFT>::getRelocationAddend(DataRefImpl Rel) const {
+ if (getRelSection(Rel)->sh_type != ELF::SHT_RELA)
+ return object_error::parse_failed;
+ return (int64_t)getRela(Rel)->r_addend;
+}
+
+template <class ELFT>
+bool ELFObjectFile<ELFT>::hasRelocationAddend(DataRefImpl Rel) const {
+ return getRelSection(Rel)->sh_type == ELF::SHT_RELA;
}
template <class ELFT>
@@ -879,26 +843,12 @@ template <class ELFT> bool ELFObjectFile<ELFT>::isRelocatableObject() const {
return EF.getHeader()->e_type == ELF::ET_REL;
}
-inline std::error_code getELFRelocationAddend(const RelocationRef R,
- int64_t &Addend) {
- const ObjectFile *Obj = R.getObjectFile();
- DataRefImpl DRI = R.getRawDataRefImpl();
- return cast<ELFObjectFileBase>(Obj)->getRelocationAddend(DRI, Addend);
-}
-
inline std::pair<symbol_iterator, symbol_iterator>
getELFDynamicSymbolIterators(const SymbolicFile *Obj) {
return cast<ELFObjectFileBase>(Obj)->getELFDynamicSymbolIterators();
}
-inline std::error_code GetELFSymbolVersion(const ObjectFile *Obj,
- const SymbolRef &Sym,
- StringRef &Version,
- bool &IsDefault) {
- return cast<ELFObjectFileBase>(Obj)
- ->getSymbolVersion(Sym, Version, IsDefault);
-}
-}
-}
+} // namespace object
+} // namespace llvm
#endif
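
A sketch of the new error-aware addend query (helper name hypothetical): hasRelocationAddend() separates SHT_RELA from SHT_REL sections, and getRelocationAddend() now reports failure through ErrorOr instead of an out-parameter.

    #include "llvm/Object/ELFObjectFile.h"
    #include "llvm/Support/ErrorOr.h"
    using namespace llvm;
    using namespace llvm::object;

    int64_t addendOrZero(const ELFObjectFileBase &Obj, const RelocationRef &R) {
      DataRefImpl DRI = R.getRawDataRefImpl();
      if (!Obj.hasRelocationAddend(DRI))
        return 0; // SHT_REL: no explicit addend stored in the relocation
      ErrorOr<int64_t> Addend = Obj.getRelocationAddend(DRI);
      return Addend ? *Addend : 0;
    }
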
diff --git a/contrib/llvm/include/llvm/Object/ELFTypes.h b/contrib/llvm/include/llvm/Object/ELFTypes.h
index 3f323b5..2eda0c1 100644
--- a/contrib/llvm/include/llvm/Object/ELFTypes.h
+++ b/contrib/llvm/include/llvm/Object/ELFTypes.h
@@ -156,11 +156,13 @@ struct Elf_Sym_Impl : Elf_Sym_Base<ELFT> {
using Elf_Sym_Base<ELFT>::st_info;
using Elf_Sym_Base<ELFT>::st_shndx;
using Elf_Sym_Base<ELFT>::st_other;
+ using Elf_Sym_Base<ELFT>::st_value;
// These accessors and mutators correspond to the ELF32_ST_BIND,
// ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification:
unsigned char getBinding() const { return st_info >> 4; }
unsigned char getType() const { return st_info & 0x0f; }
+ uint64_t getValue() const { return st_value; }
void setBinding(unsigned char b) { setBindingAndType(b, getType()); }
void setType(unsigned char t) { setBindingAndType(getBinding(), t); }
void setBindingAndType(unsigned char b, unsigned char t) {
@@ -182,10 +184,7 @@ struct Elf_Sym_Impl : Elf_Sym_Base<ELFT> {
bool isCommon() const {
return getType() == ELF::STT_COMMON || st_shndx == ELF::SHN_COMMON;
}
- bool isDefined() const {
- return !isUndefined() &&
- !(st_shndx >= ELF::SHN_LORESERVE && st_shndx < ELF::SHN_ABS);
- }
+ bool isDefined() const { return !isUndefined(); }
bool isProcessorSpecific() const {
return st_shndx >= ELF::SHN_LOPROC && st_shndx <= ELF::SHN_HIPROC;
}
@@ -193,7 +192,9 @@ struct Elf_Sym_Impl : Elf_Sym_Base<ELFT> {
return st_shndx >= ELF::SHN_LOOS && st_shndx <= ELF::SHN_HIOS;
}
bool isReserved() const {
- return st_shndx > ELF::SHN_HIOS && st_shndx < ELF::SHN_ABS;
+ // ELF::SHN_HIRESERVE is 0xffff so st_shndx <= ELF::SHN_HIRESERVE is always
+ // true and some compilers warn about it.
+ return st_shndx >= ELF::SHN_LORESERVE;
}
bool isUndefined() const { return st_shndx == ELF::SHN_UNDEF; }
};
diff --git a/contrib/llvm/include/llvm/Object/IRObjectFile.h b/contrib/llvm/include/llvm/Object/IRObjectFile.h
index ef65528..f713570 100644
--- a/contrib/llvm/include/llvm/Object/IRObjectFile.h
+++ b/contrib/llvm/include/llvm/Object/IRObjectFile.h
@@ -68,7 +68,7 @@ public:
static ErrorOr<std::unique_ptr<IRObjectFile>> create(MemoryBufferRef Object,
LLVMContext &Context);
};
-}
-}
+} // namespace object
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Object/MachO.h b/contrib/llvm/include/llvm/Object/MachO.h
index b163534..4350a75 100644
--- a/contrib/llvm/include/llvm/Object/MachO.h
+++ b/contrib/llvm/include/llvm/Object/MachO.h
@@ -236,7 +236,7 @@ public:
std::error_code getRelocationOffset(DataRefImpl Rel,
uint64_t &Res) const override;
symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override;
- section_iterator getRelocationSection(DataRefImpl Rel) const override;
+ section_iterator getRelocationSection(DataRefImpl Rel) const;
std::error_code getRelocationType(DataRefImpl Rel,
uint64_t &Res) const override;
std::error_code
@@ -503,8 +503,8 @@ inline const ObjectFile *DiceRef::getObjectFile() const {
return OwningObject;
}
-}
-}
+} // namespace object
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Object/MachOUniversal.h b/contrib/llvm/include/llvm/Object/MachOUniversal.h
index 05119b2..ebc8b90 100644
--- a/contrib/llvm/include/llvm/Object/MachOUniversal.h
+++ b/contrib/llvm/include/llvm/Object/MachOUniversal.h
@@ -112,7 +112,7 @@ public:
getObjectForArch(Triple::ArchType Arch) const;
};
-}
-}
+} // namespace object
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Object/ObjectFile.h b/contrib/llvm/include/llvm/Object/ObjectFile.h
index a1ae19e..e00fe0e 100644
--- a/contrib/llvm/include/llvm/Object/ObjectFile.h
+++ b/contrib/llvm/include/llvm/Object/ObjectFile.h
@@ -53,7 +53,6 @@ public:
std::error_code getAddress(uint64_t &Result) const;
std::error_code getOffset(uint64_t &Result) const;
symbol_iterator getSymbol() const;
- section_iterator getSection() const;
std::error_code getType(uint64_t &Result) const;
/// @brief Indicates whether this relocation should hidden when listing
@@ -241,7 +240,6 @@ protected:
virtual std::error_code getRelocationOffset(DataRefImpl Rel,
uint64_t &Res) const = 0;
virtual symbol_iterator getRelocationSymbol(DataRefImpl Rel) const = 0;
- virtual section_iterator getRelocationSection(DataRefImpl Rel) const = 0;
virtual std::error_code getRelocationType(DataRefImpl Rel,
uint64_t &Res) const = 0;
virtual std::error_code
@@ -459,10 +457,6 @@ inline symbol_iterator RelocationRef::getSymbol() const {
return OwningObject->getRelocationSymbol(RelocationPimpl);
}
-inline section_iterator RelocationRef::getSection() const {
- return OwningObject->getRelocationSection(RelocationPimpl);
-}
-
inline std::error_code RelocationRef::getType(uint64_t &Result) const {
return OwningObject->getRelocationType(RelocationPimpl, Result);
}
diff --git a/contrib/llvm/include/llvm/Object/RelocVisitor.h b/contrib/llvm/include/llvm/Object/RelocVisitor.h
index 02ffda5..f80ee0a 100644
--- a/contrib/llvm/include/llvm/Object/RelocVisitor.h
+++ b/contrib/llvm/include/llvm/Object/RelocVisitor.h
@@ -239,36 +239,13 @@ private:
return RelocToApply();
}
- int64_t getELFAddend32LE(RelocationRef R) {
- const ELF32LEObjectFile *Obj = cast<ELF32LEObjectFile>(R.getObjectFile());
+ int64_t getELFAddend(RelocationRef R) {
+ const auto *Obj = cast<ELFObjectFileBase>(R.getObjectFile());
DataRefImpl DRI = R.getRawDataRefImpl();
- int64_t Addend;
- Obj->getRelocationAddend(DRI, Addend);
- return Addend;
- }
-
- int64_t getELFAddend64LE(RelocationRef R) {
- const ELF64LEObjectFile *Obj = cast<ELF64LEObjectFile>(R.getObjectFile());
- DataRefImpl DRI = R.getRawDataRefImpl();
- int64_t Addend;
- Obj->getRelocationAddend(DRI, Addend);
- return Addend;
- }
-
- int64_t getELFAddend32BE(RelocationRef R) {
- const ELF32BEObjectFile *Obj = cast<ELF32BEObjectFile>(R.getObjectFile());
- DataRefImpl DRI = R.getRawDataRefImpl();
- int64_t Addend;
- Obj->getRelocationAddend(DRI, Addend);
- return Addend;
- }
-
- int64_t getELFAddend64BE(RelocationRef R) {
- const ELF64BEObjectFile *Obj = cast<ELF64BEObjectFile>(R.getObjectFile());
- DataRefImpl DRI = R.getRawDataRefImpl();
- int64_t Addend;
- Obj->getRelocationAddend(DRI, Addend);
- return Addend;
+ ErrorOr<int64_t> AddendOrErr = Obj->getRelocationAddend(DRI);
+ if (std::error_code EC = AddendOrErr.getError())
+ report_fatal_error(EC.message());
+ return *AddendOrErr;
}
uint8_t getLengthMachO64(RelocationRef R) {
@@ -286,15 +263,13 @@ private:
// Ideally the Addend here will be the addend in the data for
// the relocation. It's not actually the case for Rel relocations.
RelocToApply visitELF_386_32(RelocationRef R, uint64_t Value) {
- int64_t Addend = getELFAddend32LE(R);
- return RelocToApply(Value + Addend, 4);
+ return RelocToApply(Value, 4);
}
RelocToApply visitELF_386_PC32(RelocationRef R, uint64_t Value) {
- int64_t Addend = getELFAddend32LE(R);
uint64_t Address;
R.getOffset(Address);
- return RelocToApply(Value + Addend - Address, 4);
+ return RelocToApply(Value - Address, 4);
}
/// X86-64 ELF
@@ -302,65 +277,59 @@ private:
return RelocToApply(0, 0);
}
RelocToApply visitELF_X86_64_64(RelocationRef R, uint64_t Value) {
- int64_t Addend = getELFAddend64LE(R);
+ int64_t Addend = getELFAddend(R);
return RelocToApply(Value + Addend, 8);
}
RelocToApply visitELF_X86_64_PC32(RelocationRef R, uint64_t Value) {
- int64_t Addend = getELFAddend64LE(R);
+ int64_t Addend = getELFAddend(R);
uint64_t Address;
R.getOffset(Address);
return RelocToApply(Value + Addend - Address, 4);
}
RelocToApply visitELF_X86_64_32(RelocationRef R, uint64_t Value) {
- int64_t Addend = getELFAddend64LE(R);
+ int64_t Addend = getELFAddend(R);
uint32_t Res = (Value + Addend) & 0xFFFFFFFF;
return RelocToApply(Res, 4);
}
RelocToApply visitELF_X86_64_32S(RelocationRef R, uint64_t Value) {
- int64_t Addend = getELFAddend64LE(R);
+ int64_t Addend = getELFAddend(R);
int32_t Res = (Value + Addend) & 0xFFFFFFFF;
return RelocToApply(Res, 4);
}
/// PPC64 ELF
RelocToApply visitELF_PPC64_ADDR32(RelocationRef R, uint64_t Value) {
- int64_t Addend;
- getELFRelocationAddend(R, Addend);
+ int64_t Addend = getELFAddend(R);
uint32_t Res = (Value + Addend) & 0xFFFFFFFF;
return RelocToApply(Res, 4);
}
RelocToApply visitELF_PPC64_ADDR64(RelocationRef R, uint64_t Value) {
- int64_t Addend;
- getELFRelocationAddend(R, Addend);
+ int64_t Addend = getELFAddend(R);
return RelocToApply(Value + Addend, 8);
}
/// PPC32 ELF
RelocToApply visitELF_PPC_ADDR32(RelocationRef R, uint64_t Value) {
- int64_t Addend = getELFAddend32BE(R);
+ int64_t Addend = getELFAddend(R);
uint32_t Res = (Value + Addend) & 0xFFFFFFFF;
return RelocToApply(Res, 4);
}
/// MIPS ELF
RelocToApply visitELF_MIPS_32(RelocationRef R, uint64_t Value) {
- int64_t Addend;
- getELFRelocationAddend(R, Addend);
- uint32_t Res = (Value + Addend) & 0xFFFFFFFF;
+ uint32_t Res = (Value)&0xFFFFFFFF;
return RelocToApply(Res, 4);
}
RelocToApply visitELF_MIPS_64(RelocationRef R, uint64_t Value) {
- int64_t Addend;
- getELFRelocationAddend(R, Addend);
+ int64_t Addend = getELFAddend(R);
uint64_t Res = (Value + Addend);
return RelocToApply(Res, 8);
}
// AArch64 ELF
RelocToApply visitELF_AARCH64_ABS32(RelocationRef R, uint64_t Value) {
- int64_t Addend;
- getELFRelocationAddend(R, Addend);
+ int64_t Addend = getELFAddend(R);
int64_t Res = Value + Addend;
// Overflow check allows for both signed and unsigned interpretation.
@@ -371,14 +340,13 @@ private:
}
RelocToApply visitELF_AARCH64_ABS64(RelocationRef R, uint64_t Value) {
- int64_t Addend;
- getELFRelocationAddend(R, Addend);
+ int64_t Addend = getELFAddend(R);
return RelocToApply(Value + Addend, 8);
}
// SystemZ ELF
RelocToApply visitELF_390_32(RelocationRef R, uint64_t Value) {
- int64_t Addend = getELFAddend64BE(R);
+ int64_t Addend = getELFAddend(R);
int64_t Res = Value + Addend;
// Overflow check allows for both signed and unsigned interpretation.
@@ -389,29 +357,27 @@ private:
}
RelocToApply visitELF_390_64(RelocationRef R, uint64_t Value) {
- int64_t Addend = getELFAddend64BE(R);
+ int64_t Addend = getELFAddend(R);
return RelocToApply(Value + Addend, 8);
}
RelocToApply visitELF_SPARC_32(RelocationRef R, uint32_t Value) {
- int32_t Addend = getELFAddend32BE(R);
+ int32_t Addend = getELFAddend(R);
return RelocToApply(Value + Addend, 4);
}
RelocToApply visitELF_SPARCV9_32(RelocationRef R, uint64_t Value) {
- int32_t Addend = getELFAddend64BE(R);
+ int32_t Addend = getELFAddend(R);
return RelocToApply(Value + Addend, 4);
}
RelocToApply visitELF_SPARCV9_64(RelocationRef R, uint64_t Value) {
- int64_t Addend = getELFAddend64BE(R);
+ int64_t Addend = getELFAddend(R);
return RelocToApply(Value + Addend, 8);
}
RelocToApply visitELF_ARM_ABS32(RelocationRef R, uint64_t Value) {
- int64_t Addend;
- getELFRelocationAddend(R, Addend);
- int64_t Res = Value + Addend;
+ int64_t Res = Value;
// Overflow check allows for both signed and unsigned interpretation.
if (Res < INT32_MIN || Res > UINT32_MAX)
@@ -446,6 +412,6 @@ private:
}
};
-}
-}
+} // namespace object
+} // namespace llvm
#endif
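The RelocVisitor hunk above collapses the four per-layout addend helpers into a single getELFAddend() that consumes ErrorOr<int64_t>. A minimal sketch of that consumption pattern, assuming LLVM's ErrorOr interface (getError(), operator*); computeAddend is a hypothetical stand-in for getRelocationAddend:

#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include <system_error>

// Hypothetical producer: fails with an error_code when no addend is available.
llvm::ErrorOr<int64_t> computeAddend(bool HasAddend) {
  if (!HasAddend)
    return std::make_error_code(std::errc::invalid_argument);
  return 42;  // placeholder addend value
}

int64_t getAddendOrDie(bool HasAddend) {
  llvm::ErrorOr<int64_t> AddendOrErr = computeAddend(HasAddend);
  if (std::error_code EC = AddendOrErr.getError())
    llvm::report_fatal_error(EC.message());  // same failure policy as the hunk
  return *AddendOrErr;                       // safe to dereference once checked
}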
diff --git a/contrib/llvm/include/llvm/Object/SymbolicFile.h b/contrib/llvm/include/llvm/Object/SymbolicFile.h
index 2bfff4c..bf46599 100644
--- a/contrib/llvm/include/llvm/Object/SymbolicFile.h
+++ b/contrib/llvm/include/llvm/Object/SymbolicFile.h
@@ -195,7 +195,7 @@ inline const SymbolicFile *BasicSymbolRef::getObject() const {
return OwningObject;
}
-}
-}
+} // namespace object
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Option/OptSpecifier.h b/contrib/llvm/include/llvm/Option/OptSpecifier.h
index 0b2aaae..f9b121e 100644
--- a/contrib/llvm/include/llvm/Option/OptSpecifier.h
+++ b/contrib/llvm/include/llvm/Option/OptSpecifier.h
@@ -35,7 +35,7 @@ namespace opt {
bool operator==(OptSpecifier Opt) const { return ID == Opt.getID(); }
bool operator!=(OptSpecifier Opt) const { return !(*this == Opt); }
};
-}
-}
+} // namespace opt
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Pass.h b/contrib/llvm/include/llvm/Pass.h
index 3c4d838..ccd6f27 100644
--- a/contrib/llvm/include/llvm/Pass.h
+++ b/contrib/llvm/include/llvm/Pass.h
@@ -369,7 +369,7 @@ protected:
/// @brief This is the storage for the -time-passes option.
extern bool TimePassesIsEnabled;
-} // End llvm namespace
+} // namespace llvm
// Include support files that contain important APIs commonly used by Passes,
// but that we want to separate out to make it easier to read the header files.
diff --git a/contrib/llvm/include/llvm/PassAnalysisSupport.h b/contrib/llvm/include/llvm/PassAnalysisSupport.h
index 38adb2d..d356097 100644
--- a/contrib/llvm/include/llvm/PassAnalysisSupport.h
+++ b/contrib/llvm/include/llvm/PassAnalysisSupport.h
@@ -248,6 +248,6 @@ AnalysisType &Pass::getAnalysisID(AnalysisID PI, Function &F) {
return *(AnalysisType*)ResultPass->getAdjustedAnalysisPointer(PI);
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/PassInfo.h b/contrib/llvm/include/llvm/PassInfo.h
index d107618..6a2f942 100644
--- a/contrib/llvm/include/llvm/PassInfo.h
+++ b/contrib/llvm/include/llvm/PassInfo.h
@@ -142,6 +142,6 @@ private:
PassInfo(const PassInfo &) = delete;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/PassRegistry.h b/contrib/llvm/include/llvm/PassRegistry.h
index 8c28ef5..0d2cd24 100644
--- a/contrib/llvm/include/llvm/PassRegistry.h
+++ b/contrib/llvm/include/llvm/PassRegistry.h
@@ -95,6 +95,6 @@ public:
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_STDCXX_CONVERSION_FUNCTIONS(PassRegistry, LLVMPassRegistryRef)
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/PassSupport.h b/contrib/llvm/include/llvm/PassSupport.h
index 6cb6516..af1a195 100644
--- a/contrib/llvm/include/llvm/PassSupport.h
+++ b/contrib/llvm/include/llvm/PassSupport.h
@@ -245,6 +245,6 @@ struct PassRegistrationListener {
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Passes/PassBuilder.h b/contrib/llvm/include/llvm/Passes/PassBuilder.h
index 1e605e3..bbf80f8 100644
--- a/contrib/llvm/include/llvm/Passes/PassBuilder.h
+++ b/contrib/llvm/include/llvm/Passes/PassBuilder.h
@@ -100,6 +100,6 @@ private:
bool VerifyEachPass, bool DebugLogging);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/ARMEHABI.h b/contrib/llvm/include/llvm/Support/ARMEHABI.h
index 9b052df..db045a8 100644
--- a/contrib/llvm/include/llvm/Support/ARMEHABI.h
+++ b/contrib/llvm/include/llvm/Support/ARMEHABI.h
@@ -127,8 +127,8 @@ namespace EHABI {
NUM_PERSONALITY_INDEX
};
-}
-}
-}
+} // namespace EHABI
+} // namespace ARM
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/ARMWinEH.h b/contrib/llvm/include/llvm/Support/ARMWinEH.h
index 1463629..0b37903 100644
--- a/contrib/llvm/include/llvm/Support/ARMWinEH.h
+++ b/contrib/llvm/include/llvm/Support/ARMWinEH.h
@@ -375,8 +375,8 @@ struct ExceptionDataRecord {
inline size_t HeaderWords(const ExceptionDataRecord &XR) {
return (XR.Data[0] & 0xff800000) ? 1 : 2;
}
-}
-}
-}
+} // namespace WinEH
+} // namespace ARM
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/AlignOf.h b/contrib/llvm/include/llvm/Support/AlignOf.h
index 574b514..07da02d 100644
--- a/contrib/llvm/include/llvm/Support/AlignOf.h
+++ b/contrib/llvm/include/llvm/Support/AlignOf.h
@@ -44,9 +44,18 @@ private:
/// compile-time constant (e.g., for template instantiation).
template <typename T>
struct AlignOf {
+#ifndef _MSC_VER
+ // Avoid warnings from GCC like:
+ // comparison between 'enum llvm::AlignOf<X>::<anonymous>' and 'enum
+ // llvm::AlignOf<Y>::<anonymous>' [-Wenum-compare]
+ // by using constexpr instead of enum.
+ // (except on MSVC, since it doesn't support constexpr yet).
+ static constexpr unsigned Alignment =
+ static_cast<unsigned int>(sizeof(AlignmentCalcImpl<T>) - sizeof(T));
+#else
enum { Alignment =
static_cast<unsigned int>(sizeof(AlignmentCalcImpl<T>) - sizeof(T)) };
-
+#endif
enum { Alignment_GreaterEqual_2Bytes = Alignment >= 2 ? 1 : 0 };
enum { Alignment_GreaterEqual_4Bytes = Alignment >= 4 ? 1 : 0 };
enum { Alignment_GreaterEqual_8Bytes = Alignment >= 8 ? 1 : 0 };
@@ -58,6 +67,10 @@ struct AlignOf {
enum { Alignment_LessEqual_16Bytes = Alignment <= 16 ? 1 : 0 };
};
+#ifndef _MSC_VER
+template <typename T> constexpr unsigned AlignOf<T>::Alignment;
+#endif
+
/// alignOf - A templated function that returns the minimum alignment
/// of a type. This provides no extra functionality beyond the AlignOf
/// class besides some cosmetic cleanliness. Example usage:
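The AlignOf.h hunk replaces an anonymous enum member with a static constexpr on non-MSVC compilers to silence GCC's -Wenum-compare when alignments of different instantiations are compared. A stand-alone sketch of the same technique, using a hypothetical AlignmentOf template rather than LLVM's AlignOf:

// Hedged illustration; not LLVM's implementation.
template <typename T> struct AlignmentOf {
  // A constexpr member instead of an anonymous enum: comparing
  // AlignmentOf<X>::Alignment with AlignmentOf<Y>::Alignment no longer
  // triggers GCC's -Wenum-compare.
  static constexpr unsigned Alignment = alignof(T);
};
// Out-of-line definition so ODR-uses (e.g. binding a const reference) link.
template <typename T> constexpr unsigned AlignmentOf<T>::Alignment;

static_assert(AlignmentOf<double>::Alignment >= AlignmentOf<char>::Alignment,
              "comparison across instantiations is warning-free");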
diff --git a/contrib/llvm/include/llvm/Support/ArrayRecycler.h b/contrib/llvm/include/llvm/Support/ArrayRecycler.h
index 36f644a..5907c79 100644
--- a/contrib/llvm/include/llvm/Support/ArrayRecycler.h
+++ b/contrib/llvm/include/llvm/Support/ArrayRecycler.h
@@ -138,6 +138,6 @@ public:
}
};
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/Atomic.h b/contrib/llvm/include/llvm/Support/Atomic.h
index 9ec23e8..a3cec47 100644
--- a/contrib/llvm/include/llvm/Support/Atomic.h
+++ b/contrib/llvm/include/llvm/Support/Atomic.h
@@ -33,7 +33,7 @@ namespace llvm {
cas_flag AtomicAdd(volatile cas_flag* ptr, cas_flag val);
cas_flag AtomicMul(volatile cas_flag* ptr, cas_flag val);
cas_flag AtomicDiv(volatile cas_flag* ptr, cas_flag val);
- }
-}
+ } // namespace sys
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/BlockFrequency.h b/contrib/llvm/include/llvm/Support/BlockFrequency.h
index 4304a25..20b2782 100644
--- a/contrib/llvm/include/llvm/Support/BlockFrequency.h
+++ b/contrib/llvm/include/llvm/Support/BlockFrequency.h
@@ -69,6 +69,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/BranchProbability.h b/contrib/llvm/include/llvm/Support/BranchProbability.h
index a6429dd..df89d2d 100644
--- a/contrib/llvm/include/llvm/Support/BranchProbability.h
+++ b/contrib/llvm/include/llvm/Support/BranchProbability.h
@@ -84,6 +84,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const BranchProbability &Prob) {
return Prob.print(OS);
}
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/COM.h b/contrib/llvm/include/llvm/Support/COM.h
index a2d5a7a..45559b0 100644
--- a/contrib/llvm/include/llvm/Support/COM.h
+++ b/contrib/llvm/include/llvm/Support/COM.h
@@ -30,7 +30,7 @@ private:
InitializeCOMRAII(const InitializeCOMRAII &) = delete;
void operator=(const InitializeCOMRAII &) = delete;
};
-}
-}
+} // namespace sys
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/Casting.h b/contrib/llvm/include/llvm/Support/Casting.h
index 6ba5efa..e84676a 100644
--- a/contrib/llvm/include/llvm/Support/Casting.h
+++ b/contrib/llvm/include/llvm/Support/Casting.h
@@ -321,6 +321,6 @@ dyn_cast_or_null(Y *Val) {
return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/CodeGen.h b/contrib/llvm/include/llvm/Support/CodeGen.h
index 243f2dd..1eca568 100644
--- a/contrib/llvm/include/llvm/Support/CodeGen.h
+++ b/contrib/llvm/include/llvm/Support/CodeGen.h
@@ -90,6 +90,6 @@ namespace llvm {
}
llvm_unreachable("Bad CodeModel!");
}
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/CommandLine.h b/contrib/llvm/include/llvm/Support/CommandLine.h
index 1ad8a3b..ed80921 100644
--- a/contrib/llvm/include/llvm/Support/CommandLine.h
+++ b/contrib/llvm/include/llvm/Support/CommandLine.h
@@ -33,6 +33,9 @@
namespace llvm {
+class BumpPtrStringSaver;
+class StringSaver;
+
/// cl Namespace - This namespace contains all of the command line option
/// processing machinery. It is intentionally a short name to make qualified
/// usage concise.
@@ -1676,16 +1679,6 @@ StringMap<Option *> &getRegisteredOptions();
// Standalone command line processing utilities.
//
-/// \brief Saves strings in the inheritor's stable storage and returns a stable
-/// raw character pointer.
-class StringSaver {
- virtual void anchor();
-
-public:
- virtual const char *SaveString(const char *Str) = 0;
- virtual ~StringSaver(){}; // Pacify -Wnon-virtual-dtor.
-};
-
/// \brief Tokenizes a command line that can contain escapes and quotes.
//
/// The quoting rules match those used by GCC and other tools that use
diff --git a/contrib/llvm/include/llvm/Support/CrashRecoveryContext.h b/contrib/llvm/include/llvm/Support/CrashRecoveryContext.h
index c08c3c1..13aff7a 100644
--- a/contrib/llvm/include/llvm/Support/CrashRecoveryContext.h
+++ b/contrib/llvm/include/llvm/Support/CrashRecoveryContext.h
@@ -199,6 +199,6 @@ public:
cleanup = 0;
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/DOTGraphTraits.h b/contrib/llvm/include/llvm/Support/DOTGraphTraits.h
index 95e37c0..3d21129 100644
--- a/contrib/llvm/include/llvm/Support/DOTGraphTraits.h
+++ b/contrib/llvm/include/llvm/Support/DOTGraphTraits.h
@@ -161,6 +161,6 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits {
DOTGraphTraits (bool simple=false) : DefaultDOTGraphTraits (simple) {}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/DataStream.h b/contrib/llvm/include/llvm/Support/DataStream.h
index 8bc4133..9a4daec 100644
--- a/contrib/llvm/include/llvm/Support/DataStream.h
+++ b/contrib/llvm/include/llvm/Support/DataStream.h
@@ -17,6 +17,7 @@
#ifndef LLVM_SUPPORT_DATASTREAM_H
#define LLVM_SUPPORT_DATASTREAM_H
+#include <memory>
#include <string>
namespace llvm {
@@ -30,9 +31,8 @@ public:
virtual ~DataStreamer();
};
-DataStreamer *getDataFileStreamer(const std::string &Filename,
- std::string *Err);
-
-}
+std::unique_ptr<DataStreamer> getDataFileStreamer(const std::string &Filename,
+ std::string *Err);
+} // namespace llvm
#endif // LLVM_SUPPORT_DATASTREAM_H_
diff --git a/contrib/llvm/include/llvm/Support/Debug.h b/contrib/llvm/include/llvm/Support/Debug.h
index fff4f98..2f3fe77 100644
--- a/contrib/llvm/include/llvm/Support/Debug.h
+++ b/contrib/llvm/include/llvm/Support/Debug.h
@@ -91,6 +91,6 @@ raw_ostream &dbgs();
//
#define DEBUG(X) DEBUG_WITH_TYPE(DEBUG_TYPE, X)
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/DynamicLibrary.h b/contrib/llvm/include/llvm/Support/DynamicLibrary.h
index a7d2221..d6ff904 100644
--- a/contrib/llvm/include/llvm/Support/DynamicLibrary.h
+++ b/contrib/llvm/include/llvm/Support/DynamicLibrary.h
@@ -99,7 +99,7 @@ namespace sys {
static void AddSymbol(StringRef symbolName, void *symbolValue);
};
-} // End sys namespace
-} // End llvm namespace
+} // namespace sys
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/ELF.h b/contrib/llvm/include/llvm/Support/ELF.h
index 8b915ff..e23fcbb 100644
--- a/contrib/llvm/include/llvm/Support/ELF.h
+++ b/contrib/llvm/include/llvm/Support/ELF.h
@@ -133,7 +133,7 @@ enum {
EM_386 = 3, // Intel 386
EM_68K = 4, // Motorola 68000
EM_88K = 5, // Motorola 88000
- EM_486 = 6, // Intel 486 (deprecated)
+ EM_IAMCU = 6, // Intel MCU
EM_860 = 7, // Intel 80860
EM_MIPS = 8, // MIPS R3000
EM_S370 = 9, // IBM System/370
diff --git a/contrib/llvm/include/llvm/Support/ELFRelocs/Sparc.def b/contrib/llvm/include/llvm/Support/ELFRelocs/Sparc.def
index d6772ea..7e01a4a 100644
--- a/contrib/llvm/include/llvm/Support/ELFRelocs/Sparc.def
+++ b/contrib/llvm/include/llvm/Support/ELFRelocs/Sparc.def
@@ -83,7 +83,7 @@ ELF_RELOC(R_SPARC_TLS_DTPOFF64, 77)
ELF_RELOC(R_SPARC_TLS_TPOFF32, 78)
ELF_RELOC(R_SPARC_TLS_TPOFF64, 79)
ELF_RELOC(R_SPARC_GOTDATA_HIX22, 80)
-ELF_RELOC(R_SPARC_GOTDATA_LOX22, 81)
+ELF_RELOC(R_SPARC_GOTDATA_LOX10, 81)
ELF_RELOC(R_SPARC_GOTDATA_OP_HIX22, 82)
-ELF_RELOC(R_SPARC_GOTDATA_OP_LOX22, 83)
+ELF_RELOC(R_SPARC_GOTDATA_OP_LOX10, 83)
ELF_RELOC(R_SPARC_GOTDATA_OP, 84)
diff --git a/contrib/llvm/include/llvm/Support/Endian.h b/contrib/llvm/include/llvm/Support/Endian.h
index e9fe22e..fd59009 100644
--- a/contrib/llvm/include/llvm/Support/Endian.h
+++ b/contrib/llvm/include/llvm/Support/Endian.h
@@ -104,6 +104,16 @@ struct packed_endian_specific_integral {
return *this;
}
+ packed_endian_specific_integral &operator|=(value_type newValue) {
+ *this = *this | newValue;
+ return *this;
+ }
+
+ packed_endian_specific_integral &operator&=(value_type newValue) {
+ *this = *this & newValue;
+ return *this;
+ }
+
private:
AlignedCharArray<PickAlignment<value_type, alignment>::value,
sizeof(value_type)> Value;
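The Endian.h hunk adds operator|= and operator&= to the packed endian integral wrapper, so byte-swapped fields can be updated in place. A short usage sketch, assuming the support::ulittle32_t typedef defined elsewhere in the same header:

#include "llvm/Support/Endian.h"

// Hedged sketch: a little-endian on-disk header field updated in place,
// without an explicit load/modify/store round trip.
struct FileHeader {
  llvm::support::ulittle32_t Flags;  // byte order handled by the wrapper
};

void setDirty(FileHeader &H) {
  H.Flags |= 0x4;    // uses the new operator|=
  H.Flags &= ~0x1u;  // uses the new operator&=
}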
diff --git a/contrib/llvm/include/llvm/Support/Errc.h b/contrib/llvm/include/llvm/Support/Errc.h
index 80bfe2a..7efca02 100644
--- a/contrib/llvm/include/llvm/Support/Errc.h
+++ b/contrib/llvm/include/llvm/Support/Errc.h
@@ -78,7 +78,7 @@ enum class errc {
inline std::error_code make_error_code(errc E) {
return std::error_code(static_cast<int>(E), std::generic_category());
}
-}
+} // namespace llvm
namespace std {
template <> struct is_error_code_enum<llvm::errc> : std::true_type {};
diff --git a/contrib/llvm/include/llvm/Support/ErrorHandling.h b/contrib/llvm/include/llvm/Support/ErrorHandling.h
index 9afd52d..427d8ea 100644
--- a/contrib/llvm/include/llvm/Support/ErrorHandling.h
+++ b/contrib/llvm/include/llvm/Support/ErrorHandling.h
@@ -84,7 +84,7 @@ namespace llvm {
LLVM_ATTRIBUTE_NORETURN void
llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr,
unsigned line=0);
-}
+} // namespace llvm
/// Marks that the current location is not supposed to be reachable.
/// In !NDEBUG builds, prints the message and location info to stderr.
diff --git a/contrib/llvm/include/llvm/Support/FileSystem.h b/contrib/llvm/include/llvm/Support/FileSystem.h
index a736c32..5a857e4 100644
--- a/contrib/llvm/include/llvm/Support/FileSystem.h
+++ b/contrib/llvm/include/llvm/Support/FileSystem.h
@@ -724,7 +724,7 @@ namespace detail {
intptr_t IterationHandle;
directory_entry CurrentEntry;
};
-}
+} // namespace detail
/// directory_iterator - Iterates through the entries in path. There is no
/// operator++ because we need an error_code. If it's really needed we can make
@@ -786,7 +786,7 @@ namespace detail {
uint16_t Level;
bool HasNoPushRequest;
};
-}
+} // namespace detail
/// recursive_directory_iterator - Same as directory_iterator except for it
/// recurses down into child directories.
diff --git a/contrib/llvm/include/llvm/Support/FileUtilities.h b/contrib/llvm/include/llvm/Support/FileUtilities.h
index 2ee2c60..8a790de 100644
--- a/contrib/llvm/include/llvm/Support/FileUtilities.h
+++ b/contrib/llvm/include/llvm/Support/FileUtilities.h
@@ -73,6 +73,6 @@ namespace llvm {
/// will not be removed when the object is destroyed.
void releaseFile() { DeleteIt = false; }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/FormattedStream.h b/contrib/llvm/include/llvm/Support/FormattedStream.h
index 4a135cd..145d898 100644
--- a/contrib/llvm/include/llvm/Support/FormattedStream.h
+++ b/contrib/llvm/include/llvm/Support/FormattedStream.h
@@ -156,7 +156,7 @@ formatted_raw_ostream &ferrs();
/// debug output. Use it like: fdbgs() << "foo" << "bar";
formatted_raw_ostream &fdbgs();
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/GCOV.h b/contrib/llvm/include/llvm/Support/GCOV.h
index c2e34bd..138b9db 100644
--- a/contrib/llvm/include/llvm/Support/GCOV.h
+++ b/contrib/llvm/include/llvm/Support/GCOV.h
@@ -435,6 +435,6 @@ private:
FileCoverageList FileCoverages;
FuncCoverageMap FuncCoverages;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/GenericDomTree.h b/contrib/llvm/include/llvm/Support/GenericDomTree.h
index 63678bb..cd59f82 100644
--- a/contrib/llvm/include/llvm/Support/GenericDomTree.h
+++ b/contrib/llvm/include/llvm/Support/GenericDomTree.h
@@ -772,6 +772,6 @@ bool DominatorTreeBase<NodeT>::properlyDominates(const NodeT *A,
getNode(const_cast<NodeT *>(B)));
}
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h
index 7c065f9..76e3cc8 100644
--- a/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h
@@ -288,6 +288,6 @@ void Calculate(DominatorTreeBase<typename GraphTraits<NodeT>::NodeType>& DT,
DT.updateDFSNumbers();
}
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/GraphWriter.h b/contrib/llvm/include/llvm/Support/GraphWriter.h
index 7d1c273..04b4084 100644
--- a/contrib/llvm/include/llvm/Support/GraphWriter.h
+++ b/contrib/llvm/include/llvm/Support/GraphWriter.h
@@ -356,6 +356,6 @@ void ViewGraph(const GraphType &G, const Twine &Name,
DisplayGraph(Filename, true, Program);
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/Host.h b/contrib/llvm/include/llvm/Support/Host.h
index 8f4bf3c..f2519df 100644
--- a/contrib/llvm/include/llvm/Support/Host.h
+++ b/contrib/llvm/include/llvm/Support/Host.h
@@ -68,7 +68,7 @@ namespace sys {
///
/// \return - True on success.
bool getHostCPUFeatures(StringMap<bool> &Features);
-}
-}
+} // namespace sys
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/LineIterator.h b/contrib/llvm/include/llvm/Support/LineIterator.h
index 9d4cd3b..d0f7d30 100644
--- a/contrib/llvm/include/llvm/Support/LineIterator.h
+++ b/contrib/llvm/include/llvm/Support/LineIterator.h
@@ -83,6 +83,6 @@ private:
/// \brief Advance the iterator to the next line.
void advance();
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/MD5.h b/contrib/llvm/include/llvm/Support/MD5.h
index f6e1e92..8658c8e 100644
--- a/contrib/llvm/include/llvm/Support/MD5.h
+++ b/contrib/llvm/include/llvm/Support/MD5.h
@@ -65,6 +65,6 @@ private:
const uint8_t *body(ArrayRef<uint8_t> Data);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/ManagedStatic.h b/contrib/llvm/include/llvm/Support/ManagedStatic.h
index addd34e..1187e05 100644
--- a/contrib/llvm/include/llvm/Support/ManagedStatic.h
+++ b/contrib/llvm/include/llvm/Support/ManagedStatic.h
@@ -106,6 +106,6 @@ struct llvm_shutdown_obj {
~llvm_shutdown_obj() { llvm_shutdown(); }
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/MathExtras.h b/contrib/llvm/include/llvm/Support/MathExtras.h
index 2cf7e0e..7c63aaa 100644
--- a/contrib/llvm/include/llvm/Support/MathExtras.h
+++ b/contrib/llvm/include/llvm/Support/MathExtras.h
@@ -642,6 +642,6 @@ inline int64_t SignExtend64(uint64_t X, unsigned B) {
}
extern const float huge_valf;
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/Memory.h b/contrib/llvm/include/llvm/Support/Memory.h
index b4305cb..6abb17a 100644
--- a/contrib/llvm/include/llvm/Support/Memory.h
+++ b/contrib/llvm/include/llvm/Support/Memory.h
@@ -155,7 +155,7 @@ namespace sys {
/// as writable.
static bool setRangeWritable(const void *Addr, size_t Size);
};
-}
-}
+} // namespace sys
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/MemoryBuffer.h b/contrib/llvm/include/llvm/Support/MemoryBuffer.h
index 35a7bdb..81616d8 100644
--- a/contrib/llvm/include/llvm/Support/MemoryBuffer.h
+++ b/contrib/llvm/include/llvm/Support/MemoryBuffer.h
@@ -124,7 +124,7 @@ public:
static ErrorOr<std::unique_ptr<MemoryBuffer>>
getFileOrSTDIN(const Twine &Filename, int64_t FileSize = -1);
- /// Map a subrange of the the specified file as a MemoryBuffer.
+ /// Map a subrange of the specified file as a MemoryBuffer.
static ErrorOr<std::unique_ptr<MemoryBuffer>>
getFileSlice(const Twine &Filename, uint64_t MapSize, uint64_t Offset);
diff --git a/contrib/llvm/include/llvm/Support/MemoryObject.h b/contrib/llvm/include/llvm/Support/MemoryObject.h
index e0c8749..deff6c1 100644
--- a/contrib/llvm/include/llvm/Support/MemoryObject.h
+++ b/contrib/llvm/include/llvm/Support/MemoryObject.h
@@ -63,6 +63,6 @@ public:
virtual bool isValidAddress(uint64_t address) const = 0;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/MipsABIFlags.h b/contrib/llvm/include/llvm/Support/MipsABIFlags.h
index 93f6b41..8740823 100644
--- a/contrib/llvm/include/llvm/Support/MipsABIFlags.h
+++ b/contrib/llvm/include/llvm/Support/MipsABIFlags.h
@@ -96,7 +96,7 @@ enum Val_GNU_MIPS_ABI_MSA {
Val_GNU_MIPS_ABI_MSA_ANY = 0, // not tagged
Val_GNU_MIPS_ABI_MSA_128 = 1 // 128-bit MSA
};
-}
-}
+} // namespace Mips
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/Mutex.h b/contrib/llvm/include/llvm/Support/Mutex.h
index 0f4e61a..47f0ab6 100644
--- a/contrib/llvm/include/llvm/Support/Mutex.h
+++ b/contrib/llvm/include/llvm/Support/Mutex.h
@@ -152,7 +152,7 @@ namespace llvm
};
typedef SmartScopedLock<false> ScopedLock;
- }
-}
+ } // namespace sys
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/MutexGuard.h b/contrib/llvm/include/llvm/Support/MutexGuard.h
index 07b64b6..ea58617 100644
--- a/contrib/llvm/include/llvm/Support/MutexGuard.h
+++ b/contrib/llvm/include/llvm/Support/MutexGuard.h
@@ -36,6 +36,6 @@ namespace llvm {
/// is held.
bool holds(const sys::Mutex& lock) const { return &M == &lock; }
};
-}
+} // namespace llvm
#endif // LLVM_SUPPORT_MUTEXGUARD_H
diff --git a/contrib/llvm/include/llvm/Support/PluginLoader.h b/contrib/llvm/include/llvm/Support/PluginLoader.h
index bdbb134..da4324e 100644
--- a/contrib/llvm/include/llvm/Support/PluginLoader.h
+++ b/contrib/llvm/include/llvm/Support/PluginLoader.h
@@ -32,6 +32,6 @@ namespace llvm {
LoadOpt("load", cl::ZeroOrMore, cl::value_desc("pluginfilename"),
cl::desc("Load the specified plugin"));
#endif
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/Process.h b/contrib/llvm/include/llvm/Support/Process.h
index cfdd06c..089894c 100644
--- a/contrib/llvm/include/llvm/Support/Process.h
+++ b/contrib/llvm/include/llvm/Support/Process.h
@@ -184,7 +184,7 @@ public:
static unsigned GetRandomNumber();
};
-}
-}
+} // namespace sys
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/Program.h b/contrib/llvm/include/llvm/Support/Program.h
index b89a0f7..5f1bc12 100644
--- a/contrib/llvm/include/llvm/Support/Program.h
+++ b/contrib/llvm/include/llvm/Support/Program.h
@@ -187,7 +187,7 @@ struct ProcessInfo {
///< string is non-empty upon return an error occurred while invoking the
///< program.
);
- }
-}
+ } // namespace sys
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/RWMutex.h b/contrib/llvm/include/llvm/Support/RWMutex.h
index 4be9313..5299708 100644
--- a/contrib/llvm/include/llvm/Support/RWMutex.h
+++ b/contrib/llvm/include/llvm/Support/RWMutex.h
@@ -171,7 +171,7 @@ namespace llvm
}
};
typedef SmartScopedWriter<false> ScopedWriter;
- }
-}
+ } // namespace sys
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/RandomNumberGenerator.h b/contrib/llvm/include/llvm/Support/RandomNumberGenerator.h
index 7446558..316778b 100644
--- a/contrib/llvm/include/llvm/Support/RandomNumberGenerator.h
+++ b/contrib/llvm/include/llvm/Support/RandomNumberGenerator.h
@@ -53,6 +53,6 @@ private:
friend class Module;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/Recycler.h b/contrib/llvm/include/llvm/Support/Recycler.h
index e97f36a..a909b9d 100644
--- a/contrib/llvm/include/llvm/Support/Recycler.h
+++ b/contrib/llvm/include/llvm/Support/Recycler.h
@@ -123,6 +123,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/RecyclingAllocator.h b/contrib/llvm/include/llvm/Support/RecyclingAllocator.h
index 001d1cf..fded4ed 100644
--- a/contrib/llvm/include/llvm/Support/RecyclingAllocator.h
+++ b/contrib/llvm/include/llvm/Support/RecyclingAllocator.h
@@ -57,7 +57,7 @@ public:
}
};
-}
+} // namespace llvm
template<class AllocatorType, class T, size_t Size, size_t Align>
inline void *operator new(size_t size,
diff --git a/contrib/llvm/include/llvm/Support/Regex.h b/contrib/llvm/include/llvm/Support/Regex.h
index 31b35ed..15f20a6 100644
--- a/contrib/llvm/include/llvm/Support/Regex.h
+++ b/contrib/llvm/include/llvm/Support/Regex.h
@@ -100,6 +100,6 @@ namespace llvm {
struct llvm_regex *preg;
int error;
};
-}
+} // namespace llvm
#endif // LLVM_SUPPORT_REGEX_H
diff --git a/contrib/llvm/include/llvm/Support/Registry.h b/contrib/llvm/include/llvm/Support/Registry.h
index 95c4e96..7eb1090 100644
--- a/contrib/llvm/include/llvm/Support/Registry.h
+++ b/contrib/llvm/include/llvm/Support/Registry.h
@@ -228,6 +228,6 @@ namespace llvm {
template <typename T, typename U>
typename Registry<T,U>::listener *Registry<T,U>::ListenerTail;
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/Signals.h b/contrib/llvm/include/llvm/Support/Signals.h
index 7e165d7..0cb421b 100644
--- a/contrib/llvm/include/llvm/Support/Signals.h
+++ b/contrib/llvm/include/llvm/Support/Signals.h
@@ -62,7 +62,7 @@ namespace sys {
/// different thread on some platforms.
/// @brief Register a function to be called when ctrl-c is pressed.
void SetInterruptFunction(void (*IF)());
-} // End sys namespace
-} // End llvm namespace
+} // namespace sys
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/SourceMgr.h b/contrib/llvm/include/llvm/Support/SourceMgr.h
index d492748..5eef9a0 100644
--- a/contrib/llvm/include/llvm/Support/SourceMgr.h
+++ b/contrib/llvm/include/llvm/Support/SourceMgr.h
@@ -276,10 +276,10 @@ public:
return FixIts;
}
- void print(const char *ProgName, raw_ostream &S,
- bool ShowColors = true) const;
+ void print(const char *ProgName, raw_ostream &S, bool ShowColors = true,
+ bool ShowKindLabel = true) const;
};
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/StreamingMemoryObject.h b/contrib/llvm/include/llvm/Support/StreamingMemoryObject.h
index 932e635..fe0cc7e 100644
--- a/contrib/llvm/include/llvm/Support/StreamingMemoryObject.h
+++ b/contrib/llvm/include/llvm/Support/StreamingMemoryObject.h
@@ -24,7 +24,7 @@ namespace llvm {
/// setKnownObjectSize methods which are not applicable to non-streamed objects.
class StreamingMemoryObject : public MemoryObject {
public:
- StreamingMemoryObject(DataStreamer *streamer);
+ StreamingMemoryObject(std::unique_ptr<DataStreamer> Streamer);
uint64_t getExtent() const override;
uint64_t readBytes(uint8_t *Buf, uint64_t Size,
uint64_t Address) const override;
@@ -89,5 +89,5 @@ private:
MemoryObject *getNonStreamedMemoryObject(
const unsigned char *Start, const unsigned char *End);
-}
+} // namespace llvm
#endif // STREAMINGMEMORYOBJECT_H_
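The DataStream.h and StreamingMemoryObject.h hunks above make DataStreamer ownership explicit: getDataFileStreamer() now returns a std::unique_ptr and the StreamingMemoryObject constructor takes one by value. A hedged sketch of an adapted call site; "Filename" is a placeholder path:

#include "llvm/Support/DataStream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/StreamingMemoryObject.h"
#include <cstdint>
#include <memory>
#include <utility>

void processBitcode(const std::string &Filename) {
  std::string Err;
  std::unique_ptr<llvm::DataStreamer> Streamer =
      llvm::getDataFileStreamer(Filename, &Err);
  if (!Streamer)
    llvm::report_fatal_error(Err);
  // The constructor now takes ownership; no raw pointer or manual delete.
  llvm::StreamingMemoryObject Bytes(std::move(Streamer));
  uint8_t Buf[4];
  Bytes.readBytes(Buf, sizeof(Buf), 0);  // read the first few bytes
}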
diff --git a/contrib/llvm/include/llvm/Support/StringPool.h b/contrib/llvm/include/llvm/Support/StringPool.h
index 2ec0c3b..3aa826b 100644
--- a/contrib/llvm/include/llvm/Support/StringPool.h
+++ b/contrib/llvm/include/llvm/Support/StringPool.h
@@ -133,6 +133,6 @@ namespace llvm {
inline bool operator!=(const PooledStringPtr &That) const { return S != That.S; }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/StringSaver.h b/contrib/llvm/include/llvm/Support/StringSaver.h
new file mode 100644
index 0000000..c7a2e8f
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/StringSaver.h
@@ -0,0 +1,42 @@
+//===- llvm/Support/StringSaver.h -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_STRINGSAVER_H
+#define LLVM_SUPPORT_STRINGSAVER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+
+/// \brief Saves strings in the inheritor's stable storage and returns a stable
+/// raw character pointer.
+class StringSaver {
+protected:
+ ~StringSaver() {}
+ virtual const char *saveImpl(StringRef S);
+
+public:
+ StringSaver(BumpPtrAllocator &Alloc) : Alloc(Alloc) {}
+ const char *save(const char *S) { return save(StringRef(S)); }
+ const char *save(StringRef S) { return saveImpl(S); }
+ const char *save(const Twine &S) { return save(StringRef(S.str())); }
+ const char *save(std::string &S) { return save(StringRef(S)); }
+
+private:
+ BumpPtrAllocator &Alloc;
+};
+
+class BumpPtrStringSaver final : public StringSaver {
+public:
+ BumpPtrStringSaver(BumpPtrAllocator &Alloc) : StringSaver(Alloc) {}
+};
+} // namespace llvm
+#endif
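This new header replaces the abstract StringSaver that the CommandLine.h hunk above removes, backing it with a BumpPtrAllocator instead of a virtual SaveString(). A minimal usage sketch, assuming the interface added in this file:

#include "llvm/Support/Allocator.h"
#include "llvm/Support/StringSaver.h"

void example() {
  llvm::BumpPtrAllocator Alloc;
  llvm::BumpPtrStringSaver Saver(Alloc);
  // Persist a transient string (e.g. an expanded response-file argument);
  // the returned pointer stays valid for the allocator's lifetime.
  const char *Stable = Saver.save("-mcpu=generic");
  (void)Stable;
}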
diff --git a/contrib/llvm/include/llvm/Support/SystemUtils.h b/contrib/llvm/include/llvm/Support/SystemUtils.h
index 2997b1b..f8c5dc8 100644
--- a/contrib/llvm/include/llvm/Support/SystemUtils.h
+++ b/contrib/llvm/include/llvm/Support/SystemUtils.h
@@ -27,6 +27,6 @@ bool CheckBitcodeOutputToConsole(
bool print_warning = true ///< Control whether warnings are printed
);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/TargetRegistry.h b/contrib/llvm/include/llvm/Support/TargetRegistry.h
index 837fc66..1c11ef3 100644
--- a/contrib/llvm/include/llvm/Support/TargetRegistry.h
+++ b/contrib/llvm/include/llvm/Support/TargetRegistry.h
@@ -69,7 +69,7 @@ MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB,
bool RelaxAll, bool DWARFMustBeAtTheEnd,
bool LabelSections = false);
-MCRelocationInfo *createMCRelocationInfo(StringRef TT, MCContext &Ctx);
+MCRelocationInfo *createMCRelocationInfo(const Triple &TT, MCContext &Ctx);
MCSymbolizer *createMCSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp,
@@ -98,7 +98,7 @@ public:
typedef MCInstrInfo *(*MCInstrInfoCtorFnTy)(void);
typedef MCInstrAnalysis *(*MCInstrAnalysisCtorFnTy)(const MCInstrInfo *Info);
typedef MCRegisterInfo *(*MCRegInfoCtorFnTy)(StringRef TT);
- typedef MCSubtargetInfo *(*MCSubtargetInfoCtorFnTy)(StringRef TT,
+ typedef MCSubtargetInfo *(*MCSubtargetInfoCtorFnTy)(const Triple &TT,
StringRef CPU,
StringRef Features);
typedef TargetMachine *(*TargetMachineCtorTy)(
@@ -112,7 +112,7 @@ public:
TargetMachine &TM, std::unique_ptr<MCStreamer> &&Streamer);
typedef MCAsmBackend *(*MCAsmBackendCtorTy)(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+ const Triple &TT, StringRef CPU);
typedef MCTargetAsmParser *(*MCAsmParserCtorTy)(
MCSubtargetInfo &STI, MCAsmParser &P, const MCInstrInfo &MII,
const MCTargetOptions &Options);
@@ -147,7 +147,7 @@ public:
bool IsVerboseAsm);
typedef MCTargetStreamer *(*ObjectTargetStreamerCtorTy)(
MCStreamer &S, const MCSubtargetInfo &STI);
- typedef MCRelocationInfo *(*MCRelocationInfoCtorTy)(StringRef TT,
+ typedef MCRelocationInfo *(*MCRelocationInfoCtorTy)(const Triple &TT,
MCContext &Ctx);
typedef MCSymbolizer *(*MCSymbolizerCtorTy)(
StringRef TT, LLVMOpInfoCallback GetOpInfo,
@@ -334,18 +334,18 @@ public:
/// createMCSubtargetInfo - Create a MCSubtargetInfo implementation.
///
- /// \param Triple This argument is used to determine the target machine
+ /// \param TheTriple This argument is used to determine the target machine
/// feature set; it should always be provided. Generally this should be
/// either the target triple from the module, or the target triple of the
/// host if that does not exist.
/// \param CPU This specifies the name of the target CPU.
/// \param Features This specifies the string representation of the
/// additional target features.
- MCSubtargetInfo *createMCSubtargetInfo(StringRef Triple, StringRef CPU,
+ MCSubtargetInfo *createMCSubtargetInfo(StringRef TheTriple, StringRef CPU,
StringRef Features) const {
if (!MCSubtargetInfoCtorFn)
return nullptr;
- return MCSubtargetInfoCtorFn(Triple, CPU, Features);
+ return MCSubtargetInfoCtorFn(Triple(TheTriple), CPU, Features);
}
/// createTargetMachine - Create a target specific machine implementation
@@ -369,12 +369,12 @@ public:
/// createMCAsmBackend - Create a target specific assembly parser.
///
- /// \param Triple The target triple string.
- MCAsmBackend *createMCAsmBackend(const MCRegisterInfo &MRI, StringRef Triple,
- StringRef CPU) const {
+ /// \param TheTriple The target triple string.
+ MCAsmBackend *createMCAsmBackend(const MCRegisterInfo &MRI,
+ StringRef TheTriple, StringRef CPU) const {
if (!MCAsmBackendCtorFn)
return nullptr;
- return MCAsmBackendCtorFn(*this, MRI, Triple, CPU);
+ return MCAsmBackendCtorFn(*this, MRI, Triple(TheTriple), CPU);
}
/// createMCAsmParser - Create a target specific assembly parser.
@@ -507,7 +507,7 @@ public:
MCRelocationInfoCtorTy Fn = MCRelocationInfoCtorFn
? MCRelocationInfoCtorFn
: llvm::createMCRelocationInfo;
- return Fn(TT, Ctx);
+ return Fn(Triple(TT), Ctx);
}
/// createMCSymbolizer - Create a target specific MCSymbolizer.
@@ -1056,7 +1056,7 @@ template <class MCSubtargetInfoImpl> struct RegisterMCSubtargetInfo {
}
private:
- static MCSubtargetInfo *Allocator(StringRef /*TT*/, StringRef /*CPU*/,
+ static MCSubtargetInfo *Allocator(const Triple & /*TT*/, StringRef /*CPU*/,
StringRef /*FS*/) {
return new MCSubtargetInfoImpl();
}
@@ -1094,7 +1094,7 @@ private:
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL) {
- return new TargetMachineImpl(T, TT, CPU, FS, Options, RM, CM, OL);
+ return new TargetMachineImpl(T, Triple(TT), CPU, FS, Options, RM, CM, OL);
}
};
@@ -1112,8 +1112,8 @@ template <class MCAsmBackendImpl> struct RegisterMCAsmBackend {
private:
static MCAsmBackend *Allocator(const Target &T, const MCRegisterInfo &MRI,
- StringRef Triple, StringRef CPU) {
- return new MCAsmBackendImpl(T, MRI, Triple, CPU);
+ const Triple &TheTriple, StringRef CPU) {
+ return new MCAsmBackendImpl(T, MRI, TheTriple, CPU);
}
};
@@ -1178,6 +1178,6 @@ private:
return new MCCodeEmitterImpl();
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/TargetSelect.h b/contrib/llvm/include/llvm/Support/TargetSelect.h
index a86e953..96ecf0b 100644
--- a/contrib/llvm/include/llvm/Support/TargetSelect.h
+++ b/contrib/llvm/include/llvm/Support/TargetSelect.h
@@ -161,6 +161,6 @@ namespace llvm {
#endif
}
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/ThreadLocal.h b/contrib/llvm/include/llvm/Support/ThreadLocal.h
index 427a67e..db61f5c 100644
--- a/contrib/llvm/include/llvm/Support/ThreadLocal.h
+++ b/contrib/llvm/include/llvm/Support/ThreadLocal.h
@@ -57,7 +57,7 @@ namespace llvm {
// erase - Removes the pointer associated with the current thread.
void erase() { removeInstance(); }
};
- }
-}
+ } // namespace sys
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/Threading.h b/contrib/llvm/include/llvm/Support/Threading.h
index 3cca1d6..365fb9e 100644
--- a/contrib/llvm/include/llvm/Support/Threading.h
+++ b/contrib/llvm/include/llvm/Support/Threading.h
@@ -34,6 +34,6 @@ namespace llvm {
/// the thread stack.
void llvm_execute_on_thread(void (*UserFn)(void*), void *UserData,
unsigned RequestedStackSize = 0);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/TimeValue.h b/contrib/llvm/include/llvm/Support/TimeValue.h
index 6bca58b..a9efb1b 100644
--- a/contrib/llvm/include/llvm/Support/TimeValue.h
+++ b/contrib/llvm/include/llvm/Support/TimeValue.h
@@ -380,7 +380,7 @@ inline TimeValue operator - (const TimeValue &tv1, const TimeValue &tv2) {
return difference;
}
-}
-}
+} // namespace sys
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/Timer.h b/contrib/llvm/include/llvm/Support/Timer.h
index 2cd30e2..56fbccc 100644
--- a/contrib/llvm/include/llvm/Support/Timer.h
+++ b/contrib/llvm/include/llvm/Support/Timer.h
@@ -184,6 +184,6 @@ private:
void PrintQueuedTimers(raw_ostream &OS);
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/ToolOutputFile.h b/contrib/llvm/include/llvm/Support/ToolOutputFile.h
index 1be26c2..e7a6545 100644
--- a/contrib/llvm/include/llvm/Support/ToolOutputFile.h
+++ b/contrib/llvm/include/llvm/Support/ToolOutputFile.h
@@ -58,6 +58,6 @@ public:
void keep() { Installer.Keep = true; }
};
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/UniqueLock.h b/contrib/llvm/include/llvm/Support/UniqueLock.h
index 529284d..c5f37a7 100644
--- a/contrib/llvm/include/llvm/Support/UniqueLock.h
+++ b/contrib/llvm/include/llvm/Support/UniqueLock.h
@@ -62,6 +62,6 @@ namespace llvm {
bool owns_lock() { return locked; }
};
-}
+} // namespace llvm
#endif // LLVM_SUPPORT_UNIQUE_LOCK_H
diff --git a/contrib/llvm/include/llvm/Support/Valgrind.h b/contrib/llvm/include/llvm/Support/Valgrind.h
index cebf75c..7eabca9 100644
--- a/contrib/llvm/include/llvm/Support/Valgrind.h
+++ b/contrib/llvm/include/llvm/Support/Valgrind.h
@@ -67,7 +67,7 @@ namespace sys {
#define TsanIgnoreWritesBegin()
#define TsanIgnoreWritesEnd()
#endif
-}
-}
+} // namespace sys
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/Watchdog.h b/contrib/llvm/include/llvm/Support/Watchdog.h
index 01e1d92..5642ae2 100644
--- a/contrib/llvm/include/llvm/Support/Watchdog.h
+++ b/contrib/llvm/include/llvm/Support/Watchdog.h
@@ -32,7 +32,7 @@ namespace llvm {
Watchdog(const Watchdog &other) = delete;
Watchdog &operator=(const Watchdog &other) = delete;
};
- }
-}
+ } // namespace sys
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/circular_raw_ostream.h b/contrib/llvm/include/llvm/Support/circular_raw_ostream.h
index 19f9c2c..89d6421 100644
--- a/contrib/llvm/include/llvm/Support/circular_raw_ostream.h
+++ b/contrib/llvm/include/llvm/Support/circular_raw_ostream.h
@@ -152,7 +152,7 @@ namespace llvm
delete TheStream;
}
};
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/raw_os_ostream.h b/contrib/llvm/include/llvm/Support/raw_os_ostream.h
index a983aeb..c13e779 100644
--- a/contrib/llvm/include/llvm/Support/raw_os_ostream.h
+++ b/contrib/llvm/include/llvm/Support/raw_os_ostream.h
@@ -37,6 +37,6 @@ public:
~raw_os_ostream() override;
};
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/raw_ostream.h b/contrib/llvm/include/llvm/Support/raw_ostream.h
index b593171..4b4f933 100644
--- a/contrib/llvm/include/llvm/Support/raw_ostream.h
+++ b/contrib/llvm/include/llvm/Support/raw_ostream.h
@@ -545,6 +545,6 @@ public:
~buffer_ostream() { OS << str(); }
};
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Support/type_traits.h b/contrib/llvm/include/llvm/Support/type_traits.h
index 45465ae..6e2e202 100644
--- a/contrib/llvm/include/llvm/Support/type_traits.h
+++ b/contrib/llvm/include/llvm/Support/type_traits.h
@@ -91,7 +91,7 @@ struct add_const_past_pointer<
typedef const typename std::remove_pointer<T>::type *type;
};
-}
+} // namespace llvm
#ifdef LLVM_DEFINED_HAS_FEATURE
#undef __has_feature
diff --git a/contrib/llvm/include/llvm/TableGen/Error.h b/contrib/llvm/include/llvm/TableGen/Error.h
index 3df658d..2ecc9d2 100644
--- a/contrib/llvm/include/llvm/TableGen/Error.h
+++ b/contrib/llvm/include/llvm/TableGen/Error.h
@@ -34,6 +34,6 @@ LLVM_ATTRIBUTE_NORETURN void PrintFatalError(ArrayRef<SMLoc> ErrorLoc,
extern SourceMgr SrcMgr;
extern unsigned ErrorsPrinted;
-} // end namespace "llvm"
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/TableGen/Record.h b/contrib/llvm/include/llvm/TableGen/Record.h
index 14ad636..c5a4301 100644
--- a/contrib/llvm/include/llvm/TableGen/Record.h
+++ b/contrib/llvm/include/llvm/TableGen/Record.h
@@ -1012,7 +1012,6 @@ public:
return I->getKind() == IK_FieldInit;
}
static FieldInit *get(Init *R, const std::string &FN);
- static FieldInit *get(Init *R, const Init *FN);
Init *getBit(unsigned Bit) const override;
@@ -1590,6 +1589,6 @@ Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass,
Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass,
const std::string &Name, const std::string &Scoper);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/TableGen/StringMatcher.h b/contrib/llvm/include/llvm/TableGen/StringMatcher.h
index b438779..5a77f5e 100644
--- a/contrib/llvm/include/llvm/TableGen/StringMatcher.h
+++ b/contrib/llvm/include/llvm/TableGen/StringMatcher.h
@@ -49,6 +49,6 @@ private:
unsigned CharNo, unsigned IndentCount) const;
};
-} // end llvm namespace.
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Target/Target.td b/contrib/llvm/include/llvm/Target/Target.td
index d99f0e1..6123499 100644
--- a/contrib/llvm/include/llvm/Target/Target.td
+++ b/contrib/llvm/include/llvm/Target/Target.td
@@ -881,6 +881,12 @@ def FRAME_ALLOC : Instruction {
let hasSideEffects = 0;
let hasCtrlDep = 1;
}
+def FAULTING_LOAD_OP : Instruction {
+ let OutOperandList = (outs unknown:$dst);
+ let InOperandList = (ins variable_ops);
+ let usesCustomInserter = 1;
+ let mayLoad = 1;
+}
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/Target/TargetCallingConv.h b/contrib/llvm/include/llvm/Target/TargetCallingConv.h
index 9d4e7a0..11a2cfd 100644
--- a/contrib/llvm/include/llvm/Target/TargetCallingConv.h
+++ b/contrib/llvm/include/llvm/Target/TargetCallingConv.h
@@ -190,8 +190,8 @@ namespace ISD {
ArgVT = argvt;
}
};
-}
+} // namespace ISD
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Target/TargetFrameLowering.h b/contrib/llvm/include/llvm/Target/TargetFrameLowering.h
index 0e31724..2e8fe21 100644
--- a/contrib/llvm/include/llvm/Target/TargetFrameLowering.h
+++ b/contrib/llvm/include/llvm/Target/TargetFrameLowering.h
@@ -283,6 +283,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Target/TargetInstrInfo.h b/contrib/llvm/include/llvm/Target/TargetInstrInfo.h
index 902b99c..ec7aef3 100644
--- a/contrib/llvm/include/llvm/Target/TargetInstrInfo.h
+++ b/contrib/llvm/include/llvm/Target/TargetInstrInfo.h
@@ -40,6 +40,7 @@ class TargetRegisterClass;
class TargetRegisterInfo;
class BranchProbability;
class TargetSubtargetInfo;
+class TargetSchedModel;
class DFAPacketizer;
template<class T> class SmallVectorImpl;
@@ -386,6 +387,51 @@ public:
return true;
}
+ /// Represents a predicate at the MachineFunction level. The control flow a
+ /// MachineBranchPredicate represents is:
+ ///
+ /// Reg <def>= LHS `Predicate` RHS == ConditionDef
+ /// if Reg then goto TrueDest else goto FalseDest
+ ///
+ struct MachineBranchPredicate {
+ enum ComparePredicate {
+ PRED_EQ, // True if two values are equal
+ PRED_NE, // True if two values are not equal
+ PRED_INVALID // Sentinel value
+ };
+
+ ComparePredicate Predicate;
+ MachineOperand LHS;
+ MachineOperand RHS;
+ MachineBasicBlock *TrueDest;
+ MachineBasicBlock *FalseDest;
+ MachineInstr *ConditionDef;
+
+ /// SingleUseCondition is true if ConditionDef is dead except for the
+ /// branch(es) at the end of the basic block.
+ ///
+ bool SingleUseCondition;
+
+ explicit MachineBranchPredicate()
+ : Predicate(PRED_INVALID), LHS(MachineOperand::CreateImm(0)),
+ RHS(MachineOperand::CreateImm(0)), TrueDest(nullptr),
+ FalseDest(nullptr), ConditionDef(nullptr), SingleUseCondition(false) {
+ }
+ };
+
+ /// Analyze the branching code at the end of MBB and parse it into the
+ /// MachineBranchPredicate structure if possible. Returns false on success
+ /// and true on failure.
+ ///
+ /// If AllowModify is true, then this routine is allowed to modify the basic
+ /// block (e.g. delete instructions after the unconditional branch).
+ ///
+ virtual bool AnalyzeBranchPredicate(MachineBasicBlock &MBB,
+ MachineBranchPredicate &MBP,
+ bool AllowModify = false) const {
+ return true;
+ }
+
/// Remove the branching code at the end of the specific MBB.
/// This is only invoked in cases where AnalyzeBranch returns success. It
/// returns the number of instructions that were removed.
@@ -405,7 +451,7 @@ public:
/// merging needs to be disabled.
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ ArrayRef<MachineOperand> Cond,
DebugLoc DL) const {
llvm_unreachable("Target didn't implement TargetInstrInfo::InsertBranch!");
}
@@ -530,7 +576,7 @@ public:
/// @param TrueCycles Latency from TrueReg to select output.
/// @param FalseCycles Latency from FalseReg to select output.
virtual bool canInsertSelect(const MachineBasicBlock &MBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles,
int &TrueCycles, int &FalseCycles) const {
@@ -554,8 +600,7 @@ public:
/// @param FalseReg Virtual register to copy when Cond is false.
virtual void insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DstReg,
- const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned DstReg, ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg) const {
llvm_unreachable("Target didn't implement TargetInstrInfo::insertSelect!");
}
@@ -679,25 +724,25 @@ public:
/// order since the pattern evaluator stops checking as soon as it finds a
/// faster sequence.
/// \param Root - Instruction that could be combined with one of its operands
- /// \param Pattern - Vector of possible combination pattern
- virtual bool hasPattern(
+ /// \param Pattern - Vector of possible combination patterns
+ virtual bool getMachineCombinerPatterns(
MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const {
return false;
}
- /// When hasPattern() finds a pattern this function generates the instructions
- /// that could replace the original code sequence. The client has to decide
- /// whether the actual replacement is beneficial or not.
+ /// When getMachineCombinerPatterns() finds patterns, this function generates
+ /// the instructions that could replace the original code sequence. The client
+ /// has to decide whether the actual replacement is beneficial or not.
/// \param Root - Instruction that could be combined with one of its operands
- /// \param P - Combination pattern for Root
+ /// \param Pattern - Combination pattern for Root
/// \param InsInstrs - Vector of new instructions that implement P
/// \param DelInstrs - Old instructions, including Root, that could be
/// replaced by InsInstr
/// \param InstrIdxForVirtReg - map of virtual register to instruction in
/// InsInstr that defines it
virtual void genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
+ MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
@@ -827,10 +872,11 @@ public:
return false;
}
- /// Get the base register and byte offset of a load/store instr.
- virtual bool getLdStBaseRegImmOfs(MachineInstr *LdSt,
- unsigned &BaseReg, unsigned &Offset,
- const TargetRegisterInfo *TRI) const {
+ /// Get the base register and byte offset of an instruction that reads/writes
+ /// memory.
+ virtual bool getMemOpBaseRegImmOfs(MachineInstr *MemOp, unsigned &BaseReg,
+ unsigned &Offset,
+ const TargetRegisterInfo *TRI) const {
return false;
}
@@ -878,13 +924,13 @@ public:
/// It returns true if the operation was successful.
virtual
bool PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const;
+ ArrayRef<MachineOperand> Pred) const;
/// Returns true if the first specified predicate
/// subsumes the second, e.g. GE subsumes GT.
virtual
- bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const {
+ bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const {
return false;
}
@@ -1055,7 +1101,7 @@ public:
/// determine whether it makes sense to hoist an instruction out even in a
/// high register pressure situation.
virtual
- bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ bool hasHighOperandLatency(const TargetSchedModel &SchedModel,
const MachineRegisterInfo *MRI,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI, unsigned UseIdx) const {
@@ -1065,7 +1111,7 @@ public:
/// Compute operand latency of a def of 'Reg'. Return true
/// if the target considered it 'low'.
virtual
- bool hasLowDefLatency(const InstrItineraryData *ItinData,
+ bool hasLowDefLatency(const TargetSchedModel &SchedModel,
const MachineInstr *DefMI, unsigned DefIdx) const;
/// Perform target-specific instruction verification.
@@ -1224,6 +1270,6 @@ private:
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
};
-} // End llvm namespace
+} // namespace llvm
#endif
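Several TargetInstrInfo hooks above switch their condition and predicate parameters from const SmallVectorImpl<MachineOperand>& to ArrayRef<MachineOperand>. A small illustration of why this is a drop-in change for callers, using a hypothetical helper rather than a real hook:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineOperand.h"

// Hypothetical helper standing in for a hook such as InsertBranch.
void useCond(llvm::ArrayRef<llvm::MachineOperand> Cond) {
  (void)Cond.size();  // e.g. inspect or forward the condition operands
}

void caller(llvm::SmallVector<llvm::MachineOperand, 4> &Cond) {
  useCond(Cond);  // SmallVector converts implicitly to ArrayRef
  useCond({});    // an empty condition no longer needs a temporary vector
}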
diff --git a/contrib/llvm/include/llvm/Target/TargetIntrinsicInfo.h b/contrib/llvm/include/llvm/Target/TargetIntrinsicInfo.h
index c630f5b..3732959 100644
--- a/contrib/llvm/include/llvm/Target/TargetIntrinsicInfo.h
+++ b/contrib/llvm/include/llvm/Target/TargetIntrinsicInfo.h
@@ -60,6 +60,6 @@ public:
unsigned numTys = 0) const = 0;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Target/TargetLowering.h b/contrib/llvm/include/llvm/Target/TargetLowering.h
index 616edd8..a536e00 100644
--- a/contrib/llvm/include/llvm/Target/TargetLowering.h
+++ b/contrib/llvm/include/llvm/Target/TargetLowering.h
@@ -2394,7 +2394,7 @@ public:
/// outgoing token chain. It calls LowerCall to do the actual lowering.
std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const;
- /// This hook must be implemented to lower calls into the the specified
+ /// This hook must be implemented to lower calls into the specified
/// DAG. The outgoing arguments to the call are described by the Outs array,
/// and the values to be returned by the call are described by the Ins
/// array. The implementation should fill in the InVals array with legal-type
@@ -2801,6 +2801,6 @@ void GetReturnInfo(Type* ReturnType, AttributeSet attr,
SmallVectorImpl<ISD::OutputArg> &Outs,
const TargetLowering &TLI);
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Target/TargetMachine.h b/contrib/llvm/include/llvm/Target/TargetMachine.h
index 2a1ce04..7681575 100644
--- a/contrib/llvm/include/llvm/Target/TargetMachine.h
+++ b/contrib/llvm/include/llvm/Target/TargetMachine.h
@@ -15,6 +15,7 @@
#define LLVM_TARGET_TARGETMACHINE_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
@@ -27,6 +28,7 @@ namespace llvm {
class InstrItineraryData;
class GlobalValue;
class Mangler;
+class MachineFunctionInitializer;
class MCAsmInfo;
class MCCodeGenInfo;
class MCContext;
@@ -68,7 +70,7 @@ class TargetMachine {
void operator=(const TargetMachine &) = delete;
protected: // Can only create subclasses.
TargetMachine(const Target &T, StringRef DataLayoutString,
- StringRef TargetTriple, StringRef CPU, StringRef FS,
+ const Triple &TargetTriple, StringRef CPU, StringRef FS,
const TargetOptions &Options);
/// The Target that this machine was created for.
@@ -79,7 +81,7 @@ protected: // Can only create subclasses.
/// Triple string, CPU name, and target feature strings the TargetMachine
/// instance is created with.
- std::string TargetTriple;
+ Triple TargetTriple;
std::string TargetCPU;
std::string TargetFS;
@@ -103,7 +105,7 @@ public:
const Target &getTarget() const { return TheTarget; }
- StringRef getTargetTriple() const { return TargetTriple; }
+ const Triple &getTargetTriple() const { return TargetTriple; }
StringRef getTargetCPU() const { return TargetCPU; }
StringRef getTargetFeatureString() const { return TargetFS; }
@@ -208,11 +210,11 @@ public:
/// emitted. Typically this will involve several steps of code generation.
/// This method should return true if emission of this file type is not
/// supported, or false on success.
- virtual bool addPassesToEmitFile(PassManagerBase &, raw_pwrite_stream &,
- CodeGenFileType,
- bool /*DisableVerify*/ = true,
- AnalysisID /*StartAfter*/ = nullptr,
- AnalysisID /*StopAfter*/ = nullptr) {
+ virtual bool addPassesToEmitFile(
+ PassManagerBase &, raw_pwrite_stream &, CodeGenFileType,
+ bool /*DisableVerify*/ = true, AnalysisID /*StartAfter*/ = nullptr,
+ AnalysisID /*StopAfter*/ = nullptr,
+ MachineFunctionInitializer * /*MFInitializer*/ = nullptr) {
return true;
}
@@ -238,7 +240,7 @@ public:
class LLVMTargetMachine : public TargetMachine {
protected: // Can only create subclasses.
LLVMTargetMachine(const Target &T, StringRef DataLayoutString,
- StringRef TargetTriple, StringRef CPU, StringRef FS,
+ const Triple &TargetTriple, StringRef CPU, StringRef FS,
TargetOptions Options, Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
@@ -256,10 +258,11 @@ public:
/// Add passes to the specified pass manager to get the specified file
/// emitted. Typically this will involve several steps of code generation.
- bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out,
- CodeGenFileType FileType, bool DisableVerify = true,
- AnalysisID StartAfter = nullptr,
- AnalysisID StopAfter = nullptr) override;
+ bool addPassesToEmitFile(
+ PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
+ bool DisableVerify = true, AnalysisID StartAfter = nullptr,
+ AnalysisID StopAfter = nullptr,
+ MachineFunctionInitializer *MFInitializer = nullptr) override;
/// Add passes to the specified pass manager to get machine code emitted with
/// the MCJIT. This method returns true if machine code is not supported. It
@@ -270,6 +273,6 @@ public:
bool DisableVerify = true) override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Target/TargetOpcodes.h b/contrib/llvm/include/llvm/Target/TargetOpcodes.h
index afc2236..1f9a5d4 100644
--- a/contrib/llvm/include/llvm/Target/TargetOpcodes.h
+++ b/contrib/llvm/include/llvm/Target/TargetOpcodes.h
@@ -122,6 +122,12 @@ enum {
/// label. Created by the llvm.frameallocate intrinsic. It has two arguments:
/// the symbol for the label and the frame index of the stack allocation.
FRAME_ALLOC = 21,
+
+ /// Loading instruction that may page fault, bundled with associated
+ /// information on how to handle such a page fault. It is intended to support
+ /// "zero cost" null checks in managed languages by allowing LLVM to fold
+ /// comparisons into existing memory operations.
+ FAULTING_LOAD_OP = 22,
};
} // end namespace TargetOpcode
} // end namespace llvm
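The FAULTING_LOAD_OP comment above describes the idea behind implicit null checks: let the memory access itself act as the test and recover through the fault handler on the rare null path, so the hot path carries no compare-and-branch. As a rough, hypothetical illustration of the runtime-side mechanism only (plain POSIX C++, not LLVM code and not part of this patch; every name below is invented for the example):

#include <csetjmp>
#include <csignal>
#include <cstdio>

struct Object { int Field; };

// Jump target for the "null path"; a real JIT records a handler address per
// faulting load instead of using setjmp/longjmp.
static sigjmp_buf NullPath;

static void OnFault(int) { siglongjmp(NullPath, 1); }

int loadFieldOrDefault(Object *O) {
  if (sigsetjmp(NullPath, 1))
    return -1;        // reached only if the load below actually faulted
  return O->Field;    // the "faulting load": no explicit null compare here
}

int main() {
  struct sigaction SA = {};
  sigemptyset(&SA.sa_mask);
  SA.sa_handler = OnFault;
  sigaction(SIGSEGV, &SA, nullptr);

  Object Obj{42};
  std::printf("%d\n", loadFieldOrDefault(&Obj));    // prints 42
  std::printf("%d\n", loadFieldOrDefault(nullptr)); // prints -1 via the fault
  return 0;
}

A production implementation does this at the machine-code level with explicit per-instruction fault metadata; dereferencing a null pointer in C++ source, as this toy does, is formally undefined behaviour and is shown only to illustrate the shape of the mechanism.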
diff --git a/contrib/llvm/include/llvm/Target/TargetOptions.h b/contrib/llvm/include/llvm/Target/TargetOptions.h
index 8f8b78d..f27411e 100644
--- a/contrib/llvm/include/llvm/Target/TargetOptions.h
+++ b/contrib/llvm/include/llvm/Target/TargetOptions.h
@@ -67,7 +67,7 @@ namespace llvm {
HonorSignDependentRoundingFPMathOption(false),
NoZerosInBSS(false),
GuaranteedTailCallOpt(false),
- DisableTailCalls(false), StackAlignmentOverride(0),
+ StackAlignmentOverride(0),
EnableFastISel(false), PositionIndependentExecutable(false),
UseInitArray(false), DisableIntegratedAS(false),
CompressDebugSections(false), FunctionSections(false),
@@ -137,10 +137,6 @@ namespace llvm {
/// as their parent function, etc.), using an alternate ABI if necessary.
unsigned GuaranteedTailCallOpt : 1;
- /// DisableTailCalls - This flag controls whether we will use tail calls.
- /// Disabling them may be useful to maintain a correct call stack.
- unsigned DisableTailCalls : 1;
-
/// StackAlignmentOverride - Override default stack alignment for target.
unsigned StackAlignmentOverride;
@@ -236,7 +232,6 @@ inline bool operator==(const TargetOptions &LHS,
ARE_EQUAL(HonorSignDependentRoundingFPMathOption) &&
ARE_EQUAL(NoZerosInBSS) &&
ARE_EQUAL(GuaranteedTailCallOpt) &&
- ARE_EQUAL(DisableTailCalls) &&
ARE_EQUAL(StackAlignmentOverride) &&
ARE_EQUAL(EnableFastISel) &&
ARE_EQUAL(PositionIndependentExecutable) &&
@@ -257,6 +252,6 @@ inline bool operator!=(const TargetOptions &LHS,
return !(LHS == RHS);
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Target/TargetRecip.h b/contrib/llvm/include/llvm/Target/TargetRecip.h
index 4cc3672..c3beb40 100644
--- a/contrib/llvm/include/llvm/Target/TargetRecip.h
+++ b/contrib/llvm/include/llvm/Target/TargetRecip.h
@@ -68,6 +68,6 @@ private:
void parseIndividualParams(const std::vector<std::string> &Args);
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h b/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h
index 121b8a2..1377b38 100644
--- a/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h
+++ b/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h
@@ -373,6 +373,19 @@ public:
return SubRegIndexLaneMasks[SubIdx];
}
+ /// Returns true if the given lane mask is imprecise.
+ ///
+ /// LaneMasks as given by getSubRegIndexLaneMask() have a limited number of
+ /// bits, so for targets with more than 31 disjunct subregister indices there
+ /// may be cases where:
+ /// getSubReg(Reg,A) does not overlap getSubReg(Reg,B)
+ /// but we still have
+ /// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0.
+ /// This function returns true in those cases.
+ static bool isImpreciseLaneMask(unsigned LaneMask) {
+ return LaneMask & 0x80000000u;
+ }
+
/// The lane masks returned by getSubRegIndexLaneMask() above can only be
/// used to determine if sub-registers overlap - they can't be used to
/// determine if a set of sub-registers completely cover another
@@ -985,6 +998,6 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
return OS;
}
-} // End llvm namespace
+} // namespace llvm
#endif
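A quick worked example of why the new isImpreciseLaneMask() check is needed: once a target has more than 31 subregister indices, the extra ones all share the top bit, so two lane masks can intersect even though the subregisters themselves never overlap. A small self-contained sketch with hypothetical mask values, mirroring only the bit test added above:

#include <cstdint>
#include <cstdio>

// Mirror of the check added in this hunk; 0x80000000 marks the "imprecise"
// bucket that subregister indices beyond the 31 precise bits fall into.
static bool isImpreciseLaneMask(uint32_t LaneMask) {
  return (LaneMask & 0x80000000u) != 0;
}

int main() {
  // Hypothetical masks for two subregister indices that do NOT overlap in
  // the register, but both spilled into the shared top bit.
  uint32_t MaskA = 0x80000004u;
  uint32_t MaskB = 0x80000010u;

  bool MasksIntersect = (MaskA & MaskB) != 0;  // true, from the top bit alone
  std::printf("intersect=%d impreciseA=%d impreciseB=%d\n",
              MasksIntersect, isImpreciseLaneMask(MaskA),
              isImpreciseLaneMask(MaskB));
  return 0;
}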
diff --git a/contrib/llvm/include/llvm/Target/TargetSelectionDAGInfo.h b/contrib/llvm/include/llvm/Target/TargetSelectionDAGInfo.h
index bacdd95..c3343ca 100644
--- a/contrib/llvm/include/llvm/Target/TargetSelectionDAGInfo.h
+++ b/contrib/llvm/include/llvm/Target/TargetSelectionDAGInfo.h
@@ -163,6 +163,6 @@ public:
}
};
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h b/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h
index 0f42790..640e1123 100644
--- a/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h
+++ b/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h
@@ -115,12 +115,11 @@ public:
/// can be overridden.
virtual bool enableJoinGlobalCopies() const;
- /// \brief True if the subtarget should run PostMachineScheduler.
+ /// True if the subtarget should run a scheduler after register allocation.
///
- /// This only takes effect if the target has configured the
- /// PostMachineScheduler pass to run, or if the global cl::opt flag,
- /// MISchedPostRA, is set.
- virtual bool enablePostMachineScheduler() const;
+ /// By default this queries the PostRAScheduling bit in the scheduling model
+ /// which is the preferred way to influence this.
+ virtual bool enablePostRAScheduler() const;
/// \brief True if the subtarget should run the atomic expansion pass.
virtual bool enableAtomicExpand() const;
@@ -179,6 +178,6 @@ public:
virtual bool enableSubRegLiveness() const { return false; }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/IPO.h b/contrib/llvm/include/llvm/Transforms/IPO.h
index fbd999c..59cd921 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO.h
@@ -203,6 +203,6 @@ ModulePass *createBarrierNoopPass();
/// to bitsets.
ModulePass *createLowerBitSetsPass();
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h b/contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h
index 6a644ad..4abb92d 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h
@@ -86,6 +86,6 @@ private:
bool shouldInline(CallSite CS);
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombine.h b/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
index f48ec13..cfb3156 100644
--- a/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
+++ b/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
@@ -41,6 +41,6 @@ public:
PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Instrumentation.h b/contrib/llvm/include/llvm/Transforms/Instrumentation.h
index 884f54f..4447d0d 100644
--- a/contrib/llvm/include/llvm/Transforms/Instrumentation.h
+++ b/contrib/llvm/include/llvm/Transforms/Instrumentation.h
@@ -84,8 +84,8 @@ ModulePass *createInstrProfilingPass(
const InstrProfOptions &Options = InstrProfOptions());
// Insert AddressSanitizer (address sanity checking) instrumentation
-FunctionPass *createAddressSanitizerFunctionPass();
-ModulePass *createAddressSanitizerModulePass();
+FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false);
+ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false);
// Insert MemorySanitizer instrumentation (detection of uninitialized reads)
FunctionPass *createMemorySanitizerPass(int TrackOrigins = 0);
@@ -132,6 +132,10 @@ inline ModulePass *createDataFlowSanitizerPassForJIT(
// checking on loads, stores, and other memory intrinsics.
FunctionPass *createBoundsCheckingPass();
-} // End llvm namespace
+/// \brief This pass splits the stack into a safe stack and an unsafe stack to
+/// protect against stack-based overflow vulnerabilities.
+FunctionPass *createSafeStackPass();
+
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/ObjCARC.h b/contrib/llvm/include/llvm/Transforms/ObjCARC.h
index 1897adc..367cdf6 100644
--- a/contrib/llvm/include/llvm/Transforms/ObjCARC.h
+++ b/contrib/llvm/include/llvm/Transforms/ObjCARC.h
@@ -43,6 +43,6 @@ Pass *createObjCARCContractPass();
//
Pass *createObjCARCOptPass();
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar.h b/contrib/llvm/include/llvm/Transforms/Scalar.h
index 4676c95..99fff37 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar.h
@@ -486,6 +486,6 @@ FunctionPass *createNaryReassociatePass();
//
FunctionPass *createLoopDistributePass();
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h b/contrib/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h
index e3dd3c0..5cd4a69 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h
@@ -34,6 +34,6 @@ public:
PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h b/contrib/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
index 4028320..ce36742 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
@@ -35,6 +35,6 @@ public:
PreservedAnalyses run(Function &F);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h b/contrib/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h
index ef28e0f..d8b638d 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h
@@ -41,6 +41,6 @@ public:
PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h b/contrib/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
index 4e4f02c..7f6a264 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
@@ -59,6 +59,6 @@ void ComputeASanStackFrameLayout(
// The result is put here.
ASanStackFrameLayout *Layout);
-} // llvm namespace
+} // namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_ASANSTACKFRAMELAYOUT_H
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 710db03..3004f9e 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -308,6 +308,6 @@ void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
/// entered if the condition is false.
Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
BasicBlock *&IfFalse);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index 879f295..5081229 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -111,6 +111,6 @@ namespace llvm {
/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
Value *EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h
index cb187ec..9ba6bea 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -233,6 +233,6 @@ bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
bool InsertLifetime = true);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 3a96d95..c3c2f3e 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -121,6 +121,6 @@ namespace llvm {
ValueSet &inputs,
ValueSet &outputs);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/CtorUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/CtorUtils.h
index 63e564d..1213324 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/CtorUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/CtorUtils.h
@@ -27,6 +27,6 @@ class Module;
bool optimizeGlobalCtorsList(Module &M,
function_ref<bool(Function *)> ShouldRemove);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/GlobalStatus.h b/contrib/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
index c366095..658449c 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
@@ -77,6 +77,6 @@ struct GlobalStatus {
GlobalStatus();
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/IntegerDivision.h b/contrib/llvm/include/llvm/Transforms/Utils/IntegerDivision.h
index 0ec3321..5ba6685 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/IntegerDivision.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/IntegerDivision.h
@@ -68,6 +68,6 @@ namespace llvm {
/// @brief Replace Rem with generated code.
bool expandDivisionUpTo64Bits(BinaryOperator *Div);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Local.h b/contrib/llvm/include/llvm/Transforms/Utils/Local.h
index a1bb367a..1063f5f 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/Local.h
@@ -291,6 +291,6 @@ void combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsigned> Kn
/// the given edge. Returns the number of replacements made.
unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
const BasicBlockEdge &Edge);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 28791f5..3aa40cf 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -43,12 +43,37 @@ struct LICMSafetyInfo {
{}
};
-/// This POD struct holds information about a potential reduction operation.
-class ReductionInstDesc {
+/// The RecurrenceDescriptor is used to identify recurrence variables in a
+/// loop. Reduction is a special case of recurrence that has uses of the
+/// recurrence variable outside the loop. The method isReductionPHI identifies
+/// reductions that are basic recurrences.
+///
+/// Basic recurrences are defined as the summation, product, OR, AND, XOR, min,
+/// or max of a set of terms. For example: for(i=0; i<n; i++) { total +=
+/// array[i]; } is a summation of array elements. Basic recurrences are a
+/// special case of chains of recurrences (CR). See ScalarEvolution for CR
+/// references.
+
+/// This struct holds information about recurrence variables.
+class RecurrenceDescriptor {
public:
- // This enum represents the kind of minmax reduction.
- enum MinMaxReductionKind {
+ /// This enum represents the kinds of recurrences that we support.
+ enum RecurrenceKind {
+ RK_NoRecurrence, ///< Not a recurrence.
+ RK_IntegerAdd, ///< Sum of integers.
+ RK_IntegerMult, ///< Product of integers.
+ RK_IntegerOr, ///< Bitwise or logical OR of numbers.
+ RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
+ RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
+ RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
+ RK_FloatAdd, ///< Sum of floats.
+ RK_FloatMult, ///< Product of floats.
+ RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
+ };
+
+ // This enum represents the kind of minmax recurrence.
+ enum MinMaxRecurrenceKind {
MRK_Invalid,
MRK_UIntMin,
MRK_UIntMax,
@@ -57,62 +82,48 @@ public:
MRK_FloatMin,
MRK_FloatMax
};
- ReductionInstDesc(bool IsRedux, Instruction *I)
- : IsReduction(IsRedux), PatternLastInst(I), MinMaxKind(MRK_Invalid) {}
- ReductionInstDesc(Instruction *I, MinMaxReductionKind K)
- : IsReduction(true), PatternLastInst(I), MinMaxKind(K) {}
+ RecurrenceDescriptor()
+ : StartValue(nullptr), LoopExitInstr(nullptr), Kind(RK_NoRecurrence),
+ MinMaxKind(MRK_Invalid) {}
- bool isReduction() { return IsReduction; }
+ RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K,
+ MinMaxRecurrenceKind MK)
+ : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {}
- MinMaxReductionKind getMinMaxKind() { return MinMaxKind; }
-
- Instruction *getPatternInst() { return PatternLastInst; }
+ /// This POD struct holds information about a potential recurrence operation.
+ class InstDesc {
-private:
- // Is this instruction a reduction candidate.
- bool IsReduction;
- // The last instruction in a min/max pattern (select of the select(icmp())
- // pattern), or the current reduction instruction otherwise.
- Instruction *PatternLastInst;
- // If this is a min/max pattern the comparison predicate.
- MinMaxReductionKind MinMaxKind;
-};
+ public:
+ InstDesc(bool IsRecur, Instruction *I)
+ : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid) {}
-/// This struct holds information about reduction variables.
-class ReductionDescriptor {
+ InstDesc(Instruction *I, MinMaxRecurrenceKind K)
+ : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K) {}
-public:
- /// This enum represents the kinds of reductions that we support.
- enum ReductionKind {
- RK_NoReduction, ///< Not a reduction.
- RK_IntegerAdd, ///< Sum of integers.
- RK_IntegerMult, ///< Product of integers.
- RK_IntegerOr, ///< Bitwise or logical OR of numbers.
- RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
- RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
- RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
- RK_FloatAdd, ///< Sum of floats.
- RK_FloatMult, ///< Product of floats.
- RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
- };
+ bool isRecurrence() { return IsRecurrence; }
- ReductionDescriptor()
- : StartValue(nullptr), LoopExitInstr(nullptr), Kind(RK_NoReduction),
- MinMaxKind(ReductionInstDesc::MRK_Invalid) {}
+ MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; }
- ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K,
- ReductionInstDesc::MinMaxReductionKind MK)
- : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {}
+ Instruction *getPatternInst() { return PatternLastInst; }
+
+ private:
+ // Is this instruction a recurrence candidate.
+ bool IsRecurrence;
+ // The last instruction in a min/max pattern (select of the select(icmp())
+ // pattern), or the current recurrence instruction otherwise.
+ Instruction *PatternLastInst;
+ // If this is a min/max pattern the comparison predicate.
+ MinMaxRecurrenceKind MinMaxKind;
+ };
- /// Returns a struct describing if the instruction 'I' can be a reduction
- /// variable of type 'Kind'. If the reduction is a min/max pattern of
+ /// Returns a struct describing if the instruction 'I' can be a recurrence
+ /// variable of type 'Kind'. If the recurrence is a min/max pattern of
/// select(icmp()) this function advances the instruction pointer 'I' from the
/// compare instruction to the select instruction and stores this pointer in
/// 'PatternLastInst' member of the returned struct.
- static ReductionInstDesc isReductionInstr(Instruction *I, ReductionKind Kind,
- ReductionInstDesc &Prev,
- bool HasFunNoNaNAttr);
+ static InstDesc isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
+ InstDesc &Prev, bool HasFunNoNaNAttr);
/// Returns true if instruction I has multiple uses in Insts
static bool hasMultipleUsesOf(Instruction *I,
@@ -124,51 +135,48 @@ public:
/// Returns a struct describing if the instruction is a
/// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y)
/// or max(X, Y).
- static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I,
- ReductionInstDesc &Prev);
+ static InstDesc isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev);
- /// Returns identity corresponding to the ReductionKind.
- static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
+ /// Returns identity corresponding to the RecurrenceKind.
+ static Constant *getRecurrenceIdentity(RecurrenceKind K, Type *Tp);
- /// Returns the opcode of binary operation corresponding to the ReductionKind.
- static unsigned getReductionBinOp(ReductionKind Kind);
+ /// Returns the opcode of binary operation corresponding to the
+ /// RecurrenceKind.
+ static unsigned getRecurrenceBinOp(RecurrenceKind Kind);
- /// Returns a Min/Max operation corresponding to MinMaxReductionKind.
- static Value *createMinMaxOp(IRBuilder<> &Builder,
- ReductionInstDesc::MinMaxReductionKind RK,
+ /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
+ static Value *createMinMaxOp(IRBuilder<> &Builder, MinMaxRecurrenceKind RK,
Value *Left, Value *Right);
/// Returns true if Phi is a reduction of type Kind and adds it to the
- /// ReductionDescriptor.
- static bool AddReductionVar(PHINode *Phi, ReductionKind Kind, Loop *TheLoop,
+ /// RecurrenceDescriptor.
+ static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop,
bool HasFunNoNaNAttr,
- ReductionDescriptor &RedDes);
+ RecurrenceDescriptor &RedDes);
- /// Returns true if Phi is a reduction in TheLoop. The ReductionDescriptor is
+ /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor is
/// returned in RedDes.
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop,
- ReductionDescriptor &RedDes);
+ RecurrenceDescriptor &RedDes);
- ReductionKind getReductionKind() { return Kind; }
+ RecurrenceKind getRecurrenceKind() { return Kind; }
- ReductionInstDesc::MinMaxReductionKind getMinMaxReductionKind() {
- return MinMaxKind;
- }
+ MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }
- TrackingVH<Value> getReductionStartValue() { return StartValue; }
+ TrackingVH<Value> getRecurrenceStartValue() { return StartValue; }
Instruction *getLoopExitInstr() { return LoopExitInstr; }
private:
- // The starting value of the reduction.
+ // The starting value of the recurrence.
// It does not have to be zero!
TrackingVH<Value> StartValue;
// The instruction whose value is used outside the loop.
Instruction *LoopExitInstr;
- // The kind of the reduction.
- ReductionKind Kind;
- // If this a min/max reduction the kind of reduction.
- ReductionInstDesc::MinMaxReductionKind MinMaxKind;
+ // The kind of the recurrence.
+ RecurrenceKind Kind;
+ // If this a min/max recurrence the kind of recurrence.
+ MinMaxRecurrenceKind MinMaxKind;
};
BasicBlock *InsertPreheaderForLoop(Loop *L, Pass *P);
@@ -255,6 +263,6 @@ void computeLICMSafetyInfo(LICMSafetyInfo *, Loop *);
/// variable. Returns true if this is an induction PHI along with the step
/// value.
bool isInductionPHI(PHINode *, ScalarEvolution *, ConstantInt *&);
-}
+} // namespace llvm
#endif
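For readers not steeped in the vectorizer, the source-level shapes behind the recurrence kinds named in this hunk look roughly like the sketch below; the function names and data are illustrative only, and the mapping to RK_IntegerAdd and RK_IntegerMinMax is the interpretation suggested by the comments above, not code from the patch:

#include <cstdio>

// A plain sum: Total is the recurrence PHI, classified as RK_IntegerAdd.
int sumArray(const int *A, int N) {
  int Total = 0;
  for (int I = 0; I < N; ++I)
    Total += A[I];
  return Total;
}

// A min implemented as select(icmp()): the select/compare pair is what
// isMinMaxSelectCmpPattern looks for, classified as RK_IntegerMinMax.
int minArray(const int *A, int N) {
  int Min = A[0];
  for (int I = 1; I < N; ++I)
    Min = (A[I] < Min) ? A[I] : Min;
  return Min;
}

int main() {
  int Data[] = {7, 3, 9, 1};
  std::printf("sum=%d min=%d\n", sumArray(Data, 4), minArray(Data, 4));
  return 0;
}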
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
index 622265b..120d14a 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -57,6 +57,6 @@ Function *checkSanitizerInterfaceFunction(Constant *FuncOrBitcast);
std::pair<Function *, Function *> createSanitizerCtorAndInitFunctions(
Module &M, StringRef CtorName, StringRef InitName,
ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs);
-} // End llvm namespace
+} // namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/PromoteMemToReg.h b/contrib/llvm/include/llvm/Transforms/Utils/PromoteMemToReg.h
index d0602bf..6c3d2ea 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/PromoteMemToReg.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/PromoteMemToReg.h
@@ -45,6 +45,6 @@ void PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
AliasSetTracker *AST = nullptr,
AssumptionCache *AC = nullptr);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdater.h b/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
index 1c7b2c58..5179d58 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -173,6 +173,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
index ed0841c..1b9cb48 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
@@ -455,6 +455,6 @@ public:
#undef DEBUG_TYPE // "ssaupdater"
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 4115960..d7c8338 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -166,6 +166,6 @@ private:
/// function by checking for an existing function with name FuncName + f
bool hasFloatVersion(StringRef FuncName);
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h b/contrib/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h
index 5ccee98..d798358 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h
@@ -108,7 +108,7 @@ private:
yaml::MappingNode *V,
RewriteDescriptorList *DL);
};
-}
+} // namespace SymbolRewriter
template <>
struct ilist_traits<SymbolRewriter::RewriteDescriptor>
@@ -147,6 +147,6 @@ public:
ModulePass *createRewriteSymbolsPass();
ModulePass *createRewriteSymbolsPass(SymbolRewriter::RewriteDescriptorList &);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/contrib/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
index 550292f..b19c6fa 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
@@ -47,6 +47,6 @@ public:
Pass *createUnifyFunctionExitNodesPass();
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index 7f2cf8d7..ba58668 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -37,6 +37,6 @@ bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
LPPassManager *LPM);
MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/ValueMapper.h b/contrib/llvm/include/llvm/Transforms/Utils/ValueMapper.h
index 047ab81..737ad4f 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/ValueMapper.h
@@ -96,6 +96,6 @@ namespace llvm {
Materializer));
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/VectorUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/VectorUtils.h
index 9f0fb19..6a35247 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/VectorUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/VectorUtils.h
@@ -200,6 +200,6 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
return Intrinsic::not_intrinsic;
}
-} // llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Vectorize.h b/contrib/llvm/include/llvm/Transforms/Vectorize.h
index aec3993..aab2790 100644
--- a/contrib/llvm/include/llvm/Transforms/Vectorize.h
+++ b/contrib/llvm/include/llvm/Transforms/Vectorize.h
@@ -139,6 +139,6 @@ Pass *createSLPVectorizerPass();
bool vectorizeBasicBlock(Pass *P, BasicBlock &BB,
const VectorizeConfig &C = VectorizeConfig());
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/module.modulemap b/contrib/llvm/include/llvm/module.modulemap
index 163cbc3..a9e6daf 100644
--- a/contrib/llvm/include/llvm/module.modulemap
+++ b/contrib/llvm/include/llvm/module.modulemap
@@ -29,6 +29,9 @@ module LLVM_Backend {
exclude header "CodeGen/CommandFlags.h"
exclude header "CodeGen/LinkAllAsmWriterComponents.h"
exclude header "CodeGen/LinkAllCodegenComponents.h"
+
+ // These are intended for (repeated) textual inclusion.
+ textual header "CodeGen/DIEValue.def"
}
module Target {
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
index ce46d53..d44653e 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -48,23 +48,22 @@ char AliasAnalysis::ID = 0;
// Default chaining methods
//===----------------------------------------------------------------------===//
-AliasAnalysis::AliasResult
-AliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+AliasAnalysis::AliasResult AliasAnalysis::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
return AA->alias(LocA, LocB);
}
-bool AliasAnalysis::pointsToConstantMemory(const Location &Loc,
+bool AliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
bool OrLocal) {
assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
return AA->pointsToConstantMemory(Loc, OrLocal);
}
-AliasAnalysis::Location
-AliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
- AliasAnalysis::ModRefResult &Mask) {
+AliasAnalysis::ModRefResult
+AliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- return AA->getArgLocation(CS, ArgIdx, Mask);
+ return AA->getArgModRefInfo(CS, ArgIdx);
}
void AliasAnalysis::deleteValue(Value *V) {
@@ -93,7 +92,7 @@ AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
// location this memory access defines. The best we can say
// is that if the call references what this instruction
// defines, it must be clobbered by this location.
- const AliasAnalysis::Location DefLoc = MemoryLocation::get(I);
+ const MemoryLocation DefLoc = MemoryLocation::get(I);
if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef)
return AliasAnalysis::ModRef;
}
@@ -101,8 +100,7 @@ AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
}
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
- const Location &Loc) {
+AliasAnalysis::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) {
assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
ModRefBehavior MRB = getModRefBehavior(CS);
@@ -122,11 +120,11 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
const Value *Arg = *AI;
if (!Arg->getType()->isPointerTy())
continue;
- ModRefResult ArgMask;
- Location CSLoc =
- getArgLocation(CS, (unsigned) std::distance(CS.arg_begin(), AI),
- ArgMask);
- if (!isNoAlias(CSLoc, Loc)) {
+ unsigned ArgIdx = std::distance(CS.arg_begin(), AI);
+ MemoryLocation ArgLoc =
+ MemoryLocation::getForArgument(CS, ArgIdx, *TLI);
+ if (!isNoAlias(ArgLoc, Loc)) {
+ ModRefResult ArgMask = getArgModRefInfo(CS, ArgIdx);
doesAlias = true;
AllArgsMask = ModRefResult(AllArgsMask | ArgMask);
}
@@ -183,18 +181,18 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
const Value *Arg = *I;
if (!Arg->getType()->isPointerTy())
continue;
- ModRefResult ArgMask;
- Location CS2Loc =
- getArgLocation(CS2, (unsigned) std::distance(CS2.arg_begin(), I),
- ArgMask);
- // ArgMask indicates what CS2 might do to CS2Loc, and the dependence of
+ unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I);
+ auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, *TLI);
+
+ // ArgMask indicates what CS2 might do to CS2ArgLoc, and the dependence of
// CS1 on that location is the inverse.
+ ModRefResult ArgMask = getArgModRefInfo(CS2, CS2ArgIdx);
if (ArgMask == Mod)
ArgMask = ModRef;
else if (ArgMask == Ref)
ArgMask = Mod;
- R = ModRefResult((R | (getModRefInfo(CS1, CS2Loc) & ArgMask)) & Mask);
+ R = ModRefResult((R | (getModRefInfo(CS1, CS2ArgLoc) & ArgMask)) & Mask);
if (R == Mask)
break;
}
@@ -212,13 +210,14 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
const Value *Arg = *I;
if (!Arg->getType()->isPointerTy())
continue;
- ModRefResult ArgMask;
- Location CS1Loc = getArgLocation(
- CS1, (unsigned)std::distance(CS1.arg_begin(), I), ArgMask);
- // ArgMask indicates what CS1 might do to CS1Loc; if CS1 might Mod
- // CS1Loc, then we care about either a Mod or a Ref by CS2. If CS1
+ unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I);
+ auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, *TLI);
+
+ // ArgMask indicates what CS1 might do to CS1ArgLoc; if CS1 might Mod
+ // CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If CS1
// might Ref, then we care only about a Mod by CS2.
- ModRefResult ArgR = getModRefInfo(CS2, CS1Loc);
+ ModRefResult ArgMask = getArgModRefInfo(CS1, CS1ArgIdx);
+ ModRefResult ArgR = getModRefInfo(CS2, CS1ArgLoc);
if (((ArgMask & Mod) != NoModRef && (ArgR & ModRef) != NoModRef) ||
((ArgMask & Ref) != NoModRef && (ArgR & Mod) != NoModRef))
R = ModRefResult((R | ArgMask) & Mask);
@@ -268,7 +267,7 @@ AliasAnalysis::getModRefBehavior(const Function *F) {
//===----------------------------------------------------------------------===//
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
+AliasAnalysis::getModRefInfo(const LoadInst *L, const MemoryLocation &Loc) {
// Be conservative in the face of volatile/atomic.
if (!L->isUnordered())
return ModRef;
@@ -283,7 +282,7 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
}
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
+AliasAnalysis::getModRefInfo(const StoreInst *S, const MemoryLocation &Loc) {
// Be conservative in the face of volatile/atomic.
if (!S->isUnordered())
return ModRef;
@@ -306,7 +305,7 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
}
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
+AliasAnalysis::getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc) {
if (Loc.Ptr) {
// If the va_arg address cannot alias the pointer in question, then the
@@ -325,7 +324,8 @@ AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
}
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) {
+AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX,
+ const MemoryLocation &Loc) {
// Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
if (CX->getSuccessOrdering() > Monotonic)
return ModRef;
@@ -338,7 +338,8 @@ AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) {
}
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) {
+AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW,
+ const MemoryLocation &Loc) {
// Acquire/Release atomicrmw has properties that matter for arbitrary addresses.
if (RMW->getOrdering() > Monotonic)
return ModRef;
@@ -354,10 +355,8 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) {
// BasicAA isn't willing to spend linear time determining whether an alloca
// was captured before or after this particular call, while we are. However,
// with a smarter AA in place, this test is just wasting compile time.
-AliasAnalysis::ModRefResult
-AliasAnalysis::callCapturesBefore(const Instruction *I,
- const AliasAnalysis::Location &MemLoc,
- DominatorTree *DT) {
+AliasAnalysis::ModRefResult AliasAnalysis::callCapturesBefore(
+ const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT) {
if (!DT)
return AliasAnalysis::ModRef;
@@ -390,8 +389,7 @@ AliasAnalysis::callCapturesBefore(const Instruction *I,
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
// escape.
- if (isNoAlias(AliasAnalysis::Location(*CI),
- AliasAnalysis::Location(Object)))
+ if (isNoAlias(MemoryLocation(*CI), MemoryLocation(Object)))
continue;
if (CS.doesNotAccessMemory(ArgNo))
continue;
@@ -431,14 +429,14 @@ void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
/// if known, or a conservative value otherwise.
///
uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) {
- return DL ? DL->getTypeStoreSize(Ty) : UnknownSize;
+ return DL ? DL->getTypeStoreSize(Ty) : MemoryLocation::UnknownSize;
}
/// canBasicBlockModify - Return true if it is possible for execution of the
/// specified basic block to modify the location Loc.
///
bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
- const Location &Loc) {
+ const MemoryLocation &Loc) {
return canInstructionRangeModRef(BB.front(), BB.back(), Loc, Mod);
}
@@ -449,7 +447,7 @@ bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
/// I1 and I2 must be in the same basic block.
bool AliasAnalysis::canInstructionRangeModRef(const Instruction &I1,
const Instruction &I2,
- const Location &Loc,
+ const MemoryLocation &Loc,
const ModRefResult Mode) {
assert(I1.getParent() == I2.getParent() &&
"Instructions not in same basic block!");
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
index a1bfba1..0112186 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
@@ -98,22 +98,24 @@ namespace {
}
// FIXME: We could count these too...
- bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override {
+ bool pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) override {
return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal);
}
// Forwarding functions: just delegate to a real AA implementation, counting
// the number of responses...
- AliasResult alias(const Location &LocA, const Location &LocB) override;
+ AliasResult alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) override;
ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Location &Loc) override;
+ const MemoryLocation &Loc) override;
ModRefResult getModRefInfo(ImmutableCallSite CS1,
ImmutableCallSite CS2) override {
return AliasAnalysis::getModRefInfo(CS1,CS2);
}
};
-}
+} // namespace
char AliasAnalysisCounter::ID = 0;
INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
@@ -124,7 +126,8 @@ ModulePass *llvm::createAliasAnalysisCounterPass() {
}
AliasAnalysis::AliasResult
-AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) {
+AliasAnalysisCounter::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
const char *AliasString = nullptr;
@@ -150,7 +153,7 @@ AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) {
AliasAnalysis::ModRefResult
AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
- const Location &Loc) {
+ const MemoryLocation &Loc) {
ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
const char *MRString = nullptr;
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index dd6a3a0..1501b5f 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -47,8 +47,8 @@ static cl::opt<bool> EvalAAMD("evaluate-aa-metadata", cl::ReallyHidden);
namespace {
class AAEval : public FunctionPass {
- unsigned NoAlias, MayAlias, PartialAlias, MustAlias;
- unsigned NoModRef, Mod, Ref, ModRef;
+ unsigned NoAliasCount, MayAliasCount, PartialAliasCount, MustAliasCount;
+ unsigned NoModRefCount, ModCount, RefCount, ModRefCount;
public:
static char ID; // Pass identification, replacement for typeid
@@ -62,8 +62,8 @@ namespace {
}
bool doInitialization(Module &M) override {
- NoAlias = MayAlias = PartialAlias = MustAlias = 0;
- NoModRef = Mod = Ref = ModRef = 0;
+ NoAliasCount = MayAliasCount = PartialAliasCount = MustAliasCount = 0;
+ NoModRefCount = ModCount = RefCount = ModRefCount = 0;
if (PrintAll) {
PrintNoAlias = PrintMayAlias = true;
@@ -76,7 +76,7 @@ namespace {
bool runOnFunction(Function &F) override;
bool doFinalization(Module &M) override;
};
-}
+} // namespace
char AAEval::ID = 0;
INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",
@@ -186,29 +186,33 @@ bool AAEval::runOnFunction(Function &F) {
// iterate over the worklist, and run the full (n^2)/2 disambiguations
for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
I1 != E; ++I1) {
- uint64_t I1Size = AliasAnalysis::UnknownSize;
+ uint64_t I1Size = MemoryLocation::UnknownSize;
Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);
for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
- uint64_t I2Size = AliasAnalysis::UnknownSize;
+ uint64_t I2Size = MemoryLocation::UnknownSize;
Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
case AliasAnalysis::NoAlias:
PrintResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent());
- ++NoAlias; break;
+ ++NoAliasCount;
+ break;
case AliasAnalysis::MayAlias:
PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent());
- ++MayAlias; break;
+ ++MayAliasCount;
+ break;
case AliasAnalysis::PartialAlias:
PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2,
F.getParent());
- ++PartialAlias; break;
+ ++PartialAliasCount;
+ break;
case AliasAnalysis::MustAlias:
PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
- ++MustAlias; break;
+ ++MustAliasCount;
+ break;
}
}
}
@@ -224,19 +228,23 @@ bool AAEval::runOnFunction(Function &F) {
case AliasAnalysis::NoAlias:
PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
F.getParent());
- ++NoAlias; break;
+ ++NoAliasCount;
+ break;
case AliasAnalysis::MayAlias:
PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2,
F.getParent());
- ++MayAlias; break;
+ ++MayAliasCount;
+ break;
case AliasAnalysis::PartialAlias:
PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2,
F.getParent());
- ++PartialAlias; break;
+ ++PartialAliasCount;
+ break;
case AliasAnalysis::MustAlias:
PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2,
F.getParent());
- ++MustAlias; break;
+ ++MustAliasCount;
+ break;
}
}
}
@@ -250,19 +258,23 @@ bool AAEval::runOnFunction(Function &F) {
case AliasAnalysis::NoAlias:
PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
F.getParent());
- ++NoAlias; break;
+ ++NoAliasCount;
+ break;
case AliasAnalysis::MayAlias:
PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2,
F.getParent());
- ++MayAlias; break;
+ ++MayAliasCount;
+ break;
case AliasAnalysis::PartialAlias:
PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2,
F.getParent());
- ++PartialAlias; break;
+ ++PartialAliasCount;
+ break;
case AliasAnalysis::MustAlias:
PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2,
F.getParent());
- ++MustAlias; break;
+ ++MustAliasCount;
+ break;
}
}
}
@@ -275,23 +287,27 @@ bool AAEval::runOnFunction(Function &F) {
for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
V != Ve; ++V) {
- uint64_t Size = AliasAnalysis::UnknownSize;
+ uint64_t Size = MemoryLocation::UnknownSize;
Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);
switch (AA.getModRefInfo(*C, *V, Size)) {
case AliasAnalysis::NoModRef:
PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent());
- ++NoModRef; break;
+ ++NoModRefCount;
+ break;
case AliasAnalysis::Mod:
PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent());
- ++Mod; break;
+ ++ModCount;
+ break;
case AliasAnalysis::Ref:
PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent());
- ++Ref; break;
+ ++RefCount;
+ break;
case AliasAnalysis::ModRef:
PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());
- ++ModRef; break;
+ ++ModRefCount;
+ break;
}
}
}
@@ -305,16 +321,20 @@ bool AAEval::runOnFunction(Function &F) {
switch (AA.getModRefInfo(*C, *D)) {
case AliasAnalysis::NoModRef:
PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
- ++NoModRef; break;
+ ++NoModRefCount;
+ break;
case AliasAnalysis::Mod:
PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
- ++Mod; break;
+ ++ModCount;
+ break;
case AliasAnalysis::Ref:
PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
- ++Ref; break;
+ ++RefCount;
+ break;
case AliasAnalysis::ModRef:
PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
- ++ModRef; break;
+ ++ModRefCount;
+ break;
}
}
}
@@ -328,43 +348,47 @@ static void PrintPercent(unsigned Num, unsigned Sum) {
}
bool AAEval::doFinalization(Module &M) {
- unsigned AliasSum = NoAlias + MayAlias + PartialAlias + MustAlias;
+ unsigned AliasSum =
+ NoAliasCount + MayAliasCount + PartialAliasCount + MustAliasCount;
errs() << "===== Alias Analysis Evaluator Report =====\n";
if (AliasSum == 0) {
errs() << " Alias Analysis Evaluator Summary: No pointers!\n";
} else {
errs() << " " << AliasSum << " Total Alias Queries Performed\n";
- errs() << " " << NoAlias << " no alias responses ";
- PrintPercent(NoAlias, AliasSum);
- errs() << " " << MayAlias << " may alias responses ";
- PrintPercent(MayAlias, AliasSum);
- errs() << " " << PartialAlias << " partial alias responses ";
- PrintPercent(PartialAlias, AliasSum);
- errs() << " " << MustAlias << " must alias responses ";
- PrintPercent(MustAlias, AliasSum);
+ errs() << " " << NoAliasCount << " no alias responses ";
+ PrintPercent(NoAliasCount, AliasSum);
+ errs() << " " << MayAliasCount << " may alias responses ";
+ PrintPercent(MayAliasCount, AliasSum);
+ errs() << " " << PartialAliasCount << " partial alias responses ";
+ PrintPercent(PartialAliasCount, AliasSum);
+ errs() << " " << MustAliasCount << " must alias responses ";
+ PrintPercent(MustAliasCount, AliasSum);
errs() << " Alias Analysis Evaluator Pointer Alias Summary: "
- << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/"
- << PartialAlias*100/AliasSum << "%/"
- << MustAlias*100/AliasSum << "%\n";
+ << NoAliasCount * 100 / AliasSum << "%/"
+ << MayAliasCount * 100 / AliasSum << "%/"
+ << PartialAliasCount * 100 / AliasSum << "%/"
+ << MustAliasCount * 100 / AliasSum << "%\n";
}
// Display the summary for mod/ref analysis
- unsigned ModRefSum = NoModRef + Mod + Ref + ModRef;
+ unsigned ModRefSum = NoModRefCount + ModCount + RefCount + ModRefCount;
if (ModRefSum == 0) {
- errs() << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n";
+ errs() << " Alias Analysis Mod/Ref Evaluator Summary: no "
+ "mod/ref!\n";
} else {
errs() << " " << ModRefSum << " Total ModRef Queries Performed\n";
- errs() << " " << NoModRef << " no mod/ref responses ";
- PrintPercent(NoModRef, ModRefSum);
- errs() << " " << Mod << " mod responses ";
- PrintPercent(Mod, ModRefSum);
- errs() << " " << Ref << " ref responses ";
- PrintPercent(Ref, ModRefSum);
- errs() << " " << ModRef << " mod & ref responses ";
- PrintPercent(ModRef, ModRefSum);
+ errs() << " " << NoModRefCount << " no mod/ref responses ";
+ PrintPercent(NoModRefCount, ModRefSum);
+ errs() << " " << ModCount << " mod responses ";
+ PrintPercent(ModCount, ModRefSum);
+ errs() << " " << RefCount << " ref responses ";
+ PrintPercent(RefCount, ModRefSum);
+ errs() << " " << ModRefCount << " mod & ref responses ";
+ PrintPercent(ModRefCount, ModRefSum);
errs() << " Alias Analysis Evaluator Mod/Ref Summary: "
- << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/"
- << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n";
+ << NoModRefCount * 100 / ModRefSum << "%/"
+ << ModCount * 100 / ModRefSum << "%/" << RefCount * 100 / ModRefSum
+ << "%/" << ModRefCount * 100 / ModRefSum << "%\n";
}
return false;
diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
index f98b578..fde0eeb 100644
--- a/contrib/llvm/lib/Analysis/AliasDebugger.cpp
+++ b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
@@ -94,7 +94,8 @@ namespace {
//------------------------------------------------
// Implement the AliasAnalysis API
//
- AliasResult alias(const Location &LocA, const Location &LocB) override {
+ AliasResult alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) override {
assert(Vals.find(LocA.Ptr) != Vals.end() &&
"Never seen value in AA before");
assert(Vals.find(LocB.Ptr) != Vals.end() &&
@@ -103,7 +104,7 @@ namespace {
}
ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Location &Loc) override {
+ const MemoryLocation &Loc) override {
assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
return AliasAnalysis::getModRefInfo(CS, Loc);
}
@@ -113,7 +114,8 @@ namespace {
return AliasAnalysis::getModRefInfo(CS1,CS2);
}
- bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override {
+ bool pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) override {
assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
}
@@ -128,7 +130,7 @@ namespace {
}
};
-}
+} // namespace
char AliasDebugger::ID = 0;
INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa",
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
index 12c1c7d..f7a803c 100644
--- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -45,13 +45,9 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
PointerRec *R = AS.getSomePointer();
// If the pointers are not a must-alias pair, this set becomes a may alias.
- if (AA.alias(AliasAnalysis::Location(L->getValue(),
- L->getSize(),
- L->getAAInfo()),
- AliasAnalysis::Location(R->getValue(),
- R->getSize(),
- R->getAAInfo()))
- != AliasAnalysis::MustAlias)
+ if (AA.alias(MemoryLocation(L->getValue(), L->getSize(), L->getAAInfo()),
+ MemoryLocation(R->getValue(), R->getSize(), R->getAAInfo())) !=
+ AliasAnalysis::MustAlias)
AliasTy = MayAlias;
}
@@ -106,9 +102,8 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
if (PointerRec *P = getSomePointer()) {
AliasAnalysis &AA = AST.getAliasAnalysis();
AliasAnalysis::AliasResult Result =
- AA.alias(AliasAnalysis::Location(P->getValue(), P->getSize(),
- P->getAAInfo()),
- AliasAnalysis::Location(Entry.getValue(), Size, AAInfo));
+ AA.alias(MemoryLocation(P->getValue(), P->getSize(), P->getAAInfo()),
+ MemoryLocation(Entry.getValue(), Size, AAInfo));
if (Result != AliasAnalysis::MustAlias)
AliasTy = MayAlias;
else // First entry of must alias must have maximum size!
@@ -156,26 +151,24 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
// SOME value in the set.
PointerRec *SomePtr = getSomePointer();
assert(SomePtr && "Empty must-alias set??");
- return AA.alias(AliasAnalysis::Location(SomePtr->getValue(),
- SomePtr->getSize(),
- SomePtr->getAAInfo()),
- AliasAnalysis::Location(Ptr, Size, AAInfo));
+ return AA.alias(MemoryLocation(SomePtr->getValue(), SomePtr->getSize(),
+ SomePtr->getAAInfo()),
+ MemoryLocation(Ptr, Size, AAInfo));
}
// If this is a may-alias set, we have to check all of the pointers in the set
// to be sure it doesn't alias the set...
for (iterator I = begin(), E = end(); I != E; ++I)
- if (AA.alias(AliasAnalysis::Location(Ptr, Size, AAInfo),
- AliasAnalysis::Location(I.getPointer(), I.getSize(),
- I.getAAInfo())))
+ if (AA.alias(MemoryLocation(Ptr, Size, AAInfo),
+ MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())))
return true;
// Check the unknown instructions...
if (!UnknownInsts.empty()) {
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i)
if (AA.getModRefInfo(UnknownInsts[i],
- AliasAnalysis::Location(Ptr, Size, AAInfo)) !=
- AliasAnalysis::NoModRef)
+ MemoryLocation(Ptr, Size, AAInfo)) !=
+ AliasAnalysis::NoModRef)
return true;
}
@@ -196,10 +189,9 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
}
for (iterator I = begin(), E = end(); I != E; ++I)
- if (AA.getModRefInfo(Inst, AliasAnalysis::Location(I.getPointer(),
- I.getSize(),
- I.getAAInfo())) !=
- AliasAnalysis::NoModRef)
+ if (AA.getModRefInfo(
+ Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())) !=
+ AliasAnalysis::NoModRef)
return true;
return false;
@@ -345,8 +337,8 @@ bool AliasSetTracker::add(VAArgInst *VAAI) {
VAAI->getAAMetadata(AAInfo);
bool NewPtr;
- addPointer(VAAI->getOperand(0), AliasAnalysis::UnknownSize,
- AAInfo, AliasSet::ModRef, NewPtr);
+ addPointer(VAAI->getOperand(0), MemoryLocation::UnknownSize, AAInfo,
+ AliasSet::ModRef, NewPtr);
return NewPtr;
}
@@ -479,7 +471,7 @@ bool AliasSetTracker::remove(VAArgInst *VAAI) {
VAAI->getAAMetadata(AAInfo);
AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0),
- AliasAnalysis::UnknownSize, AAInfo);
+ MemoryLocation::UnknownSize, AAInfo);
if (!AS) return false;
remove(*AS);
return true;
@@ -674,7 +666,7 @@ namespace {
return false;
}
};
-}
+} // namespace
char AliasSetPrinter::ID = 0;
INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index a61faca..d11a748 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -105,7 +105,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
uint64_t Size;
if (getObjectSize(V, Size, DL, &TLI, RoundToAlign))
return Size;
- return AliasAnalysis::UnknownSize;
+ return MemoryLocation::UnknownSize;
}
/// isObjectSmallerThan - Return true if we can prove that the object specified
@@ -146,7 +146,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
// reads a bit past the end given sufficient alignment.
uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/true);
- return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
+ return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size;
}
/// isObjectSize - Return true if we can prove that the object specified
@@ -154,7 +154,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
static bool isObjectSize(const Value *V, uint64_t Size,
const DataLayout &DL, const TargetLibraryInfo &TLI) {
uint64_t ObjectSize = getObjectSize(V, DL, TLI);
- return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size;
+ return ObjectSize != MemoryLocation::UnknownSize && ObjectSize == Size;
}
//===----------------------------------------------------------------------===//
@@ -182,7 +182,7 @@ namespace {
return !operator==(Other);
}
};
-}
+} // namespace
/// GetLinearExpression - Analyze the specified value as a linear expression:
@@ -459,7 +459,8 @@ namespace {
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
- AliasResult alias(const Location &LocA, const Location &LocB) override {
+ AliasResult alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) override {
assert(AliasCache.empty() && "AliasCache must be cleared after use!");
assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
"BasicAliasAnalysis doesn't support interprocedural queries.");
@@ -475,18 +476,19 @@ namespace {
}
ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Location &Loc) override;
+ const MemoryLocation &Loc) override;
ModRefResult getModRefInfo(ImmutableCallSite CS1,
ImmutableCallSite CS2) override;
/// pointsToConstantMemory - Chase pointers until we find a (constant
/// global) or not.
- bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override;
+ bool pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) override;
/// Get the location associated with a pointer argument of a callsite.
- Location getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
- ModRefResult &Mask) override;
+ ModRefResult getArgModRefInfo(ImmutableCallSite CS,
+ unsigned ArgIdx) override;
/// getModRefBehavior - Return the behavior when calling the given
/// call site.
@@ -508,7 +510,7 @@ namespace {
private:
// AliasCache - Track alias queries to guard against recursion.
- typedef std::pair<Location, Location> LocPair;
+ typedef std::pair<MemoryLocation, MemoryLocation> LocPair;
typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
AliasCacheTy AliasCache;
@@ -592,8 +594,8 @@ ImmutablePass *llvm::createBasicAliasAnalysisPass() {
/// pointsToConstantMemory - Returns whether the given pointer value
/// points to memory that is local to the function, with global constants being
/// considered local to all functions.
-bool
-BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
assert(Visited.empty() && "Visited must be cleared after use!");
unsigned MaxLookup = 8;
@@ -652,6 +654,8 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
return Worklist.empty();
}
+// FIXME: This code is duplicated with MemoryLocation and should be hoisted to
+// some common utility location.
static bool isMemsetPattern16(const Function *MS,
const TargetLibraryInfo &TLI) {
if (TLI.has(LibFunc::memset_pattern16) &&
@@ -715,84 +719,33 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
}
-AliasAnalysis::Location
-BasicAliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
- ModRefResult &Mask) {
- Location Loc = AliasAnalysis::getArgLocation(CS, ArgIdx, Mask);
- const TargetLibraryInfo &TLI =
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
- if (II != nullptr)
+AliasAnalysis::ModRefResult
+BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()))
switch (II->getIntrinsicID()) {
- default: break;
+ default:
+ break;
case Intrinsic::memset:
case Intrinsic::memcpy:
- case Intrinsic::memmove: {
+ case Intrinsic::memmove:
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memory intrinsic");
- if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
- Loc.Size = LenCI->getZExtValue();
- assert(Loc.Ptr == II->getArgOperand(ArgIdx) &&
- "Memory intrinsic location pointer not argument?");
- Mask = ArgIdx ? Ref : Mod;
- break;
- }
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start: {
- assert(ArgIdx == 1 && "Invalid argument index");
- assert(Loc.Ptr == II->getArgOperand(ArgIdx) &&
- "Intrinsic location pointer not argument?");
- Loc.Size = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
- break;
- }
- case Intrinsic::invariant_end: {
- assert(ArgIdx == 2 && "Invalid argument index");
- assert(Loc.Ptr == II->getArgOperand(ArgIdx) &&
- "Intrinsic location pointer not argument?");
- Loc.Size = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
- break;
- }
- case Intrinsic::arm_neon_vld1: {
- assert(ArgIdx == 0 && "Invalid argument index");
- assert(Loc.Ptr == II->getArgOperand(ArgIdx) &&
- "Intrinsic location pointer not argument?");
- // LLVM's vld1 and vst1 intrinsics currently only support a single
- // vector register.
- if (DL)
- Loc.Size = DL->getTypeStoreSize(II->getType());
- break;
- }
- case Intrinsic::arm_neon_vst1: {
- assert(ArgIdx == 0 && "Invalid argument index");
- assert(Loc.Ptr == II->getArgOperand(ArgIdx) &&
- "Intrinsic location pointer not argument?");
- if (DL)
- Loc.Size = DL->getTypeStoreSize(II->getArgOperand(1)->getType());
- break;
- }
+ return ArgIdx ? Ref : Mod;
}
// We can bound the aliasing properties of memset_pattern16 just as we can
// for memcpy/memset. This is particularly important because the
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
- else if (CS.getCalledFunction() &&
- isMemsetPattern16(CS.getCalledFunction(), TLI)) {
+ if (CS.getCalledFunction() &&
+ isMemsetPattern16(CS.getCalledFunction(), *TLI)) {
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memset_pattern16");
- if (ArgIdx == 1)
- Loc.Size = 16;
- else if (const ConstantInt *LenCI =
- dyn_cast<ConstantInt>(CS.getArgument(2)))
- Loc.Size = LenCI->getZExtValue();
- assert(Loc.Ptr == CS.getArgument(ArgIdx) &&
- "memset_pattern16 location pointer not argument?");
- Mask = ArgIdx ? Ref : Mod;
+ return ArgIdx ? Ref : Mod;
}
// FIXME: Handle memset_pattern4 and memset_pattern8 also.
- return Loc;
+ return AliasAnalysis::getArgModRefInfo(CS, ArgIdx);
}
static bool isAssumeIntrinsic(ImmutableCallSite CS) {
@@ -814,7 +767,7 @@ bool BasicAliasAnalysis::doInitialization(Module &M) {
/// simple "address taken" analysis on local objects.
AliasAnalysis::ModRefResult
BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
- const Location &Loc) {
+ const MemoryLocation &Loc) {
assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
"AliasAnalysis query involving multiple functions!");
@@ -850,7 +803,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
// escape.
- if (!isNoAlias(Location(*CI), Location(Object))) {
+ if (!isNoAlias(MemoryLocation(*CI), MemoryLocation(Object))) {
PassedAsArg = true;
break;
}
@@ -902,8 +855,8 @@ aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size,
// If we don't know the size of the accesses through both GEPs, we can't
// determine whether the struct fields accessed can't alias.
- if (V1Size == AliasAnalysis::UnknownSize ||
- V2Size == AliasAnalysis::UnknownSize)
+ if (V1Size == MemoryLocation::UnknownSize ||
+ V2Size == MemoryLocation::UnknownSize)
return AliasAnalysis::MayAlias;
ConstantInt *C1 =
@@ -1017,8 +970,9 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// derived pointer.
if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
// Do the base pointers alias?
- AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, AAMDNodes(),
- UnderlyingV2, UnknownSize, AAMDNodes());
+ AliasResult BaseAlias =
+ aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize, AAMDNodes(),
+ UnderlyingV2, MemoryLocation::UnknownSize, AAMDNodes());
// Check for geps of non-aliasing underlying pointers where the offsets are
// identical.
@@ -1109,11 +1063,12 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// pointer, we know they cannot alias.
// If both accesses are unknown size, we can't do anything useful here.
- if (V1Size == UnknownSize && V2Size == UnknownSize)
+ if (V1Size == MemoryLocation::UnknownSize &&
+ V2Size == MemoryLocation::UnknownSize)
return MayAlias;
- AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, AAMDNodes(),
- V2, V2Size, V2AAInfo);
+ AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize,
+ AAMDNodes(), V2, V2Size, V2AAInfo);
if (R != MustAlias)
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
@@ -1153,7 +1108,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// greater, we know they do not overlap.
if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) {
if (GEP1BaseOffset >= 0) {
- if (V2Size != UnknownSize) {
+ if (V2Size != MemoryLocation::UnknownSize) {
if ((uint64_t)GEP1BaseOffset < V2Size)
return PartialAlias;
return NoAlias;
@@ -1167,7 +1122,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// GEP1 V2
// We need to know that V2Size is not unknown, otherwise we might have
// stripped a gep with negative index ('gep <ptr>, -1, ...).
- if (V1Size != UnknownSize && V2Size != UnknownSize) {
+ if (V1Size != MemoryLocation::UnknownSize &&
+ V2Size != MemoryLocation::UnknownSize) {
if (-(uint64_t)GEP1BaseOffset < V1Size)
return PartialAlias;
return NoAlias;
@@ -1218,8 +1174,9 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// mod Modulo. Check whether that difference guarantees that the
// two locations do not alias.
uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1);
- if (V1Size != UnknownSize && V2Size != UnknownSize &&
- ModOffset >= V2Size && V1Size <= Modulo - ModOffset)
+ if (V1Size != MemoryLocation::UnknownSize &&
+ V2Size != MemoryLocation::UnknownSize && ModOffset >= V2Size &&
+ V1Size <= Modulo - ModOffset)
return NoAlias;
// If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
@@ -1302,8 +1259,8 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
// on corresponding edges.
if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
if (PN2->getParent() == PN->getParent()) {
- LocPair Locs(Location(PN, PNSize, PNAAInfo),
- Location(V2, V2Size, V2AAInfo));
+ LocPair Locs(MemoryLocation(PN, PNSize, PNAAInfo),
+ MemoryLocation(V2, V2Size, V2AAInfo));
if (PN > V2)
std::swap(Locs.first, Locs.second);
// Analyse the PHIs' inputs under the assumption that the PHIs are
@@ -1457,14 +1414,16 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
// If the size of one access is larger than the entire object on the other
// side, then we know such behavior is undefined and can assume no alias.
if (DL)
- if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *DL, *TLI)) ||
- (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *DL, *TLI)))
+ if ((V1Size != MemoryLocation::UnknownSize &&
+ isObjectSmallerThan(O2, V1Size, *DL, *TLI)) ||
+ (V2Size != MemoryLocation::UnknownSize &&
+ isObjectSmallerThan(O1, V2Size, *DL, *TLI)))
return NoAlias;
// Check the cache before climbing up use-def chains. This also terminates
// otherwise infinitely recursive queries.
- LocPair Locs(Location(V1, V1Size, V1AAInfo),
- Location(V2, V2Size, V2AAInfo));
+ LocPair Locs(MemoryLocation(V1, V1Size, V1AAInfo),
+ MemoryLocation(V2, V2Size, V2AAInfo));
if (V1 > V2)
std::swap(Locs.first, Locs.second);
std::pair<AliasCacheTy::iterator, bool> Pair =
@@ -1511,13 +1470,15 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
// accesses is accessing the entire object, then the accesses must
// overlap in some way.
if (DL && O1 == O2)
- if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *DL, *TLI)) ||
- (V2Size != UnknownSize && isObjectSize(O2, V2Size, *DL, *TLI)))
+ if ((V1Size != MemoryLocation::UnknownSize &&
+ isObjectSize(O1, V1Size, *DL, *TLI)) ||
+ (V2Size != MemoryLocation::UnknownSize &&
+ isObjectSize(O2, V2Size, *DL, *TLI)))
return AliasCache[Locs] = PartialAlias;
AliasResult Result =
- AliasAnalysis::alias(Location(V1, V1Size, V1AAInfo),
- Location(V2, V2Size, V2AAInfo));
+ AliasAnalysis::alias(MemoryLocation(V1, V1Size, V1AAInfo),
+ MemoryLocation(V2, V2Size, V2AAInfo));
return AliasCache[Locs] = Result;
}
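The hunks above mechanically swap AliasAnalysis::Location for MemoryLocation throughout BasicAliasAnalysis. As a rough sketch of what a caller-side query looks like in this patched tree (the nested AliasAnalysis::AliasResult enum and the MemoryLocation constructor are as used in the hunks above; mayOverlap and its parameters are invented for illustration, not part of the patch):

// Sketch only: assumes this patched, LLVM 3.7-era tree, where MemoryLocation
// carries the pointer, size and AA metadata for an alias query.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Value.h"
#include <cstdint>

using namespace llvm;

// Hypothetical helper: may two (pointer, size) accesses overlap?
static bool mayOverlap(AliasAnalysis &AA, const Value *P1, uint64_t S1,
                       const Value *P2, uint64_t S2) {
  MemoryLocation LocA(P1, S1);
  MemoryLocation LocB(P2, S2);
  return AA.alias(LocA, LocB) != AliasAnalysis::NoAlias;
}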
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 456cee1..daa77b8 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -286,7 +286,7 @@ bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
if (isLoopHeader(Resolved)) {
DEBUG(debugSuccessor("backedge"));
- Dist.addBackedge(OuterLoop->getHeader(), Weight);
+ Dist.addBackedge(Resolved, Weight);
return true;
}
@@ -349,7 +349,10 @@ void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
// LoopScale == 1 / ExitMass
// ExitMass == HeadMass - BackedgeMass
- BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
+ BlockMass TotalBackedgeMass;
+ for (auto &Mass : Loop.BackedgeMass)
+ TotalBackedgeMass += Mass;
+ BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
// Block scale stores the inverse of the scale. If this is an infinite loop,
// its exit mass will be zero. In this case, use an arbitrary scale for the
@@ -358,7 +361,7 @@ void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
ExitMass.isEmpty() ? InifiniteLoopScale : ExitMass.toScaled().inverse();
DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
- << " - " << Loop.BackedgeMass << ")\n"
+ << " - " << TotalBackedgeMass << ")\n"
<< " - scale = " << Loop.Scale << "\n");
}
@@ -375,6 +378,19 @@ void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
Loop.IsPackaged = true;
}
+#ifndef NDEBUG
+static void debugAssign(const BlockFrequencyInfoImplBase &BFI,
+ const DitheringDistributer &D, const BlockNode &T,
+ const BlockMass &M, const char *Desc) {
+ dbgs() << " => assign " << M << " (" << D.RemMass << ")";
+ if (Desc)
+ dbgs() << " [" << Desc << "]";
+ if (T.isValid())
+ dbgs() << " to " << BFI.getBlockName(T);
+ dbgs() << "\n";
+}
+#endif
+
void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
LoopData *OuterLoop,
Distribution &Dist) {
@@ -384,25 +400,12 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
// Distribute mass to successors as laid out in Dist.
DitheringDistributer D(Dist, Mass);
-#ifndef NDEBUG
- auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
- const char *Desc) {
- dbgs() << " => assign " << M << " (" << D.RemMass << ")";
- if (Desc)
- dbgs() << " [" << Desc << "]";
- if (T.isValid())
- dbgs() << " to " << getBlockName(T);
- dbgs() << "\n";
- };
- (void)debugAssign;
-#endif
-
for (const Weight &W : Dist.Weights) {
// Check for a local edge (non-backedge and non-exit).
BlockMass Taken = D.takeMass(W.Amount);
if (W.Type == Weight::Local) {
Working[W.TargetNode.Index].getMass() += Taken;
- DEBUG(debugAssign(W.TargetNode, Taken, nullptr));
+ DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
continue;
}
@@ -411,15 +414,15 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
// Check for a backedge.
if (W.Type == Weight::Backedge) {
- OuterLoop->BackedgeMass += Taken;
- DEBUG(debugAssign(BlockNode(), Taken, "back"));
+ OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
+ DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
continue;
}
// This must be an exit.
assert(W.Type == Weight::Exit);
OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
- DEBUG(debugAssign(W.TargetNode, Taken, "exit"));
+ DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
}
}
@@ -595,7 +598,7 @@ template <> struct GraphTraits<IrreducibleGraph> {
static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
};
-}
+} // namespace llvm
/// \brief Find extra irreducible headers.
///
@@ -713,10 +716,44 @@ BlockFrequencyInfoImplBase::analyzeIrreducible(
void
BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
OuterLoop.Exits.clear();
- OuterLoop.BackedgeMass = BlockMass::getEmpty();
+ for (auto &Mass : OuterLoop.BackedgeMass)
+ Mass = BlockMass::getEmpty();
auto O = OuterLoop.Nodes.begin() + 1;
for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
if (!Working[I->Index].isPackaged())
*O++ = *I;
OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
}
+
+void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
+ assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
+
+ // Since the loop has more than one header block, the mass flowing back into
+ // each header will be different. Adjust the mass in each header loop to
+ // reflect the masses flowing through back edges.
+ //
+ // To do this, we distribute the initial mass using the backedge masses
+ // as weights for the distribution.
+ BlockMass LoopMass = BlockMass::getFull();
+ Distribution Dist;
+
+ DEBUG(dbgs() << "adjust-loop-header-mass:\n");
+ for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
+ auto &HeaderNode = Loop.Nodes[H];
+ auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
+ DEBUG(dbgs() << " - Add back edge mass for node "
+ << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n");
+ Dist.addLocal(HeaderNode, BackedgeMass.getMass());
+ }
+
+ DitheringDistributer D(Dist, LoopMass);
+
+ DEBUG(dbgs() << " Distribute loop mass " << LoopMass
+ << " to headers using above weights\n");
+ for (const Weight &W : Dist.Weights) {
+ BlockMass Taken = D.takeMass(W.Amount);
+ assert(W.Type == Weight::Local && "all weights should be local");
+ Working[W.TargetNode.Index].getMass() = Taken;
+ DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
+ }
+}
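The BlockFrequencyInfoImpl hunks above replace the single BackedgeMass with one mass per irreducible-loop header and add adjustLoopHeaderMass, which hands the full loop mass back to the headers in proportion to those backedge masses. A self-contained arithmetic sketch of that proportional split (plain doubles and made-up numbers, not LLVM's fixed-point BlockMass or the dithering distributer):

// Illustrative arithmetic only; the real code uses BlockMass and Distribution.
#include <cstdio>

int main() {
  // Suppose an irreducible loop has two headers and the mass flowing back
  // into them along backedges is 0.3 and 0.1 respectively.
  double Backedge[2] = {0.3, 0.1};
  double Total = Backedge[0] + Backedge[1];

  // adjustLoopHeaderMass splits the full loop mass (1.0) proportionally,
  // so the headers start the next iteration with 0.75 and 0.25.
  for (int H = 0; H < 2; ++H)
    std::printf("header %d gets %.2f\n", H, Backedge[H] / Total);
  return 0;
}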
diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
index c86f1f5..edd02c2 100644
--- a/contrib/llvm/lib/Analysis/CFGPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
@@ -40,7 +40,7 @@ namespace {
AU.setPreservesAll();
}
};
-}
+} // namespace
char CFGViewer::ID = 0;
INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true)
@@ -63,7 +63,7 @@ namespace {
AU.setPreservesAll();
}
};
-}
+} // namespace
char CFGOnlyViewer::ID = 0;
INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only",
@@ -97,7 +97,7 @@ namespace {
AU.setPreservesAll();
}
};
-}
+} // namespace
char CFGPrinter::ID = 0;
INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file",
@@ -130,7 +130,7 @@ namespace {
AU.setPreservesAll();
}
};
-}
+} // namespace
char CFGOnlyPrinter::ID = 0;
INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
diff --git a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
index 84b31df..d937c0b 100644
--- a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
@@ -14,8 +14,7 @@
// Alias Analysis" by Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the
// papers, we build a graph of the uses of a variable, where each node is a
// memory location, and each edge is an action that happened on that memory
-// location. The "actions" can be one of Dereference, Reference, Assign, or
-// Assign.
+// location. The "actions" can be one of Dereference, Reference, or Assign.
//
// Two variables are considered as aliasing iff you can reach one value's node
// from the other value's node and the language formed by concatenating all of
@@ -219,9 +218,10 @@ public:
return Iter->second;
}
- AliasResult query(const Location &LocA, const Location &LocB);
+ AliasResult query(const MemoryLocation &LocA, const MemoryLocation &LocB);
- AliasResult alias(const Location &LocA, const Location &LocB) override {
+ AliasResult alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) override {
if (LocA.Ptr == LocB.Ptr) {
if (LocA.Size == LocB.Size) {
return MustAlias;
@@ -539,6 +539,19 @@ public:
Output.push_back(Edge(&Inst, From1, EdgeType::Assign, AttrNone));
Output.push_back(Edge(&Inst, From2, EdgeType::Assign, AttrNone));
}
+
+ void visitConstantExpr(ConstantExpr *CE) {
+ switch (CE->getOpcode()) {
+ default:
+ llvm_unreachable("Unknown instruction type encountered!");
+// Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: \
+ visit##OPCODE(*(CLASS *)CE); \
+ break;
+#include "llvm/IR/Instruction.def"
+ }
+ }

};
// For a given instruction, we need to know which Value* to get the
@@ -712,7 +725,7 @@ public:
typedef WeightedBidirectionalGraph<std::pair<EdgeType, StratifiedAttrs>> GraphT;
typedef DenseMap<Value *, GraphT::Node> NodeMapT;
-}
+} // namespace
// -- Setting up/registering CFLAA pass -- //
char CFLAliasAnalysis::ID = 0;
@@ -741,6 +754,10 @@ static EdgeType flipWeight(EdgeType);
static void argsToEdges(CFLAliasAnalysis &, Instruction *,
SmallVectorImpl<Edge> &);
+// Gets edges of the given ConstantExpr*, writing them to the SmallVector*.
+static void argsToEdges(CFLAliasAnalysis &, ConstantExpr *,
+ SmallVectorImpl<Edge> &);
+
// Gets the "Level" that one should travel in StratifiedSets
// given an EdgeType.
static Level directionOfEdgeType(EdgeType);
@@ -807,6 +824,13 @@ static bool hasUsefulEdges(Instruction *Inst) {
return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) && !IsNonInvokeTerminator;
}
+static bool hasUsefulEdges(ConstantExpr *CE) {
+  // ConstantExpr doesn't have terminators, invokes, or fences, so only needs
+  // to check for compares.
+ return CE->getOpcode() != Instruction::ICmp &&
+ CE->getOpcode() != Instruction::FCmp;
+}
+
static Optional<StratifiedAttr> valueToAttrIndex(Value *Val) {
if (isa<GlobalValue>(Val))
return AttrGlobalIndex;
@@ -846,6 +870,13 @@ static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst,
v.visit(Inst);
}
+static void argsToEdges(CFLAliasAnalysis &Analysis, ConstantExpr *CE,
+ SmallVectorImpl<Edge> &Output) {
+ assert(hasUsefulEdges(CE) && "Expected constant expr to have 'useful' edges");
+ GetEdgesVisitor v(Analysis, Output);
+ v.visitConstantExpr(CE);
+}
+
static Level directionOfEdgeType(EdgeType Weight) {
switch (Weight) {
case EdgeType::Reference:
@@ -865,25 +896,23 @@ static void constexprToEdges(CFLAliasAnalysis &Analysis,
Worklist.push_back(&CExprToCollapse);
SmallVector<Edge, 8> ConstexprEdges;
+ SmallPtrSet<ConstantExpr *, 4> Visited;
while (!Worklist.empty()) {
auto *CExpr = Worklist.pop_back_val();
- std::unique_ptr<Instruction> Inst(CExpr->getAsInstruction());
- if (!hasUsefulEdges(Inst.get()))
+ if (!hasUsefulEdges(CExpr))
continue;
ConstexprEdges.clear();
- argsToEdges(Analysis, Inst.get(), ConstexprEdges);
+ argsToEdges(Analysis, CExpr, ConstexprEdges);
for (auto &Edge : ConstexprEdges) {
- if (Edge.From == Inst.get())
- Edge.From = CExpr;
- else if (auto *Nested = dyn_cast<ConstantExpr>(Edge.From))
- Worklist.push_back(Nested);
-
- if (Edge.To == Inst.get())
- Edge.To = CExpr;
- else if (auto *Nested = dyn_cast<ConstantExpr>(Edge.To))
- Worklist.push_back(Nested);
+ if (auto *Nested = dyn_cast<ConstantExpr>(Edge.From))
+ if (Visited.insert(Nested).second)
+ Worklist.push_back(Nested);
+
+ if (auto *Nested = dyn_cast<ConstantExpr>(Edge.To))
+ if (Visited.insert(Nested).second)
+ Worklist.push_back(Nested);
}
Results.append(ConstexprEdges.begin(), ConstexprEdges.end());
@@ -1080,9 +1109,8 @@ void CFLAliasAnalysis::scan(Function *Fn) {
Handles.push_front(FunctionHandle(Fn, this));
}
-AliasAnalysis::AliasResult
-CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
- const AliasAnalysis::Location &LocB) {
+AliasAnalysis::AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
auto *ValA = const_cast<Value *>(LocA.Ptr);
auto *ValB = const_cast<Value *>(LocB.Ptr);
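The visitConstantExpr hook added above dispatches on the ConstantExpr's opcode by expanding HANDLE_INST from llvm/IR/Instruction.def, instead of materializing a temporary instruction as constexprToEdges used to. A generic, self-contained sketch of that X-macro dispatch pattern (the opcode list and names here are invented, not Instruction.def):

// Standalone illustration of the HANDLE_INST-style X-macro dispatch.
#include <cstdio>

#define MY_OPCODES(X)                                                          \
  X(1, Add)                                                                    \
  X(2, GetElementPtr)                                                          \
  X(3, BitCast)

void visitAdd() { std::puts("visit Add"); }
void visitGetElementPtr() { std::puts("visit GetElementPtr"); }
void visitBitCast() { std::puts("visit BitCast"); }

void dispatch(int Opcode) {
  switch (Opcode) {
// Build the switch cases from the opcode list, as Instruction.def does.
#define HANDLE(NUM, NAME)                                                      \
  case NUM:                                                                    \
    visit##NAME();                                                             \
    break;
    MY_OPCODES(HANDLE)
#undef HANDLE
  default:
    std::puts("unknown opcode");
  }
}

int main() { dispatch(2); return 0; }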
diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
index 5a54754..92f6932 100644
--- a/contrib/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -110,7 +110,7 @@ namespace {
bool Captured;
};
-}
+} // namespace
/// PointerMayBeCaptured - Return true if this pointer value may be captured
/// by the enclosing function (which is required to exist). This routine can
diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
index e5ee295..3765adf 100644
--- a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -284,7 +284,7 @@ void DivergencePropagator::propagate() {
}
}
-} /// end namespace anonymous
+} // namespace
FunctionPass *llvm::createDivergenceAnalysisPass() {
return new DivergenceAnalysis();
diff --git a/contrib/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm/lib/Analysis/DomPrinter.cpp
index 0c880df..0e0d174 100644
--- a/contrib/llvm/lib/Analysis/DomPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/DomPrinter.cpp
@@ -78,7 +78,7 @@ struct DOTGraphTraits<PostDominatorTree*>
return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
}
};
-}
+} // namespace llvm
namespace {
struct DominatorTreeWrapperPassAnalysisGraphTraits {
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
index 67cf7f8..e2799d9 100644
--- a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
@@ -24,8 +24,8 @@ CallGraph::CallGraph(Module &M)
: M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)),
CallsExternalNode(new CallGraphNode(nullptr)) {
// Add every function to the call graph.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- addToCallGraph(I);
+ for (Function &F : M)
+ addToCallGraph(&F);
// If we didn't find a main function, use the external call graph node
if (!Root)
@@ -40,13 +40,11 @@ CallGraph::~CallGraph() {
// Reset all node's use counts to zero before deleting them to prevent an
// assertion from firing.
#ifndef NDEBUG
- for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
- I != E; ++I)
- I->second->allReferencesDropped();
+ for (auto &I : FunctionMap)
+ I.second->allReferencesDropped();
#endif
- for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
- I != E; ++I)
- delete I->second;
+ for (auto &I : FunctionMap)
+ delete I.second;
}
void CallGraph::addToCallGraph(Function *F) {
@@ -81,8 +79,10 @@ void CallGraph::addToCallGraph(Function *F) {
CallSite CS(cast<Value>(II));
if (CS) {
const Function *Callee = CS.getCalledFunction();
- if (!Callee)
+ if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
// Indirect calls of intrinsics are not allowed so no need to check.
+ // We can be more precise here by using TargetArg returned by
+ // Intrinsic::isLeaf.
Node->addCalledFunction(CS, CallsExternalNode);
else if (!Callee->isIntrinsic())
Node->addCalledFunction(CS, getOrInsertFunction(Callee));
@@ -98,8 +98,26 @@ void CallGraph::print(raw_ostream &OS) const {
OS << "<<null function: 0x" << Root << ">>\n";
}
- for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
- I->second->print(OS);
+ // Print in a deterministic order by sorting CallGraphNodes by name. We do
+ // this here to avoid slowing down the non-printing fast path.
+
+ SmallVector<CallGraphNode *, 16> Nodes;
+ Nodes.reserve(FunctionMap.size());
+
+ for (auto I = begin(), E = end(); I != E; ++I)
+ Nodes.push_back(I->second);
+
+ std::sort(Nodes.begin(), Nodes.end(),
+ [](CallGraphNode *LHS, CallGraphNode *RHS) {
+ if (Function *LF = LHS->getFunction())
+ if (Function *RF = RHS->getFunction())
+ return LF->getName() < RF->getName();
+
+ return RHS->getFunction() != nullptr;
+ });
+
+ for (CallGraphNode *CN : Nodes)
+ CN->print(OS);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 65ba1c7..6b3e063 100644
--- a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -217,8 +217,10 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
// another value. This can happen when constant folding happens
// of well known functions etc.
!CallSite(I->first) ||
- (CallSite(I->first).getCalledFunction() &&
- CallSite(I->first).getCalledFunction()->isIntrinsic())) {
+ (CallSite(I->first).getCalledFunction() &&
+ CallSite(I->first).getCalledFunction()->isIntrinsic() &&
+ Intrinsic::isLeaf(
+ CallSite(I->first).getCalledFunction()->getIntrinsicID()))) {
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
diff --git a/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp b/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp
index 68dcd3c..f183625 100644
--- a/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp
@@ -41,7 +41,7 @@ struct AnalysisCallGraphWrapperPassTraits {
}
};
-} // end llvm namespace
+} // namespace llvm
namespace {
diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
index 018ae99..a32631d 100644
--- a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -115,9 +115,10 @@ namespace {
//------------------------------------------------
// Implement the AliasAnalysis API
//
- AliasResult alias(const Location &LocA, const Location &LocB) override;
+ AliasResult alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) override;
ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Location &Loc) override;
+ const MemoryLocation &Loc) override;
ModRefResult getModRefInfo(ImmutableCallSite CS1,
ImmutableCallSite CS2) override {
return AliasAnalysis::getModRefInfo(CS1, CS2);
@@ -188,7 +189,7 @@ namespace {
GlobalValue *OkayStoreDest = nullptr);
bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
};
-}
+} // namespace
char GlobalsModRef::ID = 0;
INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
@@ -478,9 +479,8 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
/// alias - If one of the pointers is to a global that we are tracking, and the
/// other is some random pointer, we know there cannot be an alias, because the
/// address of the global isn't taken.
-AliasAnalysis::AliasResult
-GlobalsModRef::alias(const Location &LocA,
- const Location &LocB) {
+AliasAnalysis::AliasResult GlobalsModRef::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
// Get the base object these pointers point to.
const Value *UV1 = GetUnderlyingObject(LocA.Ptr, *DL);
const Value *UV2 = GetUnderlyingObject(LocB.Ptr, *DL);
@@ -535,8 +535,7 @@ GlobalsModRef::alias(const Location &LocA,
}
AliasAnalysis::ModRefResult
-GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
- const Location &Loc) {
+GlobalsModRef::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) {
unsigned Known = ModRef;
// If we are asking for mod/ref info of a direct call with a pointer to a
diff --git a/contrib/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm/lib/Analysis/InstCount.cpp
index de2b9c0..e76d26e 100644
--- a/contrib/llvm/lib/Analysis/InstCount.cpp
+++ b/contrib/llvm/lib/Analysis/InstCount.cpp
@@ -64,7 +64,7 @@ namespace {
void print(raw_ostream &O, const Module *M) const override {}
};
-}
+} // namespace
char InstCount::ID = 0;
INITIALIZE_PASS(InstCount, "instcount",
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index ec56d88..12e406b 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -854,8 +854,8 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
return X;
}
- // fsub nnan ninf x, x ==> 0.0
- if (FMF.noNaNs() && FMF.noInfs() && Op0 == Op1)
+ // fsub nnan x, x ==> 0.0
+ if (FMF.noNaNs() && Op0 == Op1)
return Constant::getNullValue(Op0->getType());
return nullptr;
@@ -1126,6 +1126,21 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero()))
return Op0;
+ if (FMF.noNaNs()) {
+ // X / X -> 1.0 is legal when NaNs are ignored.
+ if (Op0 == Op1)
+ return ConstantFP::get(Op0->getType(), 1.0);
+
+ // -X / X -> -1.0 and
+ // X / -X -> -1.0 are legal when NaNs are ignored.
+ // We can ignore signed zeros because +-0.0/+-0.0 is NaN and ignored.
+ if ((BinaryOperator::isFNeg(Op0, /*IgnoreZeroSign=*/true) &&
+ BinaryOperator::getFNegArgument(Op0) == Op1) ||
+ (BinaryOperator::isFNeg(Op1, /*IgnoreZeroSign=*/true) &&
+ BinaryOperator::getFNegArgument(Op1) == Op0))
+ return ConstantFP::get(Op0->getType(), -1.0);
+ }
+
return nullptr;
}
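The new fdiv folds above only need the nnan flag because the inputs for which X/X is not 1.0 (zeros, infinities, NaN) all produce NaN, which nnan lets the simplifier ignore; signed zeros need no special flag for the same reason. A quick standalone check of that arithmetic:

// Numeric check of why 'nnan' alone justifies X/X -> 1.0 and -X/X -> -1.0.
#include <cmath>
#include <cstdio>

int main() {
  double Zero = 0.0, Inf = INFINITY, Normal = 2.5;
  std::printf("0/0     -> %f (NaN)\n", Zero / Zero);
  std::printf("inf/inf -> %f (NaN)\n", Inf / Inf);
  std::printf("x/x     -> %f\n", Normal / Normal); // 1.0 for other finite x
  return 0;
}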
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
index e6f586a..f421d28 100644
--- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -286,7 +286,7 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
<< Val.getConstantRange().getUpper() << '>';
return OS << "constant<" << *Val.getConstant() << '>';
}
-}
+} // namespace llvm
//===----------------------------------------------------------------------===//
// LazyValueInfoCache Decl
@@ -306,7 +306,7 @@ namespace {
deleted();
}
};
-}
+} // namespace
namespace {
/// This is the cache kept by LazyValueInfo which
@@ -1262,8 +1262,40 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
Instruction *CxtI) {
const DataLayout &DL = CxtI->getModule()->getDataLayout();
LVILatticeVal Result = getCache(PImpl, AC, &DL, DT).getValueAt(V, CxtI);
-
- return getPredicateResult(Pred, C, Result, DL, TLI);
+ Tristate Ret = getPredicateResult(Pred, C, Result, DL, TLI);
+ if (Ret != Unknown)
+ return Ret;
+
+ // TODO: Move this logic inside getValueAt so that it can be cached rather
+ // than re-queried on each call. This would also allow us to merge the
+ // underlying lattice values to get more information
+ if (CxtI) {
+ // For a comparison where the V is outside this block, it's possible
+ // that we've branched on it before. Look to see if the value is known
+ // on all incoming edges.
+ BasicBlock *BB = CxtI->getParent();
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (PI != PE &&
+ (!isa<Instruction>(V) ||
+ cast<Instruction>(V)->getParent() != BB)) {
+ // For predecessor edge, determine if the comparison is true or false
+ // on that edge. If they're all true or all false, we can conclude
+ // the value of the comparison in this block.
+ Tristate Baseline = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI);
+ if (Baseline != Unknown) {
+ // Check that all remaining incoming values match the first one.
+ while (++PI != PE) {
+ Tristate Ret = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI);
+ if (Ret != Baseline) break;
+ }
+ // If we terminated early, then one of the values didn't match.
+ if (PI == PE) {
+ return Baseline;
+ }
+ }
+ }
+ }
+ return Unknown;
}
void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
index f6025e3..991a0e3 100644
--- a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -48,7 +48,7 @@ bool LibCallAliasAnalysis::runOnFunction(Function &F) {
AliasAnalysis::ModRefResult
LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
ImmutableCallSite CS,
- const Location &Loc) {
+ const MemoryLocation &Loc) {
// If we have a function, check to see what kind of mod/ref effects it
// has. Start by including any info globally known about the function.
AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior;
@@ -122,7 +122,7 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
//
AliasAnalysis::ModRefResult
LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
- const Location &Loc) {
+ const MemoryLocation &Loc) {
ModRefResult MRInfo = ModRef;
// If this is a direct call to a function that LCI knows about, get the
diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
index e98540b..003c81e 100644
--- a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
+++ b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
@@ -80,9 +80,8 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
.Default(EHPersonality::Unknown);
}
-bool llvm::canSimplifyInvokeNoUnwind(const InvokeInst *II) {
- const LandingPadInst *LP = II->getLandingPadInst();
- EHPersonality Personality = classifyEHPersonality(LP->getPersonalityFn());
+bool llvm::canSimplifyInvokeNoUnwind(const Function *F) {
+ EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn());
// We can't simplify any invokes to nounwind functions if the personality
// function wants to catch asynch exceptions. The nounwind attribute only
// implies that the function does not throw synchronous exceptions.
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
index 65a90d7..6ea6ccb 100644
--- a/contrib/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -157,7 +157,7 @@ namespace {
WriteValues({V1, Vs...});
}
};
-}
+} // namespace
char Lint::ID = 0;
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
@@ -202,8 +202,8 @@ void Lint::visitCallSite(CallSite CS) {
Value *Callee = CS.getCalledValue();
const DataLayout &DL = CS->getModule()->getDataLayout();
- visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
- 0, nullptr, MemRef::Callee);
+ visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,
+ MemRef::Callee);
if (Function *F = dyn_cast<Function>(findValue(Callee, DL,
/*OffsetOk=*/false))) {
@@ -282,12 +282,10 @@ void Lint::visitCallSite(CallSite CS) {
case Intrinsic::memcpy: {
MemCpyInst *MCI = cast<MemCpyInst>(&I);
// TODO: If the size is known, use it.
- visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize,
- MCI->getAlignment(), nullptr,
- MemRef::Write);
- visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize,
- MCI->getAlignment(), nullptr,
- MemRef::Read);
+ visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize,
+ MCI->getAlignment(), nullptr, MemRef::Write);
+ visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize,
+ MCI->getAlignment(), nullptr, MemRef::Read);
// Check that the memcpy arguments don't overlap. The AliasAnalysis API
// isn't expressive enough for what we really want to do. Known partial
@@ -306,20 +304,17 @@ void Lint::visitCallSite(CallSite CS) {
case Intrinsic::memmove: {
MemMoveInst *MMI = cast<MemMoveInst>(&I);
// TODO: If the size is known, use it.
- visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize,
- MMI->getAlignment(), nullptr,
- MemRef::Write);
- visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize,
- MMI->getAlignment(), nullptr,
- MemRef::Read);
+ visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize,
+ MMI->getAlignment(), nullptr, MemRef::Write);
+ visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize,
+ MMI->getAlignment(), nullptr, MemRef::Read);
break;
}
case Intrinsic::memset: {
MemSetInst *MSI = cast<MemSetInst>(&I);
// TODO: If the size is known, use it.
- visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize,
- MSI->getAlignment(), nullptr,
- MemRef::Write);
+ visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize,
+ MSI->getAlignment(), nullptr, MemRef::Write);
break;
}
@@ -328,26 +323,26 @@ void Lint::visitCallSite(CallSite CS) {
"Undefined behavior: va_start called in a non-varargs function",
&I);
- visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
- 0, nullptr, MemRef::Read | MemRef::Write);
+ visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
+ nullptr, MemRef::Read | MemRef::Write);
break;
case Intrinsic::vacopy:
- visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
- 0, nullptr, MemRef::Write);
- visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize,
- 0, nullptr, MemRef::Read);
+ visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
+ nullptr, MemRef::Write);
+ visitMemoryReference(I, CS.getArgument(1), MemoryLocation::UnknownSize, 0,
+ nullptr, MemRef::Read);
break;
case Intrinsic::vaend:
- visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
- 0, nullptr, MemRef::Read | MemRef::Write);
+ visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
+ nullptr, MemRef::Read | MemRef::Write);
break;
case Intrinsic::stackrestore:
// Stackrestore doesn't read or write memory, but it sets the
// stack pointer, which the compiler may read from or write to
// at any time, so check it for both readability and writeability.
- visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
- 0, nullptr, MemRef::Read | MemRef::Write);
+ visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
+ nullptr, MemRef::Read | MemRef::Write);
break;
case Intrinsic::eh_begincatch:
@@ -435,7 +430,7 @@ void Lint::visitMemoryReference(Instruction &I,
// OK, so the access is to a constant offset from Ptr. Check that Ptr is
// something we can handle and if so extract the size of this base object
// along with its alignment.
- uint64_t BaseSize = AliasAnalysis::UnknownSize;
+ uint64_t BaseSize = MemoryLocation::UnknownSize;
unsigned BaseAlign = 0;
if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
@@ -460,8 +455,8 @@ void Lint::visitMemoryReference(Instruction &I,
// Accesses from before the start or after the end of the object are not
// defined.
- Assert(Size == AliasAnalysis::UnknownSize ||
- BaseSize == AliasAnalysis::UnknownSize ||
+ Assert(Size == MemoryLocation::UnknownSize ||
+ BaseSize == MemoryLocation::UnknownSize ||
(Offset >= 0 && Offset + Size <= BaseSize),
"Undefined behavior: Buffer overflow", &I);
@@ -770,12 +765,12 @@ void Lint::visitAllocaInst(AllocaInst &I) {
}
void Lint::visitVAArgInst(VAArgInst &I) {
- visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0,
+ visitMemoryReference(I, I.getOperand(0), MemoryLocation::UnknownSize, 0,
nullptr, MemRef::Read | MemRef::Write);
}
void Lint::visitIndirectBrInst(IndirectBrInst &I) {
- visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0,
+ visitMemoryReference(I, I.getAddress(), MemoryLocation::UnknownSize, 0,
nullptr, MemRef::Branchee);
Assert(I.getNumDestinations() != 0,
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index c661c7b..8425b75 100644
--- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -210,18 +210,18 @@ public:
: DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckNeeded(false) {}
/// \brief Register a load and whether it is only read from.
- void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) {
+ void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
+ AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
Accesses.insert(MemAccessInfo(Ptr, false));
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
}
/// \brief Register a store.
- void addStore(AliasAnalysis::Location &Loc) {
+ void addStore(MemoryLocation &Loc) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
+ AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
Accesses.insert(MemAccessInfo(Ptr, true));
}
@@ -1150,7 +1150,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
if (Seen.insert(Ptr).second) {
++NumReadWrites;
- AliasAnalysis::Location Loc = MemoryLocation::get(ST);
+ MemoryLocation Loc = MemoryLocation::get(ST);
// The TBAA metadata could have a control dependency on the predication
// condition, so we cannot rely on it when determining whether or not we
// need runtime pointer checks.
@@ -1186,7 +1186,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
IsReadOnlyPtr = true;
}
- AliasAnalysis::Location Loc = MemoryLocation::get(LD);
+ MemoryLocation Loc = MemoryLocation::get(LD);
// The TBAA metadata could have a control dependency on the predication
// condition, so we cannot rely on it when determining whether or not we
// need runtime pointer checks.
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
index e9fcf02..81b7ecd 100644
--- a/contrib/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -56,7 +56,7 @@ public:
};
char PrintLoopPass::ID = 0;
-}
+} // namespace
//===----------------------------------------------------------------------===//
// LPPassManager
diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
index da3b829..54a04d9 100644
--- a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -74,7 +74,7 @@ namespace {
return InstTypePair(inst, type);
}
};
-}
+} // namespace
char MemDepPrinter::ID = 0;
INITIALIZE_PASS_BEGIN(MemDepPrinter, "print-memdeps",
diff --git a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
index fa292a2..b0194d3 100644
--- a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -37,7 +37,7 @@ namespace {
Vec.clear();
}
};
-}
+} // namespace
char MemDerefPrinter::ID = 0;
INITIALIZE_PASS_BEGIN(MemDerefPrinter, "print-memderefs",
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 255bae6..cf8ba5c 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -118,10 +118,8 @@ static void RemoveFromReverseMap(DenseMap<Instruction*,
/// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
/// Return a ModRefInfo value describing the general behavior of the
/// instruction.
-static
-AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
- AliasAnalysis::Location &Loc,
- AliasAnalysis *AA) {
+static AliasAnalysis::ModRefResult
+GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) {
if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
if (LI->isUnordered()) {
Loc = MemoryLocation::get(LI);
@@ -131,7 +129,7 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
Loc = MemoryLocation::get(LI);
return AliasAnalysis::ModRef;
}
- Loc = AliasAnalysis::Location();
+ Loc = MemoryLocation();
return AliasAnalysis::ModRef;
}
@@ -144,7 +142,7 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
Loc = MemoryLocation::get(SI);
return AliasAnalysis::ModRef;
}
- Loc = AliasAnalysis::Location();
+ Loc = MemoryLocation();
return AliasAnalysis::ModRef;
}
@@ -155,7 +153,7 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
// calls to free() deallocate the entire structure
- Loc = AliasAnalysis::Location(CI->getArgOperand(0));
+ Loc = MemoryLocation(CI->getArgOperand(0));
return AliasAnalysis::Mod;
}
@@ -167,17 +165,17 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
II->getAAMetadata(AAInfo);
- Loc = AliasAnalysis::Location(II->getArgOperand(1),
- cast<ConstantInt>(II->getArgOperand(0))
- ->getZExtValue(), AAInfo);
+ Loc = MemoryLocation(
+ II->getArgOperand(1),
+ cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
return AliasAnalysis::Mod;
case Intrinsic::invariant_end:
II->getAAMetadata(AAInfo);
- Loc = AliasAnalysis::Location(II->getArgOperand(2),
- cast<ConstantInt>(II->getArgOperand(1))
- ->getZExtValue(), AAInfo);
+ Loc = MemoryLocation(
+ II->getArgOperand(2),
+ cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
return AliasAnalysis::Mod;
@@ -212,7 +210,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
Instruction *Inst = --ScanIt;
// If this inst is a memory op, get the pointer it accessed
- AliasAnalysis::Location Loc;
+ MemoryLocation Loc;
AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
if (Loc.Ptr) {
// A simple instruction.
@@ -259,9 +257,10 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
///
/// MemLocBase, MemLocOffset are lazily computed here the first time the
/// base/offs of memloc is needed.
-static bool isLoadLoadClobberIfExtendedToFullWidth(
- const AliasAnalysis::Location &MemLoc, const Value *&MemLocBase,
- int64_t &MemLocOffs, const LoadInst *LI) {
+static bool isLoadLoadClobberIfExtendedToFullWidth(const MemoryLocation &MemLoc,
+ const Value *&MemLocBase,
+ int64_t &MemLocOffs,
+ const LoadInst *LI) {
const DataLayout &DL = LI->getModule()->getDataLayout();
// If we haven't already computed the base/offset of MemLoc, do so now.
@@ -368,10 +367,9 @@ static bool isVolatile(Instruction *Inst) {
/// with reads from read-only locations. If possible, pass the query
/// instruction as well; this function may take advantage of the metadata
/// annotated to the query instruction to refine the result.
-MemDepResult MemoryDependenceAnalysis::
-getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
- BasicBlock::iterator ScanIt, BasicBlock *BB,
- Instruction *QueryInst) {
+MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
+ const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
+ BasicBlock *BB, Instruction *QueryInst) {
const Value *MemLocBase = nullptr;
int64_t MemLocOffset = 0;
@@ -440,8 +438,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// pointer, not on query pointers that are indexed off of them. It'd
// be nice to handle that at some point (the right approach is to use
// GetPointerBaseWithConstantOffset).
- if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)),
- MemLoc))
+ if (AA->isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
return MemDepResult::getDef(II);
continue;
}
@@ -486,7 +483,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
}
}
- AliasAnalysis::Location LoadLoc = MemoryLocation::get(LI);
+ MemoryLocation LoadLoc = MemoryLocation::get(LI);
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
@@ -575,7 +572,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Ok, this store might clobber the query pointer. Check to see if it is
// a must alias: in this case, we want to return this as a def.
- AliasAnalysis::Location StoreLoc = MemoryLocation::get(SI);
+ MemoryLocation StoreLoc = MemoryLocation::get(SI);
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);
@@ -679,7 +676,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
else
LocalCache = MemDepResult::getNonFuncLocal();
} else {
- AliasAnalysis::Location MemLoc;
+ MemoryLocation MemLoc;
AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
if (MemLoc.Ptr) {
// If we can do a pointer scan, make it happen.
@@ -872,7 +869,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
void MemoryDependenceAnalysis::
getNonLocalPointerDependency(Instruction *QueryInst,
SmallVectorImpl<NonLocalDepResult> &Result) {
- const AliasAnalysis::Location Loc = MemoryLocation::get(QueryInst);
+ const MemoryLocation Loc = MemoryLocation::get(QueryInst);
bool isLoad = isa<LoadInst>(QueryInst);
BasicBlock *FromBB = QueryInst->getParent();
assert(FromBB);
@@ -924,11 +921,9 @@ getNonLocalPointerDependency(Instruction *QueryInst,
/// Pointer/PointeeSize using either cached information in Cache or by doing a
/// lookup (which may use dirty cache info if available). If we do a lookup,
/// add the result to the cache.
-MemDepResult MemoryDependenceAnalysis::
-GetNonLocalInfoForBlock(Instruction *QueryInst,
- const AliasAnalysis::Location &Loc,
- bool isLoad, BasicBlock *BB,
- NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
+MemDepResult MemoryDependenceAnalysis::GetNonLocalInfoForBlock(
+ Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
+ BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
// Do a binary search to see if we already have an entry for this block in
// the cache set. If so, find it.
@@ -1040,14 +1035,11 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
/// This function returns false on success, or true to indicate that it could
/// not compute dependence information for some reason. This should be treated
/// as a clobber dependence on the first instruction in the predecessor block.
-bool MemoryDependenceAnalysis::
-getNonLocalPointerDepFromBB(Instruction *QueryInst,
- const PHITransAddr &Pointer,
- const AliasAnalysis::Location &Loc,
- bool isLoad, BasicBlock *StartBB,
- SmallVectorImpl<NonLocalDepResult> &Result,
- DenseMap<BasicBlock*, Value*> &Visited,
- bool SkipFirstBlock) {
+bool MemoryDependenceAnalysis::getNonLocalPointerDepFromBB(
+ Instruction *QueryInst, const PHITransAddr &Pointer,
+ const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
+ SmallVectorImpl<NonLocalDepResult> &Result,
+ DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
// Look up the cached info for Pointer.
ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
diff --git a/contrib/llvm/lib/Analysis/MemoryLocation.cpp b/contrib/llvm/lib/Analysis/MemoryLocation.cpp
index f87a017..e449126 100644
--- a/contrib/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryLocation.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
@@ -88,3 +89,86 @@ MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MTI) {
return MemoryLocation(MTI->getRawDest(), Size, AATags);
}
+
+// FIXME: This code is duplicated with BasicAliasAnalysis and should be hoisted
+// to some common utility location.
+static bool isMemsetPattern16(const Function *MS,
+ const TargetLibraryInfo &TLI) {
+ if (TLI.has(LibFunc::memset_pattern16) &&
+ MS->getName() == "memset_pattern16") {
+ FunctionType *MemsetType = MS->getFunctionType();
+ if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 &&
+ isa<PointerType>(MemsetType->getParamType(0)) &&
+ isa<PointerType>(MemsetType->getParamType(1)) &&
+ isa<IntegerType>(MemsetType->getParamType(2)))
+ return true;
+ }
+
+ return false;
+}
+
+MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
+ unsigned ArgIdx,
+ const TargetLibraryInfo &TLI) {
+ AAMDNodes AATags;
+ CS->getAAMetadata(AATags);
+ const Value *Arg = CS.getArgument(ArgIdx);
+
+ // We may be able to produce an exact size for known intrinsics.
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ const DataLayout &DL = II->getModule()->getDataLayout();
+
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ assert((ArgIdx == 0 || ArgIdx == 1) &&
+ "Invalid argument index for memory intrinsic");
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
+ return MemoryLocation(Arg, LenCI->getZExtValue(), AATags);
+ break;
+
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ assert(ArgIdx == 1 && "Invalid argument index");
+ return MemoryLocation(
+ Arg, cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AATags);
+
+ case Intrinsic::invariant_end:
+ assert(ArgIdx == 2 && "Invalid argument index");
+ return MemoryLocation(
+ Arg, cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AATags);
+
+ case Intrinsic::arm_neon_vld1:
+ assert(ArgIdx == 0 && "Invalid argument index");
+ // LLVM's vld1 and vst1 intrinsics currently only support a single
+ // vector register.
+ return MemoryLocation(Arg, DL.getTypeStoreSize(II->getType()), AATags);
+
+ case Intrinsic::arm_neon_vst1:
+ assert(ArgIdx == 0 && "Invalid argument index");
+ return MemoryLocation(
+ Arg, DL.getTypeStoreSize(II->getArgOperand(1)->getType()), AATags);
+ }
+ }
+
+ // We can bound the aliasing properties of memset_pattern16 just as we can
+ // for memcpy/memset. This is particularly important because the
+ // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
+ // whenever possible.
+ if (CS.getCalledFunction() &&
+ isMemsetPattern16(CS.getCalledFunction(), TLI)) {
+ assert((ArgIdx == 0 || ArgIdx == 1) &&
+ "Invalid argument index for memset_pattern16");
+ if (ArgIdx == 1)
+ return MemoryLocation(Arg, 16, AATags);
+ if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2)))
+ return MemoryLocation(Arg, LenCI->getZExtValue(), AATags);
+ }
+ // FIXME: Handle memset_pattern4 and memset_pattern8 also.
+
+ return MemoryLocation(CS.getArgument(ArgIdx), UnknownSize, AATags);
+}
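The new MemoryLocation::getForArgument centralizes the per-argument location computation that individual alias-analysis implementations previously supplied through getArgLocation: it returns an exact size for the memory intrinsics and memset_pattern16 with constant lengths, and UnknownSize otherwise. A usage sketch; the wrapper function and variable names are illustrative rather than part of the patch:

    #include "llvm/Analysis/MemoryLocation.h"
    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/IR/CallSite.h"
    using namespace llvm;

    // True when the byte range touched through argument ArgIdx of the call is
    // known exactly (e.g. memcpy with a constant length operand).
    static bool argRangeIsExact(ImmutableCallSite CS, unsigned ArgIdx,
                                const TargetLibraryInfo &TLI) {
      MemoryLocation Loc = MemoryLocation::getForArgument(CS, ArgIdx, TLI);
      return Loc.Size != MemoryLocation::UnknownSize;
    }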
diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 36c4714..45ae818 100644
--- a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -40,7 +40,7 @@ namespace {
}
void print(raw_ostream &O, const Module *M) const override;
};
-}
+} // namespace
char ModuleDebugInfoPrinter::ID = 0;
INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo",
diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
index 203e1da..7617622 100644
--- a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
@@ -41,7 +41,8 @@ namespace {
return true;
}
- AliasResult alias(const Location &LocA, const Location &LocB) override {
+ AliasResult alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) override {
return MayAlias;
}
@@ -52,19 +53,17 @@ namespace {
return UnknownModRefBehavior;
}
- bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override {
+ bool pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) override {
return false;
}
- Location getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
- ModRefResult &Mask) override {
- Mask = ModRef;
- AAMDNodes AATags;
- CS->getAAMetadata(AATags);
- return Location(CS.getArgument(ArgIdx), UnknownSize, AATags);
+ ModRefResult getArgModRefInfo(ImmutableCallSite CS,
+ unsigned ArgIdx) override {
+ return ModRef;
}
ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Location &Loc) override {
+ const MemoryLocation &Loc) override {
return ModRef;
}
ModRefResult getModRefInfo(ImmutableCallSite CS1,
diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
index 633d6aa..8d80c60 100644
--- a/contrib/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
@@ -244,13 +244,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
GEPI->getNumOperands() == GEPOps.size() &&
GEPI->getParent()->getParent() == CurBB->getParent() &&
(!DT || DT->dominates(GEPI->getParent(), PredBB))) {
- bool Mismatch = false;
- for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
- if (GEPI->getOperand(i) != GEPOps[i]) {
- Mismatch = true;
- break;
- }
- if (!Mismatch)
+ if (std::equal(GEPOps.begin(), GEPOps.end(), GEPI->op_begin()))
return GEPI;
}
}
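The hand-rolled mismatch loop is replaced by std::equal over the operand range, relying on the preceding getNumOperands() == GEPOps.size() check to keep the second range in bounds. The same equivalence in isolation, as a small self-contained sketch:

    #include <algorithm>
    #include <vector>

    // Both the old loop and std::equal answer "do the first Ops.size()
    // elements of Other match Ops?".
    static bool sameOperands(const std::vector<int> &Ops,
                             const std::vector<int> &Other) {
      // Precondition (mirrors the GEP operand-count check above):
      // Other.size() >= Ops.size().
      return std::equal(Ops.begin(), Ops.end(), Other.begin());
    }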
@@ -392,10 +386,10 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
if (!OpVal) return nullptr;
// Otherwise insert a cast at the end of PredBB.
- CastInst *New = CastInst::Create(Cast->getOpcode(),
- OpVal, InVal->getType(),
- InVal->getName()+".phi.trans.insert",
+ CastInst *New = CastInst::Create(Cast->getOpcode(), OpVal, InVal->getType(),
+ InVal->getName() + ".phi.trans.insert",
PredBB->getTerminator());
+ New->setDebugLoc(Inst->getDebugLoc());
NewInsts.push_back(New);
return New;
}
@@ -414,6 +408,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
GetElementPtrInst *Result = GetElementPtrInst::Create(
GEP->getSourceElementType(), GEPOps[0], makeArrayRef(GEPOps).slice(1),
InVal->getName() + ".phi.trans.insert", PredBB->getTerminator());
+ Result->setDebugLoc(Inst->getDebugLoc());
Result->setIsInBounds(GEP->isInBounds());
NewInsts.push_back(Result);
return Result;
diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
index d7f5109..2b09bec 100644
--- a/contrib/llvm/lib/Analysis/RegionPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
@@ -194,7 +194,7 @@ struct RegionOnlyPrinter
}
};
-}
+} // namespace
char RegionOnlyPrinter::ID = 0;
INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only",
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index 0e9f812..81e07e9 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -627,7 +627,7 @@ namespace {
llvm_unreachable("Unknown SCEV kind!");
}
};
-}
+} // namespace
/// GroupByComplexity - Given a list of SCEV objects, order them by their
/// complexity, and group objects of the same complexity together by value.
@@ -689,7 +689,7 @@ struct FindSCEVSize {
return false;
}
};
-}
+} // namespace
// Returns the size of the SCEV S.
static inline int sizeOfSCEV(const SCEV *S) {
@@ -937,7 +937,7 @@ private:
const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
};
-}
+} // namespace
//===----------------------------------------------------------------------===//
// Simple SCEV method implementations
@@ -1248,7 +1248,7 @@ struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
-}
+} // namespace
// The recurrence AR has been shown to have no signed/unsigned wrap or something
// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
@@ -3300,7 +3300,7 @@ namespace {
}
bool isDone() const { return FindOne; }
};
-}
+} // namespace
bool ScalarEvolution::checkValidity(const SCEV *S) const {
FindInvalidSCEVUnknown F;
@@ -7594,7 +7594,7 @@ struct FindUndefs {
return Found;
}
};
-}
+} // namespace
// Return true when S contains at least an undef value.
static inline bool
@@ -7644,7 +7644,7 @@ struct SCEVCollectTerms {
}
bool isDone() const { return false; }
};
-}
+} // namespace
/// Find parametric terms in this SCEVAddRecExpr.
void SCEVAddRecExpr::collectParametricTerms(
@@ -7737,7 +7737,7 @@ struct FindParameter {
return FoundParameter;
}
};
-}
+} // namespace
// Returns true when S contains at least a SCEVUnknown parameter.
static inline bool
@@ -8418,7 +8418,7 @@ struct SCEVSearch {
}
bool isDone() const { return IsFound; }
};
-}
+} // namespace
bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
SCEVSearch Search(Op);
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index ccec0a8..2d45c59 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -53,7 +53,8 @@ namespace {
private:
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnFunction(Function &F) override;
- AliasResult alias(const Location &LocA, const Location &LocB) override;
+ AliasResult alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) override;
Value *GetBaseValue(const SCEV *S);
};
@@ -107,8 +108,8 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
}
AliasAnalysis::AliasResult
-ScalarEvolutionAliasAnalysis::alias(const Location &LocA,
- const Location &LocB) {
+ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are. This allows the code below to ignore this special
// case.
@@ -161,12 +162,12 @@ ScalarEvolutionAliasAnalysis::alias(const Location &LocA,
Value *AO = GetBaseValue(AS);
Value *BO = GetBaseValue(BS);
if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
- if (alias(Location(AO ? AO : LocA.Ptr,
- AO ? +UnknownSize : LocA.Size,
- AO ? AAMDNodes() : LocA.AATags),
- Location(BO ? BO : LocB.Ptr,
- BO ? +UnknownSize : LocB.Size,
- BO ? AAMDNodes() : LocB.AATags)) == NoAlias)
+ if (alias(MemoryLocation(AO ? AO : LocA.Ptr,
+ AO ? +MemoryLocation::UnknownSize : LocA.Size,
+ AO ? AAMDNodes() : LocA.AATags),
+ MemoryLocation(BO ? BO : LocB.Ptr,
+ BO ? +MemoryLocation::UnknownSize : LocB.Size,
+ BO ? AAMDNodes() : LocB.AATags)) == NoAlias)
return NoAlias;
// Forward the query to the next analysis.
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index f82235d..0264ad1 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -661,7 +661,7 @@ public:
}
};
-}
+} // namespace
Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
Type *Ty = SE.getEffectiveSCEVType(S->getType());
@@ -1702,7 +1702,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
unsigned NumElim = 0;
DenseMap<const SCEV *, PHINode *> ExprToIVMap;
- // Process phis from wide to narrow. Mapping wide phis to the their truncation
+ // Process phis from wide to narrow. Map wide phis to their truncation
// so narrow phis can reuse them.
for (SmallVectorImpl<PHINode*>::const_iterator PIter = Phis.begin(),
PEnd = Phis.end(); PIter != PEnd; ++PIter) {
@@ -1933,7 +1933,7 @@ struct SCEVFindUnsafe {
}
bool isDone() const { return IsUnsafe; }
};
-}
+} // namespace
namespace llvm {
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index 02f8b0b..a8cfeb6 100644
--- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -99,12 +99,13 @@ protected:
private:
void getAnalysisUsage(AnalysisUsage &AU) const override;
- AliasResult alias(const Location &LocA, const Location &LocB) override;
- bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override;
+ AliasResult alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) override;
+ bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) override;
ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
ModRefBehavior getModRefBehavior(const Function *F) override;
ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Location &Loc) override;
+ const MemoryLocation &Loc) override;
ModRefResult getModRefInfo(ImmutableCallSite CS1,
ImmutableCallSite CS2) override;
};
@@ -176,8 +177,8 @@ ScopedNoAliasAA::mayAliasInScopes(const MDNode *Scopes,
return true;
}
-AliasAnalysis::AliasResult
-ScopedNoAliasAA::alias(const Location &LocA, const Location &LocB) {
+AliasAnalysis::AliasResult ScopedNoAliasAA::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
if (!EnableScopedNoAlias)
return AliasAnalysis::alias(LocA, LocB);
@@ -198,7 +199,7 @@ ScopedNoAliasAA::alias(const Location &LocA, const Location &LocB) {
return AliasAnalysis::alias(LocA, LocB);
}
-bool ScopedNoAliasAA::pointsToConstantMemory(const Location &Loc,
+bool ScopedNoAliasAA::pointsToConstantMemory(const MemoryLocation &Loc,
bool OrLocal) {
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
}
@@ -214,7 +215,8 @@ ScopedNoAliasAA::getModRefBehavior(const Function *F) {
}
AliasAnalysis::ModRefResult
-ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
+ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
if (!EnableScopedNoAlias)
return AliasAnalysis::getModRefInfo(CS, Loc);
diff --git a/contrib/llvm/lib/Analysis/StratifiedSets.h b/contrib/llvm/lib/Analysis/StratifiedSets.h
index fd3fbc0..878ca3d 100644
--- a/contrib/llvm/lib/Analysis/StratifiedSets.h
+++ b/contrib/llvm/lib/Analysis/StratifiedSets.h
@@ -688,5 +688,5 @@ private:
bool inbounds(StratifiedIndex N) const { return N < Links.size(); }
};
-}
+} // namespace llvm
#endif // LLVM_ADT_STRATIFIEDSETS_H
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 1158725..82d29e0 100644
--- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -270,7 +270,7 @@ namespace {
return TBAAStructTypeNode(P);
}
};
-}
+} // namespace
namespace {
/// TypeBasedAliasAnalysis - This is a simple alias analysis
@@ -300,12 +300,14 @@ namespace {
private:
void getAnalysisUsage(AnalysisUsage &AU) const override;
- AliasResult alias(const Location &LocA, const Location &LocB) override;
- bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override;
+ AliasResult alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) override;
+ bool pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) override;
ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
ModRefBehavior getModRefBehavior(const Function *F) override;
ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Location &Loc) override;
+ const MemoryLocation &Loc) override;
ModRefResult getModRefInfo(ImmutableCallSite CS1,
ImmutableCallSite CS2) override;
};
@@ -453,8 +455,8 @@ TypeBasedAliasAnalysis::PathAliases(const MDNode *A,
}
AliasAnalysis::AliasResult
-TypeBasedAliasAnalysis::alias(const Location &LocA,
- const Location &LocB) {
+TypeBasedAliasAnalysis::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
if (!EnableTBAA)
return AliasAnalysis::alias(LocA, LocB);
@@ -473,7 +475,7 @@ TypeBasedAliasAnalysis::alias(const Location &LocA,
return NoAlias;
}
-bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+bool TypeBasedAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
bool OrLocal) {
if (!EnableTBAA)
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
@@ -515,7 +517,7 @@ TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) {
AliasAnalysis::ModRefResult
TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
- const Location &Loc) {
+ const MemoryLocation &Loc) {
if (!EnableTBAA)
return AliasAnalysis::getModRefInfo(CS, Loc);
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index c4f0463..c45005f 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -551,12 +551,17 @@ static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp,
}
break;
case ICmpInst::ICMP_EQ:
- if (LHS == V)
- computeKnownBits(RHS, KnownZero, KnownOne, DL, Depth + 1, Q);
- else if (RHS == V)
- computeKnownBits(LHS, KnownZero, KnownOne, DL, Depth + 1, Q);
- else
- llvm_unreachable("missing use?");
+ {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ if (LHS == V)
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ else if (RHS == V)
+ computeKnownBits(LHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ else
+ llvm_unreachable("missing use?");
+ KnownZero |= KnownZeroTemp;
+ KnownOne |= KnownOneTemp;
+ }
break;
case ICmpInst::ICMP_ULE:
if (LHS == V) {
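In the ICMP_EQ case the known bits of the equal operand are now computed into temporaries and then ORed into KnownZero/KnownOne, so facts already derived about V are kept and augmented rather than overwritten. A plain-integer illustration of that merge (the mask values are made up for the example):

    #include <cstdio>

    int main() {
      unsigned KnownZero = 0xF0, KnownOne = 0x01;         // already known about V
      unsigned KnownZeroTemp = 0x0C, KnownOneTemp = 0x02; // implied by V == RHS
      KnownZero |= KnownZeroTemp; // 0xFC: old zero bits preserved, new ones added
      KnownOne |= KnownOneTemp;   // 0x03
      std::printf("%x %x\n", KnownZero, KnownOne);
      return 0;
    }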
@@ -936,147 +941,11 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
}
}
-/// Determine which bits of V are known to be either zero or one and return
-/// them in the KnownZero/KnownOne bit sets.
-///
-/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
-/// we cannot optimize based on the assumption that it is zero without changing
-/// it to be an explicit zero. If we don't change it to zero, other code could
-/// optimized based on the contradictory assumption that it is non-zero.
-/// Because instcombine aggressively folds operations with undef args anyway,
-/// this won't lose us code quality.
-///
-/// This function is defined on values with integer type, values with pointer
-/// type, and vectors of integers. In the case
-/// where V is a vector, known zero, and known one values are the
-/// same width as the vector element, and the bit is set only if it is true
-/// for all of the elements in the vector.
-void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- const DataLayout &DL, unsigned Depth, const Query &Q) {
- assert(V && "No Value?");
- assert(Depth <= MaxDepth && "Limit Search Depth");
+static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
+ APInt &KnownOne, const DataLayout &DL,
+ unsigned Depth, const Query &Q) {
unsigned BitWidth = KnownZero.getBitWidth();
- assert((V->getType()->isIntOrIntVectorTy() ||
- V->getType()->getScalarType()->isPointerTy()) &&
- "Not integer or pointer type!");
- assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
- (!V->getType()->isIntOrIntVectorTy() ||
- V->getType()->getScalarSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
- KnownOne.getBitWidth() == BitWidth &&
- "V, KnownOne and KnownZero should have same BitWidth");
-
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- // We know all of the bits for a constant!
- KnownOne = CI->getValue();
- KnownZero = ~KnownOne;
- return;
- }
- // Null and aggregate-zero are all-zeros.
- if (isa<ConstantPointerNull>(V) ||
- isa<ConstantAggregateZero>(V)) {
- KnownOne.clearAllBits();
- KnownZero = APInt::getAllOnesValue(BitWidth);
- return;
- }
- // Handle a constant vector by taking the intersection of the known bits of
- // each element. There is no real need to handle ConstantVector here, because
- // we don't handle undef in any particularly useful way.
- if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
- // We know that CDS must be a vector of integers. Take the intersection of
- // each element.
- KnownZero.setAllBits(); KnownOne.setAllBits();
- APInt Elt(KnownZero.getBitWidth(), 0);
- for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
- Elt = CDS->getElementAsInteger(i);
- KnownZero &= ~Elt;
- KnownOne &= Elt;
- }
- return;
- }
-
- // The address of an aligned GlobalValue has trailing zeros.
- if (auto *GO = dyn_cast<GlobalObject>(V)) {
- unsigned Align = GO->getAlignment();
- if (Align == 0) {
- if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
- Type *ObjectType = GVar->getType()->getElementType();
- if (ObjectType->isSized()) {
- // If the object is defined in the current Module, we'll be giving
- // it the preferred alignment. Otherwise, we have to assume that it
- // may only have the minimum ABI alignment.
- if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
- Align = DL.getPreferredAlignment(GVar);
- else
- Align = DL.getABITypeAlignment(ObjectType);
- }
- }
- }
- if (Align > 0)
- KnownZero = APInt::getLowBitsSet(BitWidth,
- countTrailingZeros(Align));
- else
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
- return;
- }
-
- if (Argument *A = dyn_cast<Argument>(V)) {
- unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0;
-
- if (!Align && A->hasStructRetAttr()) {
- // An sret parameter has at least the ABI alignment of the return type.
- Type *EltTy = cast<PointerType>(A->getType())->getElementType();
- if (EltTy->isSized())
- Align = DL.getABITypeAlignment(EltTy);
- }
-
- if (Align)
- KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
- else
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
-
- // Don't give up yet... there might be an assumption that provides more
- // information...
- computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
-
- // Or a dominating condition for that matter
- if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
- computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL,
- Depth, Q);
- return;
- }
-
- // Start out not knowing anything.
- KnownZero.clearAllBits(); KnownOne.clearAllBits();
-
- // Limit search depth.
- // All recursive calls that increase depth must come after this.
- if (Depth == MaxDepth)
- return;
-
- // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
- // the bits of its aliasee.
- if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (!GA->mayBeOverridden())
- computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, DL, Depth + 1, Q);
- return;
- }
-
- // Check whether a nearby assume intrinsic can determine some known bits.
- computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
-
- // Check whether there's a dominating condition which implies something about
- // this value at the given context.
- if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
- computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth,
- Q);
-
- Operator *I = dyn_cast<Operator>(V);
- if (!I) return;
-
APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
switch (I->getOpcode()) {
default: break;
@@ -1328,7 +1197,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
}
case Instruction::Alloca: {
- AllocaInst *AI = cast<AllocaInst>(V);
+ AllocaInst *AI = cast<AllocaInst>(I);
unsigned Align = AI->getAlignment();
if (Align == 0)
Align = DL.getABITypeAlignment(AI->getType()->getElementType());
@@ -1523,6 +1392,151 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
}
}
}
+}
+
+/// Determine which bits of V are known to be either zero or one and return
+/// them in the KnownZero/KnownOne bit sets.
+///
+/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
+/// we cannot optimize based on the assumption that it is zero without changing
+/// it to be an explicit zero. If we don't change it to zero, other code could
+/// be optimized based on the contradictory assumption that it is non-zero.
+/// Because instcombine aggressively folds operations with undef args anyway,
+/// this won't lose us code quality.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type, and vectors of integers. In the case
+/// where V is a vector, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
+void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ const DataLayout &DL, unsigned Depth, const Query &Q) {
+ assert(V && "No Value?");
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ assert((V->getType()->isIntOrIntVectorTy() ||
+ V->getType()->getScalarType()->isPointerTy()) &&
+ "Not integer or pointer type!");
+ assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
+ (!V->getType()->isIntOrIntVectorTy() ||
+ V->getType()->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "V, KnownOne and KnownZero should have same BitWidth");
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne = CI->getValue();
+ KnownZero = ~KnownOne;
+ return;
+ }
+ // Null and aggregate-zero are all-zeros.
+ if (isa<ConstantPointerNull>(V) ||
+ isa<ConstantAggregateZero>(V)) {
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getAllOnesValue(BitWidth);
+ return;
+ }
+ // Handle a constant vector by taking the intersection of the known bits of
+ // each element. There is no real need to handle ConstantVector here, because
+ // we don't handle undef in any particularly useful way.
+ if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
+ // We know that CDS must be a vector of integers. Take the intersection of
+ // each element.
+ KnownZero.setAllBits(); KnownOne.setAllBits();
+ APInt Elt(KnownZero.getBitWidth(), 0);
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ Elt = CDS->getElementAsInteger(i);
+ KnownZero &= ~Elt;
+ KnownOne &= Elt;
+ }
+ return;
+ }
+
+ // The address of an aligned GlobalValue has trailing zeros.
+ if (auto *GO = dyn_cast<GlobalObject>(V)) {
+ unsigned Align = GO->getAlignment();
+ if (Align == 0) {
+ if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
+ Type *ObjectType = GVar->getType()->getElementType();
+ if (ObjectType->isSized()) {
+ // If the object is defined in the current Module, we'll be giving
+ // it the preferred alignment. Otherwise, we have to assume that it
+ // may only have the minimum ABI alignment.
+ if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
+ Align = DL.getPreferredAlignment(GVar);
+ else
+ Align = DL.getABITypeAlignment(ObjectType);
+ }
+ }
+ }
+ if (Align > 0)
+ KnownZero = APInt::getLowBitsSet(BitWidth,
+ countTrailingZeros(Align));
+ else
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+ return;
+ }
+
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0;
+
+ if (!Align && A->hasStructRetAttr()) {
+ // An sret parameter has at least the ABI alignment of the return type.
+ Type *EltTy = cast<PointerType>(A->getType())->getElementType();
+ if (EltTy->isSized())
+ Align = DL.getABITypeAlignment(EltTy);
+ }
+
+ if (Align)
+ KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
+ else
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+
+ // Don't give up yet... there might be an assumption that provides more
+ // information...
+ computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
+
+ // Or a dominating condition for that matter
+ if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
+ computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL,
+ Depth, Q);
+ return;
+ }
+
+ // Start out not knowing anything.
+ KnownZero.clearAllBits(); KnownOne.clearAllBits();
+
+ // Limit search depth.
+ // All recursive calls that increase depth must come after this.
+ if (Depth == MaxDepth)
+ return;
+
+ // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
+ // the bits of its aliasee.
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (!GA->mayBeOverridden())
+ computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, DL, Depth + 1, Q);
+ return;
+ }
+
+ if (Operator *I = dyn_cast<Operator>(V))
+ computeKnownBitsFromOperator(I, KnownZero, KnownOne, DL, Depth, Q);
+ // computeKnownBitsFromAssume and computeKnownBitsFromDominatingCondition
+  // strictly refine KnownZero and KnownOne. Therefore, we run them after
+ // computeKnownBitsFromOperator.
+
+ // Check whether a nearby assume intrinsic can determine some known bits.
+ computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
+
+ // Check whether there's a dominating condition which implies something about
+ // this value at the given context.
+ if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
+ computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth,
+ Q);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
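Taken together, the ValueTracking changes split the operator handling out of computeKnownBits into computeKnownBitsFromOperator and move the assume and dominating-condition passes after it, since those only ever add known bits on top of the structural analysis. The resulting control flow, condensed into a schematic sketch with hypothetical stand-in helpers:

    #include <cstdint>

    // Stand-ins for the real helpers; the point is the ordering, not the math.
    static void fromOperatorSketch(uint64_t &KZ, uint64_t &KO) { KZ |= 0x0F; }
    static void fromAssumeSketch(uint64_t &KZ, uint64_t &KO) { KO |= 0x10; }

    static void computeKnownBitsSketch(uint64_t &KZ, uint64_t &KO) {
      fromOperatorSketch(KZ, KO); // analyze the defining operator first
      fromAssumeSketch(KZ, KO);   // assumptions strictly refine the result
    }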
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp
index 09fe6c0..0bdc350 100644
--- a/contrib/llvm/lib/AsmParser/LLLexer.cpp
+++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp
@@ -628,6 +628,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(ssp);
KEYWORD(sspreq);
KEYWORD(sspstrong);
+ KEYWORD(safestack);
KEYWORD(sanitize_address);
KEYWORD(sanitize_thread);
KEYWORD(sanitize_memory);
diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp
index 681af2a..a121e59 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.cpp
+++ b/contrib/llvm/lib/AsmParser/LLParser.cpp
@@ -670,6 +670,9 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L,
GA->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass);
GA->setUnnamedAddr(UnnamedAddr);
+ if (Name.empty())
+ NumberedVals.push_back(GA.get());
+
// See if this value already exists in the symbol table. If so, it is either
// a redefinition or a definition of a forward reference.
if (GlobalValue *Val = M->getNamedValue(Name)) {
@@ -958,6 +961,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break;
case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break;
case lltok::kw_sspstrong: B.addAttribute(Attribute::StackProtectStrong); break;
+ case lltok::kw_safestack: B.addAttribute(Attribute::SafeStack); break;
case lltok::kw_sanitize_address: B.addAttribute(Attribute::SanitizeAddress); break;
case lltok::kw_sanitize_thread: B.addAttribute(Attribute::SanitizeThread); break;
case lltok::kw_sanitize_memory: B.addAttribute(Attribute::SanitizeMemory); break;
@@ -1267,6 +1271,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_ssp:
case lltok::kw_sspreq:
case lltok::kw_sspstrong:
+ case lltok::kw_safestack:
case lltok::kw_uwtable:
HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
break;
@@ -1343,6 +1348,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_ssp:
case lltok::kw_sspreq:
case lltok::kw_sspstrong:
+ case lltok::kw_safestack:
case lltok::kw_uwtable:
HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
break;
@@ -4051,7 +4057,7 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
/// FunctionHeader
/// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs
/// OptUnnamedAddr Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection
-/// OptionalAlign OptGC OptionalPrefix OptionalPrologue
+/// OptionalAlign OptGC OptionalPrefix OptionalPrologue OptPersonalityFn
bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
// Parse the linkage.
LocTy LinkageLoc = Lex.getLoc();
@@ -4133,6 +4139,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
LocTy UnnamedAddrLoc;
Constant *Prefix = nullptr;
Constant *Prologue = nullptr;
+ Constant *PersonalityFn = nullptr;
Comdat *C;
if (ParseArgumentList(ArgList, isVarArg) ||
@@ -4149,7 +4156,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
(EatIfPresent(lltok::kw_prefix) &&
ParseGlobalTypeAndValue(Prefix)) ||
(EatIfPresent(lltok::kw_prologue) &&
- ParseGlobalTypeAndValue(Prologue)))
+ ParseGlobalTypeAndValue(Prologue)) ||
+ (EatIfPresent(lltok::kw_personality) &&
+ ParseGlobalTypeAndValue(PersonalityFn)))
return true;
if (FuncAttrs.contains(Attribute::Builtin))
@@ -4248,6 +4257,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
Fn->setAlignment(Alignment);
Fn->setSection(Section);
Fn->setComdat(C);
+ Fn->setPersonalityFn(PersonalityFn);
if (!GC.empty()) Fn->setGC(GC.c_str());
Fn->setPrefixData(Prefix);
Fn->setPrologueData(Prologue);
@@ -5099,14 +5109,11 @@ int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
/// ::= 'filter' TypeAndValue ( ',' TypeAndValue )*
bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
Type *Ty = nullptr; LocTy TyLoc;
- Value *PersFn; LocTy PersFnLoc;
- if (ParseType(Ty, TyLoc) ||
- ParseToken(lltok::kw_personality, "expected 'personality'") ||
- ParseTypeAndValue(PersFn, PersFnLoc, PFS))
+ if (ParseType(Ty, TyLoc))
return true;
- std::unique_ptr<LandingPadInst> LP(LandingPadInst::Create(Ty, PersFn, 0));
+ std::unique_ptr<LandingPadInst> LP(LandingPadInst::Create(Ty, 0));
LP->setCleanup(EatIfPresent(lltok::kw_cleanup));
while (Lex.getKind() == lltok::kw_catch || Lex.getKind() == lltok::kw_filter){
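These parser changes track the IR-level move of the personality function from the landingpad instruction to the function header: ParseFunctionHeader now accepts an optional personality and calls setPersonalityFn, while ParseLandingPad creates the instruction without a personality operand. A sketch of the corresponding C++ construction under the new API; the helper and its arguments are illustrative:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // The personality is attached once to the function; each landingpad is
    // created without its own personality operand.
    static LandingPadInst *makeCleanupPad(Function *Fn, Constant *Personality,
                                          Type *Ty, Instruction *InsertBefore) {
      Fn->setPersonalityFn(Personality);
      LandingPadInst *LP = LandingPadInst::Create(Ty, /*NumReservedClauses=*/0,
                                                  "lpad", InsertBefore);
      LP->setCleanup(true);
      return LP;
    }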
diff --git a/contrib/llvm/lib/AsmParser/LLParser.h b/contrib/llvm/lib/AsmParser/LLParser.h
index a43a4b0..9f554c0 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.h
+++ b/contrib/llvm/lib/AsmParser/LLParser.h
@@ -469,6 +469,6 @@ namespace llvm {
bool ParseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes);
bool sortUseListOrder(Value *V, ArrayRef<unsigned> Indexes, SMLoc Loc);
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h
index c47f5e1..2487d12 100644
--- a/contrib/llvm/lib/AsmParser/LLToken.h
+++ b/contrib/llvm/lib/AsmParser/LLToken.h
@@ -135,6 +135,7 @@ namespace lltok {
kw_ssp,
kw_sspreq,
kw_sspstrong,
+ kw_safestack,
kw_sret,
kw_sanitize_thread,
kw_sanitize_memory,
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
index 868fbf0..289c76e 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
@@ -39,7 +39,7 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
raw_string_ostream Stream(Message);
DiagnosticPrinterRawOStream DP(Stream);
- ErrorOr<Module *> ModuleOrErr = parseBitcodeFile(
+ ErrorOr<std::unique_ptr<Module>> ModuleOrErr = parseBitcodeFile(
Buf, Ctx, [&](const DiagnosticInfo &DI) { DI.print(DP); });
if (ModuleOrErr.getError()) {
if (OutMessage) {
@@ -50,7 +50,7 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
return 1;
}
- *OutModule = wrap(ModuleOrErr.get());
+ *OutModule = wrap(ModuleOrErr.get().release());
return 0;
}
@@ -64,7 +64,7 @@ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
std::string Message;
std::unique_ptr<MemoryBuffer> Owner(unwrap(MemBuf));
- ErrorOr<Module *> ModuleOrErr =
+ ErrorOr<std::unique_ptr<Module>> ModuleOrErr =
getLazyBitcodeModule(std::move(Owner), *unwrap(ContextRef));
Owner.release();
@@ -75,7 +75,7 @@ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
return 1;
}
- *OutM = wrap(ModuleOrErr.get());
+ *OutM = wrap(ModuleOrErr.get().release());
return 0;
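With parseBitcodeFile and getLazyBitcodeModule now returning ErrorOr<std::unique_ptr<Module>>, the C API wrappers call release() on the owned pointer before handing it across the boundary. The ownership hand-off in isolation, as a generic sketch with a stand-in type:

    #include <memory>

    struct ModuleSketch {};

    // Ownership stays in the unique_ptr inside the C++ API; release() transfers
    // it to the raw-pointer C interface, whose caller must free it later.
    static ModuleSketch *wrapForCAPI(std::unique_ptr<ModuleSketch> M) {
      return M.release();
    }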
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 056d87b..0cadd6c 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -44,9 +44,9 @@ enum {
class BitcodeReaderValueList {
std::vector<WeakVH> ValuePtrs;
- /// ResolveConstants - As we resolve forward-referenced constants, we add
- /// information about them to this vector. This allows us to resolve them in
- /// bulk instead of resolving each reference at a time. See the code in
+ /// As we resolve forward-referenced constants, we add information about them
+ /// to this vector. This allows us to resolve them in bulk instead of
+ /// resolving each reference at a time. See the code in
/// ResolveConstantForwardRefs for more information about this.
///
/// The key of this vector is the placeholder constant, the value is the slot
@@ -86,11 +86,11 @@ public:
Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
Value *getValueFwdRef(unsigned Idx, Type *Ty);
- void AssignValue(Value *V, unsigned Idx);
+ void assignValue(Value *V, unsigned Idx);
- /// ResolveConstantForwardRefs - Once all constants are read, this method bulk
- /// resolves any forward references.
- void ResolveConstantForwardRefs();
+ /// Once all constants are read, this method bulk resolves any forward
+ /// references.
+ void resolveConstantForwardRefs();
};
class BitcodeReaderMDValueList {
@@ -125,20 +125,20 @@ public:
}
Metadata *getValueFwdRef(unsigned Idx);
- void AssignValue(Metadata *MD, unsigned Idx);
+ void assignValue(Metadata *MD, unsigned Idx);
void tryToResolveCycles();
};
class BitcodeReader : public GVMaterializer {
LLVMContext &Context;
DiagnosticHandlerFunction DiagnosticHandler;
- Module *TheModule;
+ Module *TheModule = nullptr;
std::unique_ptr<MemoryBuffer> Buffer;
std::unique_ptr<BitstreamReader> StreamFile;
BitstreamCursor Stream;
- DataStreamer *LazyStreamer;
- uint64_t NextUnreadBit;
- bool SeenValueSymbolTable;
+ bool IsStreamed;
+ uint64_t NextUnreadBit = 0;
+ bool SeenValueSymbolTable = false;
std::vector<Type*> TypeList;
BitcodeReaderValueList ValueList;
@@ -150,19 +150,19 @@ class BitcodeReader : public GVMaterializer {
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
std::vector<std::pair<Function*, unsigned> > FunctionPrefixes;
std::vector<std::pair<Function*, unsigned> > FunctionPrologues;
+ std::vector<std::pair<Function*, unsigned> > FunctionPersonalityFns;
SmallVector<Instruction*, 64> InstsWithTBAATag;
- /// MAttributes - The set of attributes by index. Index zero in the
- /// file is for null, and is thus not represented here. As such all indices
- /// are off by one.
+ /// The set of attributes by index. Index zero in the file is for null, and
+ /// is thus not represented here. As such all indices are off by one.
std::vector<AttributeSet> MAttributes;
/// \brief The set of attribute groups.
std::map<unsigned, AttributeSet> MAttributeGroups;
- /// FunctionBBs - While parsing a function body, this is a list of the basic
- /// blocks for the function.
+ /// While parsing a function body, this is a list of the basic blocks for the
+ /// function.
std::vector<BasicBlock*> FunctionBBs;
// When reading the module header, this list is populated with functions that
@@ -180,11 +180,10 @@ class BitcodeReader : public GVMaterializer {
// Several operations happen after the module header has been read, but
// before function bodies are processed. This keeps track of whether
// we've done this yet.
- bool SeenFirstFunctionBody;
+ bool SeenFirstFunctionBody = false;
- /// DeferredFunctionInfo - When function bodies are initially scanned, this
- /// map contains info about where to find deferred function body in the
- /// stream.
+ /// When function bodies are initially scanned, this map contains info about
+ /// where to find deferred function body in the stream.
DenseMap<Function*, uint64_t> DeferredFunctionInfo;
/// When Metadata block is initially scanned when parsing the module, we may
@@ -198,41 +197,40 @@ class BitcodeReader : public GVMaterializer {
DenseMap<Function *, std::vector<BasicBlock *>> BasicBlockFwdRefs;
std::deque<Function *> BasicBlockFwdRefQueue;
- /// UseRelativeIDs - Indicates that we are using a new encoding for
- /// instruction operands where most operands in the current
- /// FUNCTION_BLOCK are encoded relative to the instruction number,
- /// for a more compact encoding. Some instruction operands are not
- /// relative to the instruction ID: basic block numbers, and types.
- /// Once the old style function blocks have been phased out, we would
+ /// Indicates that we are using a new encoding for instruction operands where
+ /// most operands in the current FUNCTION_BLOCK are encoded relative to the
+ /// instruction number, for a more compact encoding. Some instruction
+ /// operands are not relative to the instruction ID: basic block numbers, and
+ /// types. Once the old style function blocks have been phased out, we would
/// not need this flag.
- bool UseRelativeIDs;
+ bool UseRelativeIDs = false;
/// True if all functions will be materialized, negating the need to process
/// (e.g.) blockaddress forward references.
- bool WillMaterializeAllForwardRefs;
+ bool WillMaterializeAllForwardRefs = false;
/// Functions that have block addresses taken. This is usually empty.
SmallPtrSet<const Function *, 4> BlockAddressesTaken;
/// True if any Metadata block has been materialized.
- bool IsMetadataMaterialized;
+ bool IsMetadataMaterialized = false;
bool StripDebugInfo = false;
public:
- std::error_code Error(BitcodeError E, const Twine &Message);
- std::error_code Error(BitcodeError E);
- std::error_code Error(const Twine &Message);
+ std::error_code error(BitcodeError E, const Twine &Message);
+ std::error_code error(BitcodeError E);
+ std::error_code error(const Twine &Message);
- explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C,
- DiagnosticHandlerFunction DiagnosticHandler);
- explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C,
- DiagnosticHandlerFunction DiagnosticHandler);
- ~BitcodeReader() override { FreeState(); }
+ BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context,
+ DiagnosticHandlerFunction DiagnosticHandler);
+ BitcodeReader(LLVMContext &Context,
+ DiagnosticHandlerFunction DiagnosticHandler);
+ ~BitcodeReader() override { freeState(); }
std::error_code materializeForwardReferencedFunctions();
- void FreeState();
+ void freeState();
void releaseBuffer();
@@ -242,13 +240,14 @@ public:
std::vector<StructType *> getIdentifiedStructTypes() const override;
void dematerialize(GlobalValue *GV) override;
- /// @brief Main interface to parsing a bitcode buffer.
- /// @returns true if an error occurred.
- std::error_code ParseBitcodeInto(Module *M,
+ /// \brief Main interface to parsing a bitcode buffer.
+ /// \returns true if an error occurred.
+ std::error_code parseBitcodeInto(std::unique_ptr<DataStreamer> Streamer,
+ Module *M,
bool ShouldLazyLoadMetadata = false);
- /// @brief Cheap mechanism to just extract module triple
- /// @returns true if an error occurred.
+ /// \brief Cheap mechanism to just extract module triple
+ /// \returns true if an error occurred.
ErrorOr<std::string> parseTriple();
static uint64_t decodeSignRotatedValue(uint64_t V);
@@ -282,9 +281,9 @@ private:
return AttributeSet();
}
- /// getValueTypePair - Read a value/type pair out of the specified record from
- /// slot 'Slot'. Increment Slot past the number of slots used in the record.
- /// Return true on failure.
+ /// Read a value/type pair out of the specified record from slot 'Slot'.
+ /// Increment Slot past the number of slots used in the record. Return true on
+ /// failure.
bool getValueTypePair(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
unsigned InstNum, Value *&ResVal) {
if (Slot == Record.size()) return true;
@@ -306,9 +305,9 @@ private:
return ResVal == nullptr;
}
- /// popValue - Read a value out of the specified record from slot 'Slot'.
- /// Increment Slot past the number of slots used by the value in the record.
- /// Return true if there is an error.
+ /// Read a value out of the specified record from slot 'Slot'. Increment Slot
+ /// past the number of slots used by the value in the record. Return true if
+ /// there is an error.
bool popValue(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
unsigned InstNum, Type *Ty, Value *&ResVal) {
if (getValue(Record, Slot, InstNum, Ty, ResVal))
@@ -318,15 +317,15 @@ private:
return false;
}
- /// getValue -- Like popValue, but does not increment the Slot number.
+ /// Like popValue, but does not increment the Slot number.
bool getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
unsigned InstNum, Type *Ty, Value *&ResVal) {
ResVal = getValue(Record, Slot, InstNum, Ty);
return ResVal == nullptr;
}
- /// getValue -- Version of getValue that returns ResVal directly,
- /// or 0 if there is an error.
+ /// Version of getValue that returns ResVal directly, or 0 if there is an
+ /// error.
Value *getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
unsigned InstNum, Type *Ty) {
if (Slot == Record.size()) return nullptr;
@@ -337,7 +336,7 @@ private:
return getFnValueByID(ValNo, Ty);
}
- /// getValueSigned -- Like getValue, but decodes signed VBRs.
+ /// Like getValue, but decodes signed VBRs.
Value *getValueSigned(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
unsigned InstNum, Type *Ty) {
if (Slot == Record.size()) return nullptr;
@@ -352,29 +351,29 @@ private:
/// corresponding alignment to use. If alignment is too large, returns
/// a corresponding error code.
std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment);
- std::error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
- std::error_code ParseModule(bool Resume, bool ShouldLazyLoadMetadata = false);
- std::error_code ParseAttributeBlock();
- std::error_code ParseAttributeGroupBlock();
- std::error_code ParseTypeTable();
- std::error_code ParseTypeTableBody();
-
- std::error_code ParseValueSymbolTable();
- std::error_code ParseConstants();
- std::error_code RememberAndSkipFunctionBody();
+ std::error_code parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
+ std::error_code parseModule(bool Resume, bool ShouldLazyLoadMetadata = false);
+ std::error_code parseAttributeBlock();
+ std::error_code parseAttributeGroupBlock();
+ std::error_code parseTypeTable();
+ std::error_code parseTypeTableBody();
+
+ std::error_code parseValueSymbolTable();
+ std::error_code parseConstants();
+ std::error_code rememberAndSkipFunctionBody();
/// Save the positions of the Metadata blocks and skip parsing the blocks.
std::error_code rememberAndSkipMetadata();
- std::error_code ParseFunctionBody(Function *F);
- std::error_code GlobalCleanup();
- std::error_code ResolveGlobalAndAliasInits();
- std::error_code ParseMetadata();
- std::error_code ParseMetadataAttachment(Function &F);
+ std::error_code parseFunctionBody(Function *F);
+ std::error_code globalCleanup();
+ std::error_code resolveGlobalAndAliasInits();
+ std::error_code parseMetadata();
+ std::error_code parseMetadataAttachment(Function &F);
ErrorOr<std::string> parseModuleTriple();
- std::error_code ParseUseLists();
- std::error_code InitStream();
- std::error_code InitStreamFromBuffer();
- std::error_code InitLazyStream();
- std::error_code FindFunctionInStream(
+ std::error_code parseUseLists();
+ std::error_code initStream(std::unique_ptr<DataStreamer> Streamer);
+ std::error_code initStreamFromBuffer();
+ std::error_code initLazyStream(std::unique_ptr<DataStreamer> Streamer);
+ std::error_code findFunctionInStream(
Function *F,
DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator);
};
@@ -387,35 +386,35 @@ BitcodeDiagnosticInfo::BitcodeDiagnosticInfo(std::error_code EC,
void BitcodeDiagnosticInfo::print(DiagnosticPrinter &DP) const { DP << Msg; }
-static std::error_code Error(DiagnosticHandlerFunction DiagnosticHandler,
+static std::error_code error(DiagnosticHandlerFunction DiagnosticHandler,
std::error_code EC, const Twine &Message) {
BitcodeDiagnosticInfo DI(EC, DS_Error, Message);
DiagnosticHandler(DI);
return EC;
}
-static std::error_code Error(DiagnosticHandlerFunction DiagnosticHandler,
+static std::error_code error(DiagnosticHandlerFunction DiagnosticHandler,
std::error_code EC) {
- return Error(DiagnosticHandler, EC, EC.message());
+ return error(DiagnosticHandler, EC, EC.message());
}
-static std::error_code Error(DiagnosticHandlerFunction DiagnosticHandler,
+static std::error_code error(DiagnosticHandlerFunction DiagnosticHandler,
const Twine &Message) {
- return Error(DiagnosticHandler,
+ return error(DiagnosticHandler,
make_error_code(BitcodeError::CorruptedBitcode), Message);
}
-std::error_code BitcodeReader::Error(BitcodeError E, const Twine &Message) {
- return ::Error(DiagnosticHandler, make_error_code(E), Message);
+std::error_code BitcodeReader::error(BitcodeError E, const Twine &Message) {
+ return ::error(DiagnosticHandler, make_error_code(E), Message);
}
-std::error_code BitcodeReader::Error(const Twine &Message) {
- return ::Error(DiagnosticHandler,
+std::error_code BitcodeReader::error(const Twine &Message) {
+ return ::error(DiagnosticHandler,
make_error_code(BitcodeError::CorruptedBitcode), Message);
}
-std::error_code BitcodeReader::Error(BitcodeError E) {
- return ::Error(DiagnosticHandler, make_error_code(E));
+std::error_code BitcodeReader::error(BitcodeError E) {
+ return ::error(DiagnosticHandler, make_error_code(E));
}
static DiagnosticHandlerFunction getDiagHandler(DiagnosticHandlerFunction F,
@@ -425,21 +424,19 @@ static DiagnosticHandlerFunction getDiagHandler(DiagnosticHandlerFunction F,
return [&C](const DiagnosticInfo &DI) { C.diagnose(DI); };
}
-BitcodeReader::BitcodeReader(MemoryBuffer *buffer, LLVMContext &C,
+BitcodeReader::BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context,
DiagnosticHandlerFunction DiagnosticHandler)
- : Context(C), DiagnosticHandler(getDiagHandler(DiagnosticHandler, C)),
- TheModule(nullptr), Buffer(buffer), LazyStreamer(nullptr),
- NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C),
- MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false),
- WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {}
+ : Context(Context),
+ DiagnosticHandler(getDiagHandler(DiagnosticHandler, Context)),
+ Buffer(Buffer), IsStreamed(false), ValueList(Context),
+ MDValueList(Context) {}
-BitcodeReader::BitcodeReader(DataStreamer *streamer, LLVMContext &C,
+BitcodeReader::BitcodeReader(LLVMContext &Context,
DiagnosticHandlerFunction DiagnosticHandler)
- : Context(C), DiagnosticHandler(getDiagHandler(DiagnosticHandler, C)),
- TheModule(nullptr), Buffer(nullptr), LazyStreamer(streamer),
- NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C),
- MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false),
- WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {}
+ : Context(Context),
+ DiagnosticHandler(getDiagHandler(DiagnosticHandler, Context)),
+ Buffer(nullptr), IsStreamed(true), ValueList(Context),
+ MDValueList(Context) {}
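The constructor rewrite leans on the in-class default member initializers introduced in the class definition above (TheModule, NextUnreadBit, SeenFirstFunctionBody, and so on), leaving each constructor to set only what actually differs, such as the IsStreamed flag that replaces the stored DataStreamer pointer. The pattern in miniature:

    #include <cstdint>

    struct ReaderSketch {
      // Defaults live with the declarations...
      bool SeenFirstFunctionBody = false;
      uint64_t NextUnreadBit = 0;
      bool IsStreamed;
      // ...so each constructor only mentions what varies between them.
      explicit ReaderSketch(bool Streamed) : IsStreamed(Streamed) {}
    };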
std::error_code BitcodeReader::materializeForwardReferencedFunctions() {
if (WillMaterializeAllForwardRefs)
@@ -461,7 +458,7 @@ std::error_code BitcodeReader::materializeForwardReferencedFunctions() {
// isn't a trivial way to check if a function will have a body without a
// linear search through FunctionsWithBodies, so just check it here.
if (!F->isMaterializable())
- return Error("Never resolved function from blockaddress");
+ return error("Never resolved function from blockaddress");
// Try to materialize F.
if (std::error_code EC = materialize(F))
@@ -474,7 +471,7 @@ std::error_code BitcodeReader::materializeForwardReferencedFunctions() {
return std::error_code();
}
-void BitcodeReader::FreeState() {
+void BitcodeReader::freeState() {
Buffer = nullptr;
std::vector<Type*>().swap(TypeList);
ValueList.clear();
@@ -496,10 +493,9 @@ void BitcodeReader::FreeState() {
// Helper functions to implement forward reference resolution, etc.
//===----------------------------------------------------------------------===//
-/// ConvertToString - Convert a string from a record into an std::string, return
-/// true on failure.
-template<typename StrTy>
-static bool ConvertToString(ArrayRef<uint64_t> Record, unsigned Idx,
+/// Convert a string from a record into an std::string, return true on failure.
+template <typename StrTy>
+static bool convertToString(ArrayRef<uint64_t> Record, unsigned Idx,
StrTy &Result) {
if (Idx > Record.size())
return true;
@@ -563,7 +559,7 @@ static GlobalValue::LinkageTypes getDecodedLinkage(unsigned Val) {
}
}
-static GlobalValue::VisibilityTypes GetDecodedVisibility(unsigned Val) {
+static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) {
switch (Val) {
default: // Map unknown visibilities to default.
case 0: return GlobalValue::DefaultVisibility;
@@ -573,7 +569,7 @@ static GlobalValue::VisibilityTypes GetDecodedVisibility(unsigned Val) {
}
static GlobalValue::DLLStorageClassTypes
-GetDecodedDLLStorageClass(unsigned Val) {
+getDecodedDLLStorageClass(unsigned Val) {
switch (Val) {
default: // Map unknown values to default.
case 0: return GlobalValue::DefaultStorageClass;
@@ -582,7 +578,7 @@ GetDecodedDLLStorageClass(unsigned Val) {
}
}
-static GlobalVariable::ThreadLocalMode GetDecodedThreadLocalMode(unsigned Val) {
+static GlobalVariable::ThreadLocalMode getDecodedThreadLocalMode(unsigned Val) {
switch (Val) {
case 0: return GlobalVariable::NotThreadLocal;
default: // Map unknown non-zero value to general dynamic.
@@ -593,7 +589,7 @@ static GlobalVariable::ThreadLocalMode GetDecodedThreadLocalMode(unsigned Val) {
}
}
-static int GetDecodedCastOpcode(unsigned Val) {
+static int getDecodedCastOpcode(unsigned Val) {
switch (Val) {
default: return -1;
case bitc::CAST_TRUNC : return Instruction::Trunc;
@@ -612,7 +608,7 @@ static int GetDecodedCastOpcode(unsigned Val) {
}
}
-static int GetDecodedBinaryOpcode(unsigned Val, Type *Ty) {
+static int getDecodedBinaryOpcode(unsigned Val, Type *Ty) {
bool IsFP = Ty->isFPOrFPVectorTy();
// BinOps are only valid for int/fp or vector of int/fp types
if (!IsFP && !Ty->isIntOrIntVectorTy())
@@ -650,7 +646,7 @@ static int GetDecodedBinaryOpcode(unsigned Val, Type *Ty) {
}
}
-static AtomicRMWInst::BinOp GetDecodedRMWOperation(unsigned Val) {
+static AtomicRMWInst::BinOp getDecodedRMWOperation(unsigned Val) {
switch (Val) {
default: return AtomicRMWInst::BAD_BINOP;
case bitc::RMW_XCHG: return AtomicRMWInst::Xchg;
@@ -667,7 +663,7 @@ static AtomicRMWInst::BinOp GetDecodedRMWOperation(unsigned Val) {
}
}
-static AtomicOrdering GetDecodedOrdering(unsigned Val) {
+static AtomicOrdering getDecodedOrdering(unsigned Val) {
switch (Val) {
case bitc::ORDERING_NOTATOMIC: return NotAtomic;
case bitc::ORDERING_UNORDERED: return Unordered;
@@ -680,7 +676,7 @@ static AtomicOrdering GetDecodedOrdering(unsigned Val) {
}
}
-static SynchronizationScope GetDecodedSynchScope(unsigned Val) {
+static SynchronizationScope getDecodedSynchScope(unsigned Val) {
switch (Val) {
case bitc::SYNCHSCOPE_SINGLETHREAD: return SingleThread;
default: // Map unknown scopes to cross-thread.
@@ -704,7 +700,7 @@ static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) {
}
}
-static void UpgradeDLLImportExportLinkage(llvm::GlobalValue *GV, unsigned Val) {
+static void upgradeDLLImportExportLinkage(llvm::GlobalValue *GV, unsigned Val) {
switch (Val) {
case 5: GV->setDLLStorageClass(GlobalValue::DLLImportStorageClass); break;
case 6: GV->setDLLStorageClass(GlobalValue::DLLExportStorageClass); break;
@@ -713,31 +709,29 @@ static void UpgradeDLLImportExportLinkage(llvm::GlobalValue *GV, unsigned Val) {
namespace llvm {
namespace {
- /// @brief A class for maintaining the slot number definition
- /// as a placeholder for the actual definition for forward constants defs.
- class ConstantPlaceHolder : public ConstantExpr {
- void operator=(const ConstantPlaceHolder &) = delete;
- public:
- // allocate space for exactly one operand
- void *operator new(size_t s) {
- return User::operator new(s, 1);
- }
- explicit ConstantPlaceHolder(Type *Ty, LLVMContext& Context)
- : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) {
- Op<0>() = UndefValue::get(Type::getInt32Ty(Context));
- }
+/// \brief A class for maintaining the slot number definition
+/// as a placeholder for the actual definition for forward constants defs.
+class ConstantPlaceHolder : public ConstantExpr {
+ void operator=(const ConstantPlaceHolder &) = delete;
- /// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
- static bool classof(const Value *V) {
- return isa<ConstantExpr>(V) &&
- cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1;
- }
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) { return User::operator new(s, 1); }
+ explicit ConstantPlaceHolder(Type *Ty, LLVMContext &Context)
+ : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) {
+ Op<0>() = UndefValue::get(Type::getInt32Ty(Context));
+ }
+ /// \brief Methods to support type inquiry through isa, cast, and dyn_cast.
+ static bool classof(const Value *V) {
+ return isa<ConstantExpr>(V) &&
+ cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1;
+ }
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
- };
-}
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+} // namespace
// FIXME: can we inherit this from ConstantExpr?
template <>
@@ -745,10 +739,9 @@ struct OperandTraits<ConstantPlaceHolder> :
public FixedNumOperandTraits<ConstantPlaceHolder, 1> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantPlaceHolder, Value)
-}
+} // namespace llvm
-
-void BitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) {
+void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) {
if (Idx == size()) {
push_back(V);
return;
@@ -818,14 +811,13 @@ Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
return V;
}
-/// ResolveConstantForwardRefs - Once all constants are read, this method bulk
-/// resolves any forward references. The idea behind this is that we sometimes
-/// get constants (such as large arrays) which reference *many* forward ref
-/// constants. Replacing each of these causes a lot of thrashing when
-/// building/reuniquing the constant. Instead of doing this, we look at all the
-/// uses and rewrite all the place holders at once for any constant that uses
-/// a placeholder.
-void BitcodeReaderValueList::ResolveConstantForwardRefs() {
+/// Once all constants are read, this method bulk resolves any forward
+/// references. The idea behind this is that we sometimes get constants (such
+/// as large arrays) which reference *many* forward ref constants. Replacing
+/// each of these causes a lot of thrashing when building/reuniquing the
+/// constant. Instead of doing this, we look at all the uses and rewrite all
+/// the place holders at once for any constant that uses a placeholder.
+void BitcodeReaderValueList::resolveConstantForwardRefs() {
// Sort the values by-pointer so that they are efficient to look up with a
// binary search.
std::sort(ResolveConstants.begin(), ResolveConstants.end());
@@ -900,7 +892,7 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() {
}
}
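
The comment above leans on the usual sorted-vector trick: sort once, then answer each lookup with a binary search instead of a linear scan per use. A tiny self-contained illustration of that lookup strategy, with integer keys standing in for the real pointer/slot pairs:

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

int main() {
  // (key, slot) pairs; sorting once makes every later lookup a binary search.
  std::vector<std::pair<unsigned, unsigned>> Resolve = {{7, 1}, {3, 0}, {9, 2}};
  std::sort(Resolve.begin(), Resolve.end());

  // Look up key 7: first entry whose key is not less than 7.
  auto It = std::lower_bound(Resolve.begin(), Resolve.end(),
                             std::make_pair(7u, 0u));
  assert(It != Resolve.end() && It->first == 7 && It->second == 1);
  return 0;
}
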
-void BitcodeReaderMDValueList::AssignValue(Metadata *MD, unsigned Idx) {
+void BitcodeReaderMDValueList::assignValue(Metadata *MD, unsigned Idx) {
if (Idx == size()) {
push_back(MD);
return;
@@ -1019,12 +1011,12 @@ static void decodeLLVMAttributesForBitcode(AttrBuilder &B,
(EncodedAttrs & 0xffff));
}
-std::error_code BitcodeReader::ParseAttributeBlock() {
+std::error_code BitcodeReader::parseAttributeBlock() {
if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
- return Error("Invalid record");
+ return error("Invalid record");
if (!MAttributes.empty())
- return Error("Invalid multiple blocks");
+ return error("Invalid multiple blocks");
SmallVector<uint64_t, 64> Record;
@@ -1037,7 +1029,7 @@ std::error_code BitcodeReader::ParseAttributeBlock() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("Malformed block");
+ return error("Malformed block");
case BitstreamEntry::EndBlock:
return std::error_code();
case BitstreamEntry::Record:
@@ -1053,7 +1045,7 @@ std::error_code BitcodeReader::ParseAttributeBlock() {
case bitc::PARAMATTR_CODE_ENTRY_OLD: { // ENTRY: [paramidx0, attr0, ...]
// FIXME: Remove in 4.0.
if (Record.size() & 1)
- return Error("Invalid record");
+ return error("Invalid record");
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
AttrBuilder B;
@@ -1078,7 +1070,7 @@ std::error_code BitcodeReader::ParseAttributeBlock() {
}
// Returns Attribute::None on unrecognized codes.
-static Attribute::AttrKind GetAttrFromCode(uint64_t Code) {
+static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
switch (Code) {
default:
return Attribute::None;
@@ -1156,6 +1148,8 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) {
return Attribute::StackProtectReq;
case bitc::ATTR_KIND_STACK_PROTECT_STRONG:
return Attribute::StackProtectStrong;
+ case bitc::ATTR_KIND_SAFESTACK:
+ return Attribute::SafeStack;
case bitc::ATTR_KIND_STRUCT_RET:
return Attribute::StructRet;
case bitc::ATTR_KIND_SANITIZE_ADDRESS:
@@ -1176,26 +1170,26 @@ std::error_code BitcodeReader::parseAlignmentValue(uint64_t Exponent,
// Note: Alignment in bitcode files is incremented by 1, so that zero
// can be used for default alignment.
if (Exponent > Value::MaxAlignmentExponent + 1)
- return Error("Invalid alignment value");
+ return error("Invalid alignment value");
Alignment = (1 << static_cast<unsigned>(Exponent)) >> 1;
return std::error_code();
}
-std::error_code BitcodeReader::ParseAttrKind(uint64_t Code,
+std::error_code BitcodeReader::parseAttrKind(uint64_t Code,
Attribute::AttrKind *Kind) {
- *Kind = GetAttrFromCode(Code);
+ *Kind = getAttrFromCode(Code);
if (*Kind == Attribute::None)
- return Error(BitcodeError::CorruptedBitcode,
+ return error(BitcodeError::CorruptedBitcode,
"Unknown attribute kind (" + Twine(Code) + ")");
return std::error_code();
}
-std::error_code BitcodeReader::ParseAttributeGroupBlock() {
+std::error_code BitcodeReader::parseAttributeGroupBlock() {
if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID))
- return Error("Invalid record");
+ return error("Invalid record");
if (!MAttributeGroups.empty())
- return Error("Invalid multiple blocks");
+ return error("Invalid multiple blocks");
SmallVector<uint64_t, 64> Record;
@@ -1206,7 +1200,7 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("Malformed block");
+ return error("Malformed block");
case BitstreamEntry::EndBlock:
return std::error_code();
case BitstreamEntry::Record:
@@ -1221,7 +1215,7 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() {
break;
case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...]
if (Record.size() < 3)
- return Error("Invalid record");
+ return error("Invalid record");
uint64_t GrpID = Record[0];
uint64_t Idx = Record[1]; // Index of the object this attribute refers to.
@@ -1230,13 +1224,13 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() {
for (unsigned i = 2, e = Record.size(); i != e; ++i) {
if (Record[i] == 0) { // Enum attribute
Attribute::AttrKind Kind;
- if (std::error_code EC = ParseAttrKind(Record[++i], &Kind))
+ if (std::error_code EC = parseAttrKind(Record[++i], &Kind))
return EC;
B.addAttribute(Kind);
} else if (Record[i] == 1) { // Integer attribute
Attribute::AttrKind Kind;
- if (std::error_code EC = ParseAttrKind(Record[++i], &Kind))
+ if (std::error_code EC = parseAttrKind(Record[++i], &Kind))
return EC;
if (Kind == Attribute::Alignment)
B.addAlignmentAttr(Record[++i]);
@@ -1276,16 +1270,16 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() {
}
}
-std::error_code BitcodeReader::ParseTypeTable() {
+std::error_code BitcodeReader::parseTypeTable() {
if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
- return Error("Invalid record");
+ return error("Invalid record");
- return ParseTypeTableBody();
+ return parseTypeTableBody();
}
-std::error_code BitcodeReader::ParseTypeTableBody() {
+std::error_code BitcodeReader::parseTypeTableBody() {
if (!TypeList.empty())
- return Error("Invalid multiple blocks");
+ return error("Invalid multiple blocks");
SmallVector<uint64_t, 64> Record;
unsigned NumRecords = 0;
@@ -1299,10 +1293,10 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("Malformed block");
+ return error("Malformed block");
case BitstreamEntry::EndBlock:
if (NumRecords != TypeList.size())
- return Error("Malformed block");
+ return error("Malformed block");
return std::error_code();
case BitstreamEntry::Record:
// The interesting case.
@@ -1314,12 +1308,12 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
Type *ResultTy = nullptr;
switch (Stream.readRecord(Entry.ID, Record)) {
default:
- return Error("Invalid value");
+ return error("Invalid value");
case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
// TYPE_CODE_NUMENTRY contains a count of the number of types in the
// type list. This allows us to reserve space.
if (Record.size() < 1)
- return Error("Invalid record");
+ return error("Invalid record");
TypeList.resize(Record[0]);
continue;
case bitc::TYPE_CODE_VOID: // VOID
@@ -1354,26 +1348,26 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
break;
case bitc::TYPE_CODE_INTEGER: { // INTEGER: [width]
if (Record.size() < 1)
- return Error("Invalid record");
+ return error("Invalid record");
uint64_t NumBits = Record[0];
if (NumBits < IntegerType::MIN_INT_BITS ||
NumBits > IntegerType::MAX_INT_BITS)
- return Error("Bitwidth for integer type out of range");
+ return error("Bitwidth for integer type out of range");
ResultTy = IntegerType::get(Context, NumBits);
break;
}
case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
// [pointee type, address space]
if (Record.size() < 1)
- return Error("Invalid record");
+ return error("Invalid record");
unsigned AddressSpace = 0;
if (Record.size() == 2)
AddressSpace = Record[1];
ResultTy = getTypeByID(Record[0]);
if (!ResultTy ||
!PointerType::isValidElementType(ResultTy))
- return Error("Invalid type");
+ return error("Invalid type");
ResultTy = PointerType::get(ResultTy, AddressSpace);
break;
}
@@ -1381,7 +1375,7 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
// FIXME: attrid is dead, remove it in LLVM 4.0
// FUNCTION: [vararg, attrid, retty, paramty x N]
if (Record.size() < 3)
- return Error("Invalid record");
+ return error("Invalid record");
SmallVector<Type*, 8> ArgTys;
for (unsigned i = 3, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
@@ -1392,7 +1386,7 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
ResultTy = getTypeByID(Record[2]);
if (!ResultTy || ArgTys.size() < Record.size()-3)
- return Error("Invalid type");
+ return error("Invalid type");
ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
break;
@@ -1400,12 +1394,12 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
case bitc::TYPE_CODE_FUNCTION: {
// FUNCTION: [vararg, retty, paramty x N]
if (Record.size() < 2)
- return Error("Invalid record");
+ return error("Invalid record");
SmallVector<Type*, 8> ArgTys;
for (unsigned i = 2, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i])) {
if (!FunctionType::isValidArgumentType(T))
- return Error("Invalid function argument type");
+ return error("Invalid function argument type");
ArgTys.push_back(T);
}
else
@@ -1414,14 +1408,14 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
ResultTy = getTypeByID(Record[1]);
if (!ResultTy || ArgTys.size() < Record.size()-2)
- return Error("Invalid type");
+ return error("Invalid type");
ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
break;
}
case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N]
if (Record.size() < 1)
- return Error("Invalid record");
+ return error("Invalid record");
SmallVector<Type*, 8> EltTys;
for (unsigned i = 1, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
@@ -1430,21 +1424,21 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
break;
}
if (EltTys.size() != Record.size()-1)
- return Error("Invalid type");
+ return error("Invalid type");
ResultTy = StructType::get(Context, EltTys, Record[0]);
break;
}
case bitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N]
- if (ConvertToString(Record, 0, TypeName))
- return Error("Invalid record");
+ if (convertToString(Record, 0, TypeName))
+ return error("Invalid record");
continue;
case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N]
if (Record.size() < 1)
- return Error("Invalid record");
+ return error("Invalid record");
if (NumRecords >= TypeList.size())
- return Error("Invalid TYPE table");
+ return error("Invalid TYPE table");
// Check to see if this was forward referenced, if so fill in the temp.
StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
@@ -1463,17 +1457,17 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
break;
}
if (EltTys.size() != Record.size()-1)
- return Error("Invalid record");
+ return error("Invalid record");
Res->setBody(EltTys, Record[0]);
ResultTy = Res;
break;
}
case bitc::TYPE_CODE_OPAQUE: { // OPAQUE: []
if (Record.size() != 1)
- return Error("Invalid record");
+ return error("Invalid record");
if (NumRecords >= TypeList.size())
- return Error("Invalid TYPE table");
+ return error("Invalid TYPE table");
// Check to see if this was forward referenced, if so fill in the temp.
StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
@@ -1488,37 +1482,37 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
}
case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
if (Record.size() < 2)
- return Error("Invalid record");
+ return error("Invalid record");
ResultTy = getTypeByID(Record[1]);
if (!ResultTy || !ArrayType::isValidElementType(ResultTy))
- return Error("Invalid type");
+ return error("Invalid type");
ResultTy = ArrayType::get(ResultTy, Record[0]);
break;
case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
if (Record.size() < 2)
- return Error("Invalid record");
+ return error("Invalid record");
if (Record[0] == 0)
- return Error("Invalid vector length");
+ return error("Invalid vector length");
ResultTy = getTypeByID(Record[1]);
if (!ResultTy || !StructType::isValidElementType(ResultTy))
- return Error("Invalid type");
+ return error("Invalid type");
ResultTy = VectorType::get(ResultTy, Record[0]);
break;
}
if (NumRecords >= TypeList.size())
- return Error("Invalid TYPE table");
+ return error("Invalid TYPE table");
if (TypeList[NumRecords])
- return Error(
+ return error(
"Invalid TYPE table: Only named structs can be forward referenced");
assert(ResultTy && "Didn't read a type?");
TypeList[NumRecords++] = ResultTy;
}
}
-std::error_code BitcodeReader::ParseValueSymbolTable() {
+std::error_code BitcodeReader::parseValueSymbolTable() {
if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
- return Error("Invalid record");
+ return error("Invalid record");
SmallVector<uint64_t, 64> Record;
@@ -1532,7 +1526,7 @@ std::error_code BitcodeReader::ParseValueSymbolTable() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("Malformed block");
+ return error("Malformed block");
case BitstreamEntry::EndBlock:
return std::error_code();
case BitstreamEntry::Record:
@@ -1546,11 +1540,11 @@ std::error_code BitcodeReader::ParseValueSymbolTable() {
default: // Default behavior: unknown type.
break;
case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N]
- if (ConvertToString(Record, 1, ValueName))
- return Error("Invalid record");
+ if (convertToString(Record, 1, ValueName))
+ return error("Invalid record");
unsigned ValueID = Record[0];
if (ValueID >= ValueList.size() || !ValueList[ValueID])
- return Error("Invalid record");
+ return error("Invalid record");
Value *V = ValueList[ValueID];
V->setName(StringRef(ValueName.data(), ValueName.size()));
@@ -1566,11 +1560,11 @@ std::error_code BitcodeReader::ParseValueSymbolTable() {
break;
}
case bitc::VST_CODE_BBENTRY: {
- if (ConvertToString(Record, 1, ValueName))
- return Error("Invalid record");
+ if (convertToString(Record, 1, ValueName))
+ return error("Invalid record");
BasicBlock *BB = getBasicBlock(Record[0]);
if (!BB)
- return Error("Invalid record");
+ return error("Invalid record");
BB->setName(StringRef(ValueName.data(), ValueName.size()));
ValueName.clear();
@@ -1582,12 +1576,12 @@ std::error_code BitcodeReader::ParseValueSymbolTable() {
static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }
-std::error_code BitcodeReader::ParseMetadata() {
+std::error_code BitcodeReader::parseMetadata() {
IsMetadataMaterialized = true;
unsigned NextMDValueNo = MDValueList.size();
if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
- return Error("Invalid record");
+ return error("Invalid record");
SmallVector<uint64_t, 64> Record;
@@ -1614,7 +1608,7 @@ std::error_code BitcodeReader::ParseMetadata() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("Malformed block");
+ return error("Malformed block");
case BitstreamEntry::EndBlock:
MDValueList.tryToResolveCycles();
return std::error_code();
@@ -1638,7 +1632,7 @@ std::error_code BitcodeReader::ParseMetadata() {
unsigned NextBitCode = Stream.readRecord(Code, Record);
if (NextBitCode != bitc::METADATA_NAMED_NODE)
- return Error("METADATA_NAME not followed by METADATA_NAMED_NODE");
+ return error("METADATA_NAME not followed by METADATA_NAMED_NODE");
// Read named metadata elements.
unsigned Size = Record.size();
@@ -1646,7 +1640,7 @@ std::error_code BitcodeReader::ParseMetadata() {
for (unsigned i = 0; i != Size; ++i) {
MDNode *MD = dyn_cast_or_null<MDNode>(MDValueList.getValueFwdRef(Record[i]));
if (!MD)
- return Error("Invalid record");
+ return error("Invalid record");
NMD->addOperand(MD);
}
break;
@@ -1656,12 +1650,12 @@ std::error_code BitcodeReader::ParseMetadata() {
// This is a LocalAsMetadata record, the only type of function-local
// metadata.
if (Record.size() % 2 == 1)
- return Error("Invalid record");
+ return error("Invalid record");
// If this isn't a LocalAsMetadata record, we're dropping it. This used
// to be legal, but there's no upgrade path.
auto dropRecord = [&] {
- MDValueList.AssignValue(MDNode::get(Context, None), NextMDValueNo++);
+ MDValueList.assignValue(MDNode::get(Context, None), NextMDValueNo++);
};
if (Record.size() != 2) {
dropRecord();
@@ -1674,7 +1668,7 @@ std::error_code BitcodeReader::ParseMetadata() {
break;
}
- MDValueList.AssignValue(
+ MDValueList.assignValue(
LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
NextMDValueNo++);
break;
@@ -1682,14 +1676,14 @@ std::error_code BitcodeReader::ParseMetadata() {
case bitc::METADATA_OLD_NODE: {
// FIXME: Remove in 4.0.
if (Record.size() % 2 == 1)
- return Error("Invalid record");
+ return error("Invalid record");
unsigned Size = Record.size();
SmallVector<Metadata *, 8> Elts;
for (unsigned i = 0; i != Size; i += 2) {
Type *Ty = getTypeByID(Record[i]);
if (!Ty)
- return Error("Invalid record");
+ return error("Invalid record");
if (Ty->isMetadataTy())
Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
else if (!Ty->isVoidTy()) {
@@ -1701,18 +1695,18 @@ std::error_code BitcodeReader::ParseMetadata() {
} else
Elts.push_back(nullptr);
}
- MDValueList.AssignValue(MDNode::get(Context, Elts), NextMDValueNo++);
+ MDValueList.assignValue(MDNode::get(Context, Elts), NextMDValueNo++);
break;
}
case bitc::METADATA_VALUE: {
if (Record.size() != 2)
- return Error("Invalid record");
+ return error("Invalid record");
Type *Ty = getTypeByID(Record[0]);
if (Ty->isMetadataTy() || Ty->isVoidTy())
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
NextMDValueNo++);
break;
@@ -1725,21 +1719,21 @@ std::error_code BitcodeReader::ParseMetadata() {
Elts.reserve(Record.size());
for (unsigned ID : Record)
Elts.push_back(ID ? MDValueList.getValueFwdRef(ID - 1) : nullptr);
- MDValueList.AssignValue(IsDistinct ? MDNode::getDistinct(Context, Elts)
+ MDValueList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts)
: MDNode::get(Context, Elts),
NextMDValueNo++);
break;
}
case bitc::METADATA_LOCATION: {
if (Record.size() != 5)
- return Error("Invalid record");
+ return error("Invalid record");
unsigned Line = Record[1];
unsigned Column = Record[2];
MDNode *Scope = cast<MDNode>(MDValueList.getValueFwdRef(Record[3]));
Metadata *InlinedAt =
Record[4] ? MDValueList.getValueFwdRef(Record[4] - 1) : nullptr;
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DILocation, Record[0],
(Context, Line, Column, Scope, InlinedAt)),
NextMDValueNo++);
@@ -1747,29 +1741,29 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_GENERIC_DEBUG: {
if (Record.size() < 4)
- return Error("Invalid record");
+ return error("Invalid record");
unsigned Tag = Record[1];
unsigned Version = Record[2];
if (Tag >= 1u << 16 || Version != 0)
- return Error("Invalid record");
+ return error("Invalid record");
auto *Header = getMDString(Record[3]);
SmallVector<Metadata *, 8> DwarfOps;
for (unsigned I = 4, E = Record.size(); I != E; ++I)
DwarfOps.push_back(Record[I] ? MDValueList.getValueFwdRef(Record[I] - 1)
: nullptr);
- MDValueList.AssignValue(GET_OR_DISTINCT(GenericDINode, Record[0],
+ MDValueList.assignValue(GET_OR_DISTINCT(GenericDINode, Record[0],
(Context, Tag, Header, DwarfOps)),
NextMDValueNo++);
break;
}
case bitc::METADATA_SUBRANGE: {
if (Record.size() != 3)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DISubrange, Record[0],
(Context, Record[1], unrotateSign(Record[2]))),
NextMDValueNo++);
@@ -1777,9 +1771,9 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_ENUMERATOR: {
if (Record.size() != 3)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(GET_OR_DISTINCT(DIEnumerator, Record[0],
+ MDValueList.assignValue(GET_OR_DISTINCT(DIEnumerator, Record[0],
(Context, unrotateSign(Record[1]),
getMDString(Record[2]))),
NextMDValueNo++);
@@ -1787,9 +1781,9 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_BASIC_TYPE: {
if (Record.size() != 6)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DIBasicType, Record[0],
(Context, Record[1], getMDString(Record[2]),
Record[3], Record[4], Record[5])),
@@ -1798,9 +1792,9 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_DERIVED_TYPE: {
if (Record.size() != 12)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DIDerivedType, Record[0],
(Context, Record[1], getMDString(Record[2]),
getMDOrNull(Record[3]), Record[4],
@@ -1812,9 +1806,9 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_COMPOSITE_TYPE: {
if (Record.size() != 16)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DICompositeType, Record[0],
(Context, Record[1], getMDString(Record[2]),
getMDOrNull(Record[3]), Record[4],
@@ -1828,9 +1822,9 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_SUBROUTINE_TYPE: {
if (Record.size() != 3)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DISubroutineType, Record[0],
(Context, Record[1], getMDOrNull(Record[2]))),
NextMDValueNo++);
@@ -1838,9 +1832,9 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_FILE: {
if (Record.size() != 3)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DIFile, Record[0], (Context, getMDString(Record[1]),
getMDString(Record[2]))),
NextMDValueNo++);
@@ -1848,26 +1842,25 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_COMPILE_UNIT: {
if (Record.size() < 14 || Record.size() > 15)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
- GET_OR_DISTINCT(DICompileUnit, Record[0],
- (Context, Record[1], getMDOrNull(Record[2]),
- getMDString(Record[3]), Record[4],
- getMDString(Record[5]), Record[6],
- getMDString(Record[7]), Record[8],
- getMDOrNull(Record[9]), getMDOrNull(Record[10]),
- getMDOrNull(Record[11]), getMDOrNull(Record[12]),
- getMDOrNull(Record[13]),
- Record.size() == 14 ? 0 : Record[14])),
+ MDValueList.assignValue(
+ GET_OR_DISTINCT(
+ DICompileUnit, Record[0],
+ (Context, Record[1], getMDOrNull(Record[2]),
+ getMDString(Record[3]), Record[4], getMDString(Record[5]),
+ Record[6], getMDString(Record[7]), Record[8],
+ getMDOrNull(Record[9]), getMDOrNull(Record[10]),
+ getMDOrNull(Record[11]), getMDOrNull(Record[12]),
+ getMDOrNull(Record[13]), Record.size() == 14 ? 0 : Record[14])),
NextMDValueNo++);
break;
}
case bitc::METADATA_SUBPROGRAM: {
if (Record.size() != 19)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(
DISubprogram, Record[0],
(Context, getMDOrNull(Record[1]), getMDString(Record[2]),
@@ -1881,9 +1874,9 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_LEXICAL_BLOCK: {
if (Record.size() != 5)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DILexicalBlock, Record[0],
(Context, getMDOrNull(Record[1]),
getMDOrNull(Record[2]), Record[3], Record[4])),
@@ -1892,9 +1885,9 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_LEXICAL_BLOCK_FILE: {
if (Record.size() != 4)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DILexicalBlockFile, Record[0],
(Context, getMDOrNull(Record[1]),
getMDOrNull(Record[2]), Record[3])),
@@ -1903,9 +1896,9 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_NAMESPACE: {
if (Record.size() != 5)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DINamespace, Record[0],
(Context, getMDOrNull(Record[1]),
getMDOrNull(Record[2]), getMDString(Record[3]),
@@ -1915,9 +1908,9 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_TEMPLATE_TYPE: {
if (Record.size() != 3)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(GET_OR_DISTINCT(DITemplateTypeParameter,
+ MDValueList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter,
Record[0],
(Context, getMDString(Record[1]),
getMDOrNull(Record[2]))),
@@ -1926,9 +1919,9 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_TEMPLATE_VALUE: {
if (Record.size() != 5)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DITemplateValueParameter, Record[0],
(Context, Record[1], getMDString(Record[2]),
getMDOrNull(Record[3]), getMDOrNull(Record[4]))),
@@ -1937,9 +1930,9 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_GLOBAL_VAR: {
if (Record.size() != 11)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DIGlobalVariable, Record[0],
(Context, getMDOrNull(Record[1]),
getMDString(Record[2]), getMDString(Record[3]),
@@ -1952,9 +1945,9 @@ std::error_code BitcodeReader::ParseMetadata() {
case bitc::METADATA_LOCAL_VAR: {
      // 10th field is for the obsoleted 'inlinedAt:' field.
if (Record.size() != 9 && Record.size() != 10)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DILocalVariable, Record[0],
(Context, Record[1], getMDOrNull(Record[2]),
getMDString(Record[3]), getMDOrNull(Record[4]),
@@ -1965,9 +1958,9 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_EXPRESSION: {
if (Record.size() < 1)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DIExpression, Record[0],
(Context, makeArrayRef(Record).slice(1))),
NextMDValueNo++);
@@ -1975,9 +1968,9 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_OBJC_PROPERTY: {
if (Record.size() != 8)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DIObjCProperty, Record[0],
(Context, getMDString(Record[1]),
getMDOrNull(Record[2]), Record[3],
@@ -1988,9 +1981,9 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_IMPORTED_ENTITY: {
if (Record.size() != 6)
- return Error("Invalid record");
+ return error("Invalid record");
- MDValueList.AssignValue(
+ MDValueList.assignValue(
GET_OR_DISTINCT(DIImportedEntity, Record[0],
(Context, Record[1], getMDOrNull(Record[2]),
getMDOrNull(Record[3]), Record[4],
@@ -2002,19 +1995,19 @@ std::error_code BitcodeReader::ParseMetadata() {
std::string String(Record.begin(), Record.end());
llvm::UpgradeMDStringConstant(String);
Metadata *MD = MDString::get(Context, String);
- MDValueList.AssignValue(MD, NextMDValueNo++);
+ MDValueList.assignValue(MD, NextMDValueNo++);
break;
}
case bitc::METADATA_KIND: {
if (Record.size() < 2)
- return Error("Invalid record");
+ return error("Invalid record");
unsigned Kind = Record[0];
SmallString<8> Name(Record.begin()+1, Record.end());
unsigned NewKind = TheModule->getMDKindID(Name.str());
if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second)
- return Error("Conflicting METADATA_KIND records");
+ return error("Conflicting METADATA_KIND records");
break;
}
}
@@ -2022,8 +2015,8 @@ std::error_code BitcodeReader::ParseMetadata() {
#undef GET_OR_DISTINCT
}
-/// decodeSignRotatedValue - Decode a signed value stored with the sign bit in
-/// the LSB for dense VBR encoding.
+/// Decode a signed value stored with the sign bit in the LSB for dense VBR
+/// encoding.
uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) {
if ((V & 1) == 0)
return V >> 1;
@@ -2033,18 +2026,19 @@ uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) {
return 1ULL << 63;
}
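
Sign rotation, as the comment describes, puts the sign bit in the LSB so small negative values stay small when VBR-encoded. A self-contained sketch of the scheme; encodeSignRotated/decodeSignRotated are illustration helpers, and the reader's version additionally special-cases INT64_MIN, which encodes as 1.

#include <cassert>
#include <cstdint>

static uint64_t encodeSignRotated(int64_t N) {
  if (N >= 0)
    return uint64_t(N) << 1;        // non-negative: value * 2, LSB = 0
  return (uint64_t(-N) << 1) | 1;   // negative: magnitude * 2, LSB = 1
}

static int64_t decodeSignRotated(uint64_t V) {
  if ((V & 1) == 0)
    return int64_t(V >> 1);         // LSB clear: non-negative
  return -int64_t(V >> 1);          // LSB set: negate the rest
}

int main() {
  const int64_t Tests[] = {0, 1, -1, 5, -5, 123456, -123456};
  for (int64_t N : Tests)
    assert(decodeSignRotated(encodeSignRotated(N)) == N);
}
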
-/// ResolveGlobalAndAliasInits - Resolve all of the initializers for global
-/// values and aliases that we can.
-std::error_code BitcodeReader::ResolveGlobalAndAliasInits() {
+/// Resolve all of the initializers for global values and aliases that we can.
+std::error_code BitcodeReader::resolveGlobalAndAliasInits() {
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInitWorklist;
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInitWorklist;
std::vector<std::pair<Function*, unsigned> > FunctionPrefixWorklist;
std::vector<std::pair<Function*, unsigned> > FunctionPrologueWorklist;
+ std::vector<std::pair<Function*, unsigned> > FunctionPersonalityFnWorklist;
GlobalInitWorklist.swap(GlobalInits);
AliasInitWorklist.swap(AliasInits);
FunctionPrefixWorklist.swap(FunctionPrefixes);
FunctionPrologueWorklist.swap(FunctionPrologues);
+ FunctionPersonalityFnWorklist.swap(FunctionPersonalityFns);
while (!GlobalInitWorklist.empty()) {
unsigned ValID = GlobalInitWorklist.back().second;
@@ -2055,7 +2049,7 @@ std::error_code BitcodeReader::ResolveGlobalAndAliasInits() {
if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
GlobalInitWorklist.back().first->setInitializer(C);
else
- return Error("Expected a constant");
+ return error("Expected a constant");
}
GlobalInitWorklist.pop_back();
}
@@ -2067,10 +2061,10 @@ std::error_code BitcodeReader::ResolveGlobalAndAliasInits() {
} else {
Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]);
if (!C)
- return Error("Expected a constant");
+ return error("Expected a constant");
GlobalAlias *Alias = AliasInitWorklist.back().first;
if (C->getType() != Alias->getType())
- return Error("Alias and aliasee types don't match");
+ return error("Alias and aliasee types don't match");
Alias->setAliasee(C);
}
AliasInitWorklist.pop_back();
@@ -2084,7 +2078,7 @@ std::error_code BitcodeReader::ResolveGlobalAndAliasInits() {
if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
FunctionPrefixWorklist.back().first->setPrefixData(C);
else
- return Error("Expected a constant");
+ return error("Expected a constant");
}
FunctionPrefixWorklist.pop_back();
}
@@ -2097,15 +2091,28 @@ std::error_code BitcodeReader::ResolveGlobalAndAliasInits() {
if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
FunctionPrologueWorklist.back().first->setPrologueData(C);
else
- return Error("Expected a constant");
+ return error("Expected a constant");
}
FunctionPrologueWorklist.pop_back();
}
+ while (!FunctionPersonalityFnWorklist.empty()) {
+ unsigned ValID = FunctionPersonalityFnWorklist.back().second;
+ if (ValID >= ValueList.size()) {
+ FunctionPersonalityFns.push_back(FunctionPersonalityFnWorklist.back());
+ } else {
+ if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
+ FunctionPersonalityFnWorklist.back().first->setPersonalityFn(C);
+ else
+ return error("Expected a constant");
+ }
+ FunctionPersonalityFnWorklist.pop_back();
+ }
+
return std::error_code();
}
-static APInt ReadWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) {
+static APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) {
SmallVector<uint64_t, 8> Words(Vals.size());
std::transform(Vals.begin(), Vals.end(), Words.begin(),
BitcodeReader::decodeSignRotatedValue);
@@ -2113,9 +2120,9 @@ static APInt ReadWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) {
return APInt(TypeBits, Words);
}
-std::error_code BitcodeReader::ParseConstants() {
+std::error_code BitcodeReader::parseConstants() {
if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID))
- return Error("Invalid record");
+ return error("Invalid record");
SmallVector<uint64_t, 64> Record;
@@ -2128,14 +2135,14 @@ std::error_code BitcodeReader::ParseConstants() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("Malformed block");
+ return error("Malformed block");
case BitstreamEntry::EndBlock:
if (NextCstNo != ValueList.size())
- return Error("Invalid ronstant reference");
+ return error("Invalid ronstant reference");
// Once all the constants have been read, go through and resolve forward
// references.
- ValueList.ResolveConstantForwardRefs();
+ ValueList.resolveConstantForwardRefs();
return std::error_code();
case BitstreamEntry::Record:
// The interesting case.
@@ -2153,9 +2160,9 @@ std::error_code BitcodeReader::ParseConstants() {
break;
case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid]
if (Record.empty())
- return Error("Invalid record");
+ return error("Invalid record");
if (Record[0] >= TypeList.size() || !TypeList[Record[0]])
- return Error("Invalid record");
+ return error("Invalid record");
CurTy = TypeList[Record[0]];
continue; // Skip the ValueList manipulation.
case bitc::CST_CODE_NULL: // NULL
@@ -2163,22 +2170,22 @@ std::error_code BitcodeReader::ParseConstants() {
break;
case bitc::CST_CODE_INTEGER: // INTEGER: [intval]
if (!CurTy->isIntegerTy() || Record.empty())
- return Error("Invalid record");
+ return error("Invalid record");
V = ConstantInt::get(CurTy, decodeSignRotatedValue(Record[0]));
break;
case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval]
if (!CurTy->isIntegerTy() || Record.empty())
- return Error("Invalid record");
+ return error("Invalid record");
- APInt VInt = ReadWideAPInt(Record,
- cast<IntegerType>(CurTy)->getBitWidth());
+ APInt VInt =
+ readWideAPInt(Record, cast<IntegerType>(CurTy)->getBitWidth());
V = ConstantInt::get(Context, VInt);
break;
}
case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval]
if (Record.empty())
- return Error("Invalid record");
+ return error("Invalid record");
if (CurTy->isHalfTy())
V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf,
APInt(16, (uint16_t)Record[0])));
@@ -2208,7 +2215,7 @@ std::error_code BitcodeReader::ParseConstants() {
case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number]
if (Record.empty())
- return Error("Invalid record");
+ return error("Invalid record");
unsigned Size = Record.size();
SmallVector<Constant*, 16> Elts;
@@ -2236,7 +2243,7 @@ std::error_code BitcodeReader::ParseConstants() {
case bitc::CST_CODE_STRING: // STRING: [values]
case bitc::CST_CODE_CSTRING: { // CSTRING: [values]
if (Record.empty())
- return Error("Invalid record");
+ return error("Invalid record");
SmallString<16> Elts(Record.begin(), Record.end());
V = ConstantDataArray::getString(Context, Elts,
@@ -2245,7 +2252,7 @@ std::error_code BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_DATA: {// DATA: [n x value]
if (Record.empty())
- return Error("Invalid record");
+ return error("Invalid record");
Type *EltTy = cast<SequentialType>(CurTy)->getElementType();
unsigned Size = Record.size();
@@ -2290,15 +2297,15 @@ std::error_code BitcodeReader::ParseConstants() {
else
V = ConstantDataArray::get(Context, Elts);
} else {
- return Error("Invalid type for value");
+ return error("Invalid type for value");
}
break;
}
case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval]
if (Record.size() < 3)
- return Error("Invalid record");
- int Opc = GetDecodedBinaryOpcode(Record[0], CurTy);
+ return error("Invalid record");
+ int Opc = getDecodedBinaryOpcode(Record[0], CurTy);
if (Opc < 0) {
V = UndefValue::get(CurTy); // Unknown binop.
} else {
@@ -2328,14 +2335,14 @@ std::error_code BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval]
if (Record.size() < 3)
- return Error("Invalid record");
- int Opc = GetDecodedCastOpcode(Record[0]);
+ return error("Invalid record");
+ int Opc = getDecodedCastOpcode(Record[0]);
if (Opc < 0) {
V = UndefValue::get(CurTy); // Unknown cast.
} else {
Type *OpTy = getTypeByID(Record[1]);
if (!OpTy)
- return Error("Invalid record");
+ return error("Invalid record");
Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy);
V = UpgradeBitCastExpr(Opc, Op, CurTy);
if (!V) V = ConstantExpr::getCast(Opc, Op, CurTy);
@@ -2352,7 +2359,7 @@ std::error_code BitcodeReader::ParseConstants() {
while (OpNum != Record.size()) {
Type *ElTy = getTypeByID(Record[OpNum++]);
if (!ElTy)
- return Error("Invalid record");
+ return error("Invalid record");
Elts.push_back(ValueList.getConstantFwdRef(Record[OpNum++], ElTy));
}
@@ -2360,7 +2367,7 @@ std::error_code BitcodeReader::ParseConstants() {
PointeeType !=
cast<SequentialType>(Elts[0]->getType()->getScalarType())
->getElementType())
- return Error("Explicit gep operator type does not match pointee type "
+ return error("Explicit gep operator type does not match pointee type "
"of pointer operand");
ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
@@ -2371,7 +2378,7 @@ std::error_code BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#]
if (Record.size() < 3)
- return Error("Invalid record");
+ return error("Invalid record");
Type *SelectorTy = Type::getInt1Ty(Context);
@@ -2390,22 +2397,22 @@ std::error_code BitcodeReader::ParseConstants() {
case bitc::CST_CODE_CE_EXTRACTELT
: { // CE_EXTRACTELT: [opty, opval, opty, opval]
if (Record.size() < 3)
- return Error("Invalid record");
+ return error("Invalid record");
VectorType *OpTy =
dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
if (!OpTy)
- return Error("Invalid record");
+ return error("Invalid record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
Constant *Op1 = nullptr;
if (Record.size() == 4) {
Type *IdxTy = getTypeByID(Record[2]);
if (!IdxTy)
- return Error("Invalid record");
+ return error("Invalid record");
Op1 = ValueList.getConstantFwdRef(Record[3], IdxTy);
} else // TODO: Remove with llvm 4.0
Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
if (!Op1)
- return Error("Invalid record");
+ return error("Invalid record");
V = ConstantExpr::getExtractElement(Op0, Op1);
break;
}
@@ -2413,7 +2420,7 @@ std::error_code BitcodeReader::ParseConstants() {
: { // CE_INSERTELT: [opval, opval, opty, opval]
VectorType *OpTy = dyn_cast<VectorType>(CurTy);
if (Record.size() < 3 || !OpTy)
- return Error("Invalid record");
+ return error("Invalid record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[1],
OpTy->getElementType());
@@ -2421,19 +2428,19 @@ std::error_code BitcodeReader::ParseConstants() {
if (Record.size() == 4) {
Type *IdxTy = getTypeByID(Record[2]);
if (!IdxTy)
- return Error("Invalid record");
+ return error("Invalid record");
Op2 = ValueList.getConstantFwdRef(Record[3], IdxTy);
} else // TODO: Remove with llvm 4.0
Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
if (!Op2)
- return Error("Invalid record");
+ return error("Invalid record");
V = ConstantExpr::getInsertElement(Op0, Op1, Op2);
break;
}
case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval]
VectorType *OpTy = dyn_cast<VectorType>(CurTy);
if (Record.size() < 3 || !OpTy)
- return Error("Invalid record");
+ return error("Invalid record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy);
Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
@@ -2447,7 +2454,7 @@ std::error_code BitcodeReader::ParseConstants() {
VectorType *OpTy =
dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
if (Record.size() < 4 || !RTy || !OpTy)
- return Error("Invalid record");
+ return error("Invalid record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
@@ -2458,10 +2465,10 @@ std::error_code BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred]
if (Record.size() < 4)
- return Error("Invalid record");
+ return error("Invalid record");
Type *OpTy = getTypeByID(Record[0]);
if (!OpTy)
- return Error("Invalid record");
+ return error("Invalid record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
@@ -2475,16 +2482,16 @@ std::error_code BitcodeReader::ParseConstants() {
// FIXME: Remove with the 4.0 release.
case bitc::CST_CODE_INLINEASM_OLD: {
if (Record.size() < 2)
- return Error("Invalid record");
+ return error("Invalid record");
std::string AsmStr, ConstrStr;
bool HasSideEffects = Record[0] & 1;
bool IsAlignStack = Record[0] >> 1;
unsigned AsmStrSize = Record[1];
if (2+AsmStrSize >= Record.size())
- return Error("Invalid record");
+ return error("Invalid record");
unsigned ConstStrSize = Record[2+AsmStrSize];
if (3+AsmStrSize+ConstStrSize > Record.size())
- return Error("Invalid record");
+ return error("Invalid record");
for (unsigned i = 0; i != AsmStrSize; ++i)
AsmStr += (char)Record[2+i];
@@ -2499,17 +2506,17 @@ std::error_code BitcodeReader::ParseConstants() {
// inteldialect).
case bitc::CST_CODE_INLINEASM: {
if (Record.size() < 2)
- return Error("Invalid record");
+ return error("Invalid record");
std::string AsmStr, ConstrStr;
bool HasSideEffects = Record[0] & 1;
bool IsAlignStack = (Record[0] >> 1) & 1;
unsigned AsmDialect = Record[0] >> 2;
unsigned AsmStrSize = Record[1];
if (2+AsmStrSize >= Record.size())
- return Error("Invalid record");
+ return error("Invalid record");
unsigned ConstStrSize = Record[2+AsmStrSize];
if (3+AsmStrSize+ConstStrSize > Record.size())
- return Error("Invalid record");
+ return error("Invalid record");
for (unsigned i = 0; i != AsmStrSize; ++i)
AsmStr += (char)Record[2+i];
@@ -2523,14 +2530,14 @@ std::error_code BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_BLOCKADDRESS:{
if (Record.size() < 3)
- return Error("Invalid record");
+ return error("Invalid record");
Type *FnTy = getTypeByID(Record[0]);
if (!FnTy)
- return Error("Invalid record");
+ return error("Invalid record");
Function *Fn =
dyn_cast_or_null<Function>(ValueList.getConstantFwdRef(Record[1],FnTy));
if (!Fn)
- return Error("Invalid record");
+ return error("Invalid record");
// Don't let Fn get dematerialized.
BlockAddressesTaken.insert(Fn);
@@ -2541,12 +2548,12 @@ std::error_code BitcodeReader::ParseConstants() {
unsigned BBID = Record[2];
if (!BBID)
// Invalid reference to entry block.
- return Error("Invalid ID");
+ return error("Invalid ID");
if (!Fn->empty()) {
Function::iterator BBI = Fn->begin(), BBE = Fn->end();
for (size_t I = 0, E = BBID; I != E; ++I) {
if (BBI == BBE)
- return Error("Invalid ID");
+ return error("Invalid ID");
++BBI;
}
BB = BBI;
@@ -2567,14 +2574,14 @@ std::error_code BitcodeReader::ParseConstants() {
}
}
- ValueList.AssignValue(V, NextCstNo);
+ ValueList.assignValue(V, NextCstNo);
++NextCstNo;
}
}
-std::error_code BitcodeReader::ParseUseLists() {
+std::error_code BitcodeReader::parseUseLists() {
if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
- return Error("Invalid record");
+ return error("Invalid record");
// Read all the records.
SmallVector<uint64_t, 64> Record;
@@ -2584,7 +2591,7 @@ std::error_code BitcodeReader::ParseUseLists() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("Malformed block");
+ return error("Malformed block");
case BitstreamEntry::EndBlock:
return std::error_code();
case BitstreamEntry::Record:
@@ -2605,7 +2612,7 @@ std::error_code BitcodeReader::ParseUseLists() {
unsigned RecordLength = Record.size();
if (RecordLength < 3)
// Records should have at least an ID and two indexes.
- return Error("Invalid record");
+ return error("Invalid record");
unsigned ID = Record.back();
Record.pop_back();
@@ -2645,7 +2652,7 @@ std::error_code BitcodeReader::rememberAndSkipMetadata() {
// Skip over the block for now.
if (Stream.SkipBlock())
- return Error("Invalid record");
+ return error("Invalid record");
return std::error_code();
}
@@ -2653,7 +2660,7 @@ std::error_code BitcodeReader::materializeMetadata() {
for (uint64_t BitPos : DeferredMetadataInfo) {
// Move the bit stream to the saved position.
Stream.JumpToBit(BitPos);
- if (std::error_code EC = ParseMetadata())
+ if (std::error_code EC = parseMetadata())
return EC;
}
DeferredMetadataInfo.clear();
@@ -2662,13 +2669,12 @@ std::error_code BitcodeReader::materializeMetadata() {
void BitcodeReader::setStripDebugInfo() { StripDebugInfo = true; }
-/// RememberAndSkipFunctionBody - When we see the block for a function body,
-/// remember where it is and then skip it. This lets us lazily deserialize the
-/// functions.
-std::error_code BitcodeReader::RememberAndSkipFunctionBody() {
+/// When we see the block for a function body, remember where it is and then
+/// skip it. This lets us lazily deserialize the functions.
+std::error_code BitcodeReader::rememberAndSkipFunctionBody() {
// Get the function we are talking about.
if (FunctionsWithBodies.empty())
- return Error("Insufficient function protos");
+ return error("Insufficient function protos");
Function *Fn = FunctionsWithBodies.back();
FunctionsWithBodies.pop_back();
@@ -2679,31 +2685,26 @@ std::error_code BitcodeReader::RememberAndSkipFunctionBody() {
// Skip over the function block for now.
if (Stream.SkipBlock())
- return Error("Invalid record");
+ return error("Invalid record");
return std::error_code();
}
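
Remember-and-skip is the generic lazy-deserialization pattern: note where a body starts, seek past it, and only come back when someone asks to materialize it. A minimal sketch of that pattern over a seekable stream, assuming plain iostreams and hypothetical names rather than the bitstream cursor:

#include <cstdint>
#include <map>
#include <sstream>
#include <string>

struct LazyReader {
  std::istringstream In;
  std::map<std::string, std::streampos> BodyOffsets;   // name -> where it starts

  // First pass: remember where each body begins, then skip over it.
  void rememberAndSkip(const std::string &Name, std::streamoff BodySize) {
    BodyOffsets[Name] = In.tellg();
    In.seekg(BodySize, std::ios::cur);   // skip the body for now
  }

  // Materialization: jump back and actually read the body on demand.
  std::string materialize(const std::string &Name, std::streamoff BodySize) {
    In.seekg(BodyOffsets.at(Name));
    std::string Body(BodySize, '\0');
    In.read(&Body[0], BodySize);
    return Body;
  }
};

int main() {
  LazyReader R;
  R.In.str("fn1-bodyfn2-body");
  R.rememberAndSkip("fn1", 8);
  R.rememberAndSkip("fn2", 8);
  return R.materialize("fn1", 8) == "fn1-body" ? 0 : 1;
}
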
-std::error_code BitcodeReader::GlobalCleanup() {
+std::error_code BitcodeReader::globalCleanup() {
// Patch the initializers for globals and aliases up.
- ResolveGlobalAndAliasInits();
+ resolveGlobalAndAliasInits();
if (!GlobalInits.empty() || !AliasInits.empty())
- return Error("Malformed global initializer set");
+ return error("Malformed global initializer set");
// Look for intrinsic functions which need to be upgraded at some point
- for (Module::iterator FI = TheModule->begin(), FE = TheModule->end();
- FI != FE; ++FI) {
+ for (Function &F : *TheModule) {
Function *NewFn;
- if (UpgradeIntrinsicFunction(FI, NewFn))
- UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn));
+ if (UpgradeIntrinsicFunction(&F, NewFn))
+ UpgradedIntrinsics.push_back(std::make_pair(&F, NewFn));
}
// Look for global variables which need to be renamed.
- for (Module::global_iterator
- GI = TheModule->global_begin(), GE = TheModule->global_end();
- GI != GE;) {
- GlobalVariable *GV = GI++;
- UpgradeGlobalVariable(GV);
- }
+ for (GlobalVariable &GV : TheModule->globals())
+ UpgradeGlobalVariable(&GV);
   // Force deallocation of memory for these vectors to favor clients that
   // want lazy deserialization.
@@ -2712,12 +2713,12 @@ std::error_code BitcodeReader::GlobalCleanup() {
return std::error_code();
}
-std::error_code BitcodeReader::ParseModule(bool Resume,
+std::error_code BitcodeReader::parseModule(bool Resume,
bool ShouldLazyLoadMetadata) {
if (Resume)
Stream.JumpToBit(NextUnreadBit);
else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
- return Error("Invalid record");
+ return error("Invalid record");
SmallVector<uint64_t, 64> Record;
std::vector<std::string> SectionTable;
@@ -2729,41 +2730,41 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
switch (Entry.Kind) {
case BitstreamEntry::Error:
- return Error("Malformed block");
+ return error("Malformed block");
case BitstreamEntry::EndBlock:
- return GlobalCleanup();
+ return globalCleanup();
case BitstreamEntry::SubBlock:
switch (Entry.ID) {
default: // Skip unknown content.
if (Stream.SkipBlock())
- return Error("Invalid record");
+ return error("Invalid record");
break;
case bitc::BLOCKINFO_BLOCK_ID:
if (Stream.ReadBlockInfoBlock())
- return Error("Malformed block");
+ return error("Malformed block");
break;
case bitc::PARAMATTR_BLOCK_ID:
- if (std::error_code EC = ParseAttributeBlock())
+ if (std::error_code EC = parseAttributeBlock())
return EC;
break;
case bitc::PARAMATTR_GROUP_BLOCK_ID:
- if (std::error_code EC = ParseAttributeGroupBlock())
+ if (std::error_code EC = parseAttributeGroupBlock())
return EC;
break;
case bitc::TYPE_BLOCK_ID_NEW:
- if (std::error_code EC = ParseTypeTable())
+ if (std::error_code EC = parseTypeTable())
return EC;
break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
- if (std::error_code EC = ParseValueSymbolTable())
+ if (std::error_code EC = parseValueSymbolTable())
return EC;
SeenValueSymbolTable = true;
break;
case bitc::CONSTANTS_BLOCK_ID:
- if (std::error_code EC = ParseConstants())
+ if (std::error_code EC = parseConstants())
return EC;
- if (std::error_code EC = ResolveGlobalAndAliasInits())
+ if (std::error_code EC = resolveGlobalAndAliasInits())
return EC;
break;
case bitc::METADATA_BLOCK_ID:
@@ -2773,7 +2774,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
break;
}
assert(DeferredMetadataInfo.empty() && "Unexpected deferred metadata");
- if (std::error_code EC = ParseMetadata())
+ if (std::error_code EC = parseMetadata())
return EC;
break;
case bitc::FUNCTION_BLOCK_ID:
@@ -2781,12 +2782,12 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
// FunctionsWithBodies list.
if (!SeenFirstFunctionBody) {
std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end());
- if (std::error_code EC = GlobalCleanup())
+ if (std::error_code EC = globalCleanup())
return EC;
SeenFirstFunctionBody = true;
}
- if (std::error_code EC = RememberAndSkipFunctionBody())
+ if (std::error_code EC = rememberAndSkipFunctionBody())
return EC;
// For streaming bitcode, suspend parsing when we reach the function
// bodies. Subsequent materialization calls will resume it when
@@ -2794,13 +2795,13 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
// the bitcode. If the bitcode file is old, the symbol table will be
// at the end instead and will not have been seen yet. In this case,
// just finish the parse now.
- if (LazyStreamer && SeenValueSymbolTable) {
+ if (IsStreamed && SeenValueSymbolTable) {
NextUnreadBit = Stream.GetCurrentBitNo();
return std::error_code();
}
break;
case bitc::USELIST_BLOCK_ID:
- if (std::error_code EC = ParseUseLists())
+ if (std::error_code EC = parseUseLists())
return EC;
break;
}
@@ -2817,12 +2818,12 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
default: break; // Default behavior, ignore unknown content.
case bitc::MODULE_CODE_VERSION: { // VERSION: [version#]
if (Record.size() < 1)
- return Error("Invalid record");
+ return error("Invalid record");
// Only version #0 and #1 are supported so far.
unsigned module_version = Record[0];
switch (module_version) {
default:
- return Error("Invalid value");
+ return error("Invalid value");
case 0:
UseRelativeIDs = false;
break;
@@ -2834,50 +2835,50 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
}
case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
std::string S;
- if (ConvertToString(Record, 0, S))
- return Error("Invalid record");
+ if (convertToString(Record, 0, S))
+ return error("Invalid record");
TheModule->setTargetTriple(S);
break;
}
case bitc::MODULE_CODE_DATALAYOUT: { // DATALAYOUT: [strchr x N]
std::string S;
- if (ConvertToString(Record, 0, S))
- return Error("Invalid record");
+ if (convertToString(Record, 0, S))
+ return error("Invalid record");
TheModule->setDataLayout(S);
break;
}
case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N]
std::string S;
- if (ConvertToString(Record, 0, S))
- return Error("Invalid record");
+ if (convertToString(Record, 0, S))
+ return error("Invalid record");
TheModule->setModuleInlineAsm(S);
break;
}
case bitc::MODULE_CODE_DEPLIB: { // DEPLIB: [strchr x N]
// FIXME: Remove in 4.0.
std::string S;
- if (ConvertToString(Record, 0, S))
- return Error("Invalid record");
+ if (convertToString(Record, 0, S))
+ return error("Invalid record");
// Ignore value.
break;
}
case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N]
std::string S;
- if (ConvertToString(Record, 0, S))
- return Error("Invalid record");
+ if (convertToString(Record, 0, S))
+ return error("Invalid record");
SectionTable.push_back(S);
break;
}
    case bitc::MODULE_CODE_GCNAME: { // GCNAME: [strchr x N]
std::string S;
- if (ConvertToString(Record, 0, S))
- return Error("Invalid record");
+ if (convertToString(Record, 0, S))
+ return error("Invalid record");
GCTable.push_back(S);
break;
}
case bitc::MODULE_CODE_COMDAT: { // COMDAT: [selection_kind, name]
if (Record.size() < 2)
- return Error("Invalid record");
+ return error("Invalid record");
Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]);
unsigned ComdatNameSize = Record[1];
std::string ComdatName;
@@ -2895,10 +2896,10 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
// comdat]
case bitc::MODULE_CODE_GLOBALVAR: {
if (Record.size() < 6)
- return Error("Invalid record");
+ return error("Invalid record");
Type *Ty = getTypeByID(Record[0]);
if (!Ty)
- return Error("Invalid record");
+ return error("Invalid record");
bool isConstant = Record[1] & 1;
bool explicitType = Record[1] & 2;
unsigned AddressSpace;
@@ -2906,7 +2907,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
AddressSpace = Record[1] >> 2;
} else {
if (!Ty->isPointerTy())
- return Error("Invalid type for value");
+ return error("Invalid type for value");
AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
Ty = cast<PointerType>(Ty)->getElementType();
}
@@ -2919,18 +2920,18 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
std::string Section;
if (Record[5]) {
if (Record[5]-1 >= SectionTable.size())
- return Error("Invalid ID");
+ return error("Invalid ID");
Section = SectionTable[Record[5]-1];
}
GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility;
// Local linkage must have default visibility.
if (Record.size() > 6 && !GlobalValue::isLocalLinkage(Linkage))
// FIXME: Change to an error if non-default in 4.0.
- Visibility = GetDecodedVisibility(Record[6]);
+ Visibility = getDecodedVisibility(Record[6]);
GlobalVariable::ThreadLocalMode TLM = GlobalVariable::NotThreadLocal;
if (Record.size() > 7)
- TLM = GetDecodedThreadLocalMode(Record[7]);
+ TLM = getDecodedThreadLocalMode(Record[7]);
bool UnnamedAddr = false;
if (Record.size() > 8)
@@ -2950,9 +2951,9 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
NewGV->setUnnamedAddr(UnnamedAddr);
if (Record.size() > 10)
- NewGV->setDLLStorageClass(GetDecodedDLLStorageClass(Record[10]));
+ NewGV->setDLLStorageClass(getDecodedDLLStorageClass(Record[10]));
else
- UpgradeDLLImportExportLinkage(NewGV, RawLinkage);
+ upgradeDLLImportExportLinkage(NewGV, RawLinkage);
ValueList.push_back(NewGV);
@@ -2963,7 +2964,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
if (Record.size() > 11) {
if (unsigned ComdatID = Record[11]) {
if (ComdatID > ComdatList.size())
- return Error("Invalid global variable comdat ID");
+ return error("Invalid global variable comdat ID");
NewGV->setComdat(ComdatList[ComdatID - 1]);
}
} else if (hasImplicitComdat(RawLinkage)) {
@@ -2976,15 +2977,15 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
// prologuedata, dllstorageclass, comdat, prefixdata]
case bitc::MODULE_CODE_FUNCTION: {
if (Record.size() < 8)
- return Error("Invalid record");
+ return error("Invalid record");
Type *Ty = getTypeByID(Record[0]);
if (!Ty)
- return Error("Invalid record");
+ return error("Invalid record");
if (auto *PTy = dyn_cast<PointerType>(Ty))
Ty = PTy->getElementType();
auto *FTy = dyn_cast<FunctionType>(Ty);
if (!FTy)
- return Error("Invalid type for value");
+ return error("Invalid type for value");
Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage,
"", TheModule);
@@ -3001,16 +3002,16 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
Func->setAlignment(Alignment);
if (Record[6]) {
if (Record[6]-1 >= SectionTable.size())
- return Error("Invalid ID");
+ return error("Invalid ID");
Func->setSection(SectionTable[Record[6]-1]);
}
// Local linkage must have default visibility.
if (!Func->hasLocalLinkage())
// FIXME: Change to an error if non-default in 4.0.
- Func->setVisibility(GetDecodedVisibility(Record[7]));
+ Func->setVisibility(getDecodedVisibility(Record[7]));
if (Record.size() > 8 && Record[8]) {
if (Record[8]-1 >= GCTable.size())
- return Error("Invalid ID");
+ return error("Invalid ID");
Func->setGC(GCTable[Record[8]-1].c_str());
}
bool UnnamedAddr = false;
@@ -3021,14 +3022,14 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
FunctionPrologues.push_back(std::make_pair(Func, Record[10]-1));
if (Record.size() > 11)
- Func->setDLLStorageClass(GetDecodedDLLStorageClass(Record[11]));
+ Func->setDLLStorageClass(getDecodedDLLStorageClass(Record[11]));
else
- UpgradeDLLImportExportLinkage(Func, RawLinkage);
+ upgradeDLLImportExportLinkage(Func, RawLinkage);
if (Record.size() > 12) {
if (unsigned ComdatID = Record[12]) {
if (ComdatID > ComdatList.size())
- return Error("Invalid function comdat ID");
+ return error("Invalid function comdat ID");
Func->setComdat(ComdatList[ComdatID - 1]);
}
} else if (hasImplicitComdat(RawLinkage)) {
@@ -3038,6 +3039,9 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
if (Record.size() > 13 && Record[13] != 0)
FunctionPrefixes.push_back(std::make_pair(Func, Record[13]-1));
+ if (Record.size() > 14 && Record[14] != 0)
+ FunctionPersonalityFns.push_back(std::make_pair(Func, Record[14] - 1));
+
ValueList.push_back(Func);
// If this is a function with a body, remember the prototype we are
@@ -3045,7 +3049,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
if (!isProto) {
Func->setIsMaterializable(true);
FunctionsWithBodies.push_back(Func);
- if (LazyStreamer)
+ if (IsStreamed)
DeferredFunctionInfo[Func] = 0;
}
break;
@@ -3054,13 +3058,13 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
// ALIAS: [alias type, aliasee val#, linkage, visibility, dllstorageclass]
case bitc::MODULE_CODE_ALIAS: {
if (Record.size() < 3)
- return Error("Invalid record");
+ return error("Invalid record");
Type *Ty = getTypeByID(Record[0]);
if (!Ty)
- return Error("Invalid record");
+ return error("Invalid record");
auto *PTy = dyn_cast<PointerType>(Ty);
if (!PTy)
- return Error("Invalid type for value");
+ return error("Invalid type for value");
auto *NewGA =
GlobalAlias::create(PTy, getDecodedLinkage(Record[2]), "", TheModule);
@@ -3068,13 +3072,13 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
// Local linkage must have default visibility.
if (Record.size() > 3 && !NewGA->hasLocalLinkage())
// FIXME: Change to an error if non-default in 4.0.
- NewGA->setVisibility(GetDecodedVisibility(Record[3]));
+ NewGA->setVisibility(getDecodedVisibility(Record[3]));
if (Record.size() > 4)
- NewGA->setDLLStorageClass(GetDecodedDLLStorageClass(Record[4]));
+ NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[4]));
else
- UpgradeDLLImportExportLinkage(NewGA, Record[2]);
+ upgradeDLLImportExportLinkage(NewGA, Record[2]);
if (Record.size() > 5)
- NewGA->setThreadLocalMode(GetDecodedThreadLocalMode(Record[5]));
+ NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[5]));
if (Record.size() > 6)
NewGA->setUnnamedAddr(Record[6]);
ValueList.push_back(NewGA);
@@ -3085,7 +3089,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
case bitc::MODULE_CODE_PURGEVALS:
// Trim down the value list to the specified size.
if (Record.size() < 1 || Record[0] > ValueList.size())
- return Error("Invalid record");
+ return error("Invalid record");
ValueList.shrinkTo(Record[0]);
break;
}
@@ -3093,11 +3097,12 @@ std::error_code BitcodeReader::ParseModule(bool Resume,
}
}
-std::error_code BitcodeReader::ParseBitcodeInto(Module *M,
- bool ShouldLazyLoadMetadata) {
- TheModule = nullptr;
+std::error_code
+BitcodeReader::parseBitcodeInto(std::unique_ptr<DataStreamer> Streamer,
+ Module *M, bool ShouldLazyLoadMetadata) {
+ TheModule = M;
- if (std::error_code EC = InitStream())
+ if (std::error_code EC = initStream(std::move(Streamer)))
return EC;
// Sniff for the signature.
@@ -3107,68 +3112,33 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M,
Stream.Read(4) != 0xC ||
Stream.Read(4) != 0xE ||
Stream.Read(4) != 0xD)
- return Error("Invalid bitcode signature");
+ return error("Invalid bitcode signature");
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
while (1) {
if (Stream.AtEndOfStream()) {
- if (TheModule)
- return std::error_code();
// We didn't really read a proper Module.
- return Error("Malformed IR file");
+ return error("Malformed IR file");
}
BitstreamEntry Entry =
Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
- switch (Entry.Kind) {
- case BitstreamEntry::Error:
- return Error("Malformed block");
- case BitstreamEntry::EndBlock:
- return std::error_code();
+ if (Entry.Kind != BitstreamEntry::SubBlock)
+ return error("Malformed block");
- case BitstreamEntry::SubBlock:
- switch (Entry.ID) {
- case bitc::BLOCKINFO_BLOCK_ID:
- if (Stream.ReadBlockInfoBlock())
- return Error("Malformed block");
- break;
- case bitc::MODULE_BLOCK_ID:
- // Reject multiple MODULE_BLOCK's in a single bitstream.
- if (TheModule)
- return Error("Invalid multiple blocks");
- TheModule = M;
- if (std::error_code EC = ParseModule(false, ShouldLazyLoadMetadata))
- return EC;
- if (LazyStreamer)
- return std::error_code();
- break;
- default:
- if (Stream.SkipBlock())
- return Error("Invalid record");
- break;
- }
- continue;
- case BitstreamEntry::Record:
- // There should be no records in the top-level of blocks.
+ if (Entry.ID == bitc::MODULE_BLOCK_ID)
+ return parseModule(false, ShouldLazyLoadMetadata);
- // The ranlib in Xcode 4 will align archive members by appending newlines
- // to the end of them. If this file size is a multiple of 4 but not 8, we
- // have to read and ignore these final 4 bytes :-(
- if (Stream.getAbbrevIDWidth() == 2 && Entry.ID == 2 &&
- Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a &&
- Stream.AtEndOfStream())
- return std::error_code();
-
- return Error("Invalid record");
- }
+ if (Stream.SkipBlock())
+ return error("Invalid record");
}
}
ErrorOr<std::string> BitcodeReader::parseModuleTriple() {
if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
- return Error("Invalid record");
+ return error("Invalid record");
SmallVector<uint64_t, 64> Record;
@@ -3180,7 +3150,7 @@ ErrorOr<std::string> BitcodeReader::parseModuleTriple() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("Malformed block");
+ return error("Malformed block");
case BitstreamEntry::EndBlock:
return Triple;
case BitstreamEntry::Record:
@@ -3193,8 +3163,8 @@ ErrorOr<std::string> BitcodeReader::parseModuleTriple() {
default: break; // Default behavior, ignore unknown content.
case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
std::string S;
- if (ConvertToString(Record, 0, S))
- return Error("Invalid record");
+ if (convertToString(Record, 0, S))
+ return error("Invalid record");
Triple = S;
break;
}
@@ -3205,7 +3175,7 @@ ErrorOr<std::string> BitcodeReader::parseModuleTriple() {
}
ErrorOr<std::string> BitcodeReader::parseTriple() {
- if (std::error_code EC = InitStream())
+ if (std::error_code EC = initStream(nullptr))
return EC;
// Sniff for the signature.
@@ -3215,7 +3185,7 @@ ErrorOr<std::string> BitcodeReader::parseTriple() {
Stream.Read(4) != 0xC ||
Stream.Read(4) != 0xE ||
Stream.Read(4) != 0xD)
- return Error("Invalid bitcode signature");
+ return error("Invalid bitcode signature");
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
@@ -3224,7 +3194,7 @@ ErrorOr<std::string> BitcodeReader::parseTriple() {
switch (Entry.Kind) {
case BitstreamEntry::Error:
- return Error("Malformed block");
+ return error("Malformed block");
case BitstreamEntry::EndBlock:
return std::error_code();
@@ -3234,7 +3204,7 @@ ErrorOr<std::string> BitcodeReader::parseTriple() {
// Ignore other sub-blocks.
if (Stream.SkipBlock())
- return Error("Malformed block");
+ return error("Malformed block");
continue;
case BitstreamEntry::Record:
@@ -3244,10 +3214,10 @@ ErrorOr<std::string> BitcodeReader::parseTriple() {
}
}
-/// ParseMetadataAttachment - Parse metadata attachments.
-std::error_code BitcodeReader::ParseMetadataAttachment(Function &F) {
+/// Parse metadata attachments.
+std::error_code BitcodeReader::parseMetadataAttachment(Function &F) {
if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
- return Error("Invalid record");
+ return error("Invalid record");
SmallVector<uint64_t, 64> Record;
while (1) {
@@ -3256,7 +3226,7 @@ std::error_code BitcodeReader::ParseMetadataAttachment(Function &F) {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("Malformed block");
+ return error("Malformed block");
case BitstreamEntry::EndBlock:
return std::error_code();
case BitstreamEntry::Record:
@@ -3272,13 +3242,13 @@ std::error_code BitcodeReader::ParseMetadataAttachment(Function &F) {
case bitc::METADATA_ATTACHMENT: {
unsigned RecordLength = Record.size();
if (Record.empty())
- return Error("Invalid record");
+ return error("Invalid record");
if (RecordLength % 2 == 0) {
// A function attachment.
for (unsigned I = 0; I != RecordLength; I += 2) {
auto K = MDKindMap.find(Record[I]);
if (K == MDKindMap.end())
- return Error("Invalid ID");
+ return error("Invalid ID");
Metadata *MD = MDValueList.getValueFwdRef(Record[I + 1]);
F.setMetadata(K->second, cast<MDNode>(MD));
}
@@ -3292,7 +3262,7 @@ std::error_code BitcodeReader::ParseMetadataAttachment(Function &F) {
DenseMap<unsigned, unsigned>::iterator I =
MDKindMap.find(Kind);
if (I == MDKindMap.end())
- return Error("Invalid ID");
+ return error("Invalid ID");
Metadata *Node = MDValueList.getValueFwdRef(Record[i + 1]);
if (isa<LocalAsMetadata>(Node))
// Drop the attachment. This used to be legal, but there's no
@@ -3308,24 +3278,24 @@ std::error_code BitcodeReader::ParseMetadataAttachment(Function &F) {
}
}
-static std::error_code TypeCheckLoadStoreInst(DiagnosticHandlerFunction DH,
+static std::error_code typeCheckLoadStoreInst(DiagnosticHandlerFunction DH,
Type *ValType, Type *PtrType) {
if (!isa<PointerType>(PtrType))
- return Error(DH, "Load/Store operand is not a pointer type");
+ return error(DH, "Load/Store operand is not a pointer type");
Type *ElemType = cast<PointerType>(PtrType)->getElementType();
if (ValType && ValType != ElemType)
- return Error(DH, "Explicit load/store type does not match pointee type of "
+ return error(DH, "Explicit load/store type does not match pointee type of "
"pointer operand");
if (!PointerType::isLoadableOrStorableType(ElemType))
- return Error(DH, "Cannot load/store from pointer");
+ return error(DH, "Cannot load/store from pointer");
return std::error_code();
}
-/// ParseFunctionBody - Lazily parse the specified function body block.
-std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
+/// Lazily parse the specified function body block.
+std::error_code BitcodeReader::parseFunctionBody(Function *F) {
if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
- return Error("Invalid record");
+ return error("Invalid record");
InstructionList.clear();
unsigned ModuleValueListSize = ValueList.size();
@@ -3356,7 +3326,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
switch (Entry.Kind) {
case BitstreamEntry::Error:
- return Error("Malformed block");
+ return error("Malformed block");
case BitstreamEntry::EndBlock:
goto OutOfRecordLoop;
@@ -3364,27 +3334,27 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
switch (Entry.ID) {
default: // Skip unknown content.
if (Stream.SkipBlock())
- return Error("Invalid record");
+ return error("Invalid record");
break;
case bitc::CONSTANTS_BLOCK_ID:
- if (std::error_code EC = ParseConstants())
+ if (std::error_code EC = parseConstants())
return EC;
NextValueNo = ValueList.size();
break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
- if (std::error_code EC = ParseValueSymbolTable())
+ if (std::error_code EC = parseValueSymbolTable())
return EC;
break;
case bitc::METADATA_ATTACHMENT_ID:
- if (std::error_code EC = ParseMetadataAttachment(*F))
+ if (std::error_code EC = parseMetadataAttachment(*F))
return EC;
break;
case bitc::METADATA_BLOCK_ID:
- if (std::error_code EC = ParseMetadata())
+ if (std::error_code EC = parseMetadata())
return EC;
break;
case bitc::USELIST_BLOCK_ID:
- if (std::error_code EC = ParseUseLists())
+ if (std::error_code EC = parseUseLists())
return EC;
break;
}
@@ -3401,10 +3371,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned BitCode = Stream.readRecord(Entry.ID, Record);
switch (BitCode) {
default: // Default behavior: reject
- return Error("Invalid value");
+ return error("Invalid value");
case bitc::FUNC_CODE_DECLAREBLOCKS: { // DECLAREBLOCKS: [nblocks]
if (Record.size() < 1 || Record[0] == 0)
- return Error("Invalid record");
+ return error("Invalid record");
// Create all the basic blocks for the function.
FunctionBBs.resize(Record[0]);
@@ -3417,7 +3387,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
auto &BBRefs = BBFRI->second;
// Check for invalid basic block references.
if (BBRefs.size() > FunctionBBs.size())
- return Error("Invalid ID");
+ return error("Invalid ID");
assert(!BBRefs.empty() && "Unexpected empty array");
assert(!BBRefs.front() && "Invalid reference to entry block");
for (unsigned I = 0, E = FunctionBBs.size(), RE = BBRefs.size(); I != E;
@@ -3443,7 +3413,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
I = getLastInstruction();
if (!I)
- return Error("Invalid record");
+ return error("Invalid record");
I->setDebugLoc(LastLoc);
I = nullptr;
continue;
@@ -3451,7 +3421,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia]
I = getLastInstruction();
if (!I || Record.size() < 4)
- return Error("Invalid record");
+ return error("Invalid record");
unsigned Line = Record[0], Col = Record[1];
unsigned ScopeID = Record[2], IAID = Record[3];
@@ -3471,11 +3441,11 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) ||
OpNum+1 > Record.size())
- return Error("Invalid record");
+ return error("Invalid record");
- int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType());
+ int Opc = getDecodedBinaryOpcode(Record[OpNum++], LHS->getType());
if (Opc == -1)
- return Error("Invalid record");
+ return error("Invalid record");
I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
InstructionList.push_back(I);
if (OpNum < Record.size()) {
@@ -3517,12 +3487,12 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
OpNum+2 != Record.size())
- return Error("Invalid record");
+ return error("Invalid record");
Type *ResTy = getTypeByID(Record[OpNum]);
- int Opc = GetDecodedCastOpcode(Record[OpNum+1]);
+ int Opc = getDecodedCastOpcode(Record[OpNum + 1]);
if (Opc == -1 || !ResTy)
- return Error("Invalid record");
+ return error("Invalid record");
Instruction *Temp = nullptr;
if ((I = UpgradeBitCastInst(Opc, Op, ResTy, Temp))) {
if (Temp) {
@@ -3553,7 +3523,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *BasePtr;
if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr))
- return Error("Invalid record");
+ return error("Invalid record");
if (!Ty)
Ty = cast<SequentialType>(BasePtr->getType()->getScalarType())
@@ -3561,14 +3531,14 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
else if (Ty !=
cast<SequentialType>(BasePtr->getType()->getScalarType())
->getElementType())
- return Error(
+ return error(
"Explicit gep type does not match pointee type of pointer operand");
SmallVector<Value*, 16> GEPIdx;
while (OpNum != Record.size()) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op))
- return Error("Invalid record");
+ return error("Invalid record");
GEPIdx.push_back(Op);
}
@@ -3585,11 +3555,11 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Agg;
if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
- return Error("Invalid record");
+ return error("Invalid record");
unsigned RecSize = Record.size();
if (OpNum == RecSize)
- return Error("EXTRACTVAL: Invalid instruction with 0 indices");
+ return error("EXTRACTVAL: Invalid instruction with 0 indices");
SmallVector<unsigned, 4> EXTRACTVALIdx;
Type *CurTy = Agg->getType();
@@ -3599,13 +3569,13 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
uint64_t Index = Record[OpNum];
if (!IsStruct && !IsArray)
- return Error("EXTRACTVAL: Invalid type");
+ return error("EXTRACTVAL: Invalid type");
if ((unsigned)Index != Index)
- return Error("Invalid value");
+ return error("Invalid value");
if (IsStruct && Index >= CurTy->subtypes().size())
- return Error("EXTRACTVAL: Invalid struct index");
+ return error("EXTRACTVAL: Invalid struct index");
if (IsArray && Index >= CurTy->getArrayNumElements())
- return Error("EXTRACTVAL: Invalid array index");
+ return error("EXTRACTVAL: Invalid array index");
EXTRACTVALIdx.push_back((unsigned)Index);
if (IsStruct)
@@ -3624,14 +3594,14 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Agg;
if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
- return Error("Invalid record");
+ return error("Invalid record");
Value *Val;
if (getValueTypePair(Record, OpNum, NextValueNo, Val))
- return Error("Invalid record");
+ return error("Invalid record");
unsigned RecSize = Record.size();
if (OpNum == RecSize)
- return Error("INSERTVAL: Invalid instruction with 0 indices");
+ return error("INSERTVAL: Invalid instruction with 0 indices");
SmallVector<unsigned, 4> INSERTVALIdx;
Type *CurTy = Agg->getType();
@@ -3641,13 +3611,13 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
uint64_t Index = Record[OpNum];
if (!IsStruct && !IsArray)
- return Error("INSERTVAL: Invalid type");
+ return error("INSERTVAL: Invalid type");
if ((unsigned)Index != Index)
- return Error("Invalid value");
+ return error("Invalid value");
if (IsStruct && Index >= CurTy->subtypes().size())
- return Error("INSERTVAL: Invalid struct index");
+ return error("INSERTVAL: Invalid struct index");
if (IsArray && Index >= CurTy->getArrayNumElements())
- return Error("INSERTVAL: Invalid array index");
+ return error("INSERTVAL: Invalid array index");
INSERTVALIdx.push_back((unsigned)Index);
if (IsStruct)
@@ -3657,7 +3627,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
}
if (CurTy != Val->getType())
- return Error("Inserted value type doesn't match aggregate type");
+ return error("Inserted value type doesn't match aggregate type");
I = InsertValueInst::Create(Agg, Val, INSERTVALIdx);
InstructionList.push_back(I);
@@ -3672,7 +3642,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond))
- return Error("Invalid record");
+ return error("Invalid record");
I = SelectInst::Create(Cond, TrueVal, FalseVal);
InstructionList.push_back(I);
@@ -3687,18 +3657,18 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
getValueTypePair(Record, OpNum, NextValueNo, Cond))
- return Error("Invalid record");
+ return error("Invalid record");
// select condition can be either i1 or [N x i1]
if (VectorType* vector_type =
dyn_cast<VectorType>(Cond->getType())) {
// expect <n x i1>
if (vector_type->getElementType() != Type::getInt1Ty(Context))
- return Error("Invalid type for value");
+ return error("Invalid type for value");
} else {
// expect i1
if (Cond->getType() != Type::getInt1Ty(Context))
- return Error("Invalid type for value");
+ return error("Invalid type for value");
}
I = SelectInst::Create(Cond, TrueVal, FalseVal);
@@ -3711,9 +3681,9 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *Vec, *Idx;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
getValueTypePair(Record, OpNum, NextValueNo, Idx))
- return Error("Invalid record");
+ return error("Invalid record");
if (!Vec->getType()->isVectorTy())
- return Error("Invalid type for value");
+ return error("Invalid type for value");
I = ExtractElementInst::Create(Vec, Idx);
InstructionList.push_back(I);
break;
@@ -3723,13 +3693,13 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Vec, *Elt, *Idx;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec))
- return Error("Invalid record");
+ return error("Invalid record");
if (!Vec->getType()->isVectorTy())
- return Error("Invalid type for value");
+ return error("Invalid type for value");
if (popValue(Record, OpNum, NextValueNo,
cast<VectorType>(Vec->getType())->getElementType(), Elt) ||
getValueTypePair(Record, OpNum, NextValueNo, Idx))
- return Error("Invalid record");
+ return error("Invalid record");
I = InsertElementInst::Create(Vec, Elt, Idx);
InstructionList.push_back(I);
break;
@@ -3740,12 +3710,12 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *Vec1, *Vec2, *Mask;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) ||
popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2))
- return Error("Invalid record");
+ return error("Invalid record");
if (getValueTypePair(Record, OpNum, NextValueNo, Mask))
- return Error("Invalid record");
+ return error("Invalid record");
if (!Vec1->getType()->isVectorTy() || !Vec2->getType()->isVectorTy())
- return Error("Invalid type for value");
+ return error("Invalid type for value");
I = new ShuffleVectorInst(Vec1, Vec2, Mask);
InstructionList.push_back(I);
break;
@@ -3763,7 +3733,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) ||
OpNum+1 != Record.size())
- return Error("Invalid record");
+ return error("Invalid record");
if (LHS->getType()->isFPOrFPVectorTy())
I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
@@ -3785,9 +3755,9 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Op = nullptr;
if (getValueTypePair(Record, OpNum, NextValueNo, Op))
- return Error("Invalid record");
+ return error("Invalid record");
if (OpNum != Record.size())
- return Error("Invalid record");
+ return error("Invalid record");
I = ReturnInst::Create(Context, Op);
InstructionList.push_back(I);
@@ -3795,10 +3765,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#]
if (Record.size() != 1 && Record.size() != 3)
- return Error("Invalid record");
+ return error("Invalid record");
BasicBlock *TrueDest = getBasicBlock(Record[0]);
if (!TrueDest)
- return Error("Invalid record");
+ return error("Invalid record");
if (Record.size() == 1) {
I = BranchInst::Create(TrueDest);
@@ -3809,7 +3779,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *Cond = getValue(Record, 2, NextValueNo,
Type::getInt1Ty(Context));
if (!FalseDest || !Cond)
- return Error("Invalid record");
+ return error("Invalid record");
I = BranchInst::Create(TrueDest, FalseDest, Cond);
InstructionList.push_back(I);
}
@@ -3829,7 +3799,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *Cond = getValue(Record, 2, NextValueNo, OpTy);
BasicBlock *Default = getBasicBlock(Record[3]);
if (!OpTy || !Cond || !Default)
- return Error("Invalid record");
+ return error("Invalid record");
unsigned NumCases = Record[4];
@@ -3847,7 +3817,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned ActiveWords = 1;
if (ValueBitWidth > 64)
ActiveWords = Record[CurIdx++];
- Low = ReadWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords),
+ Low = readWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords),
ValueBitWidth);
CurIdx += ActiveWords;
@@ -3855,9 +3825,8 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
ActiveWords = 1;
if (ValueBitWidth > 64)
ActiveWords = Record[CurIdx++];
- APInt High =
- ReadWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords),
- ValueBitWidth);
+ APInt High = readWideAPInt(
+ makeArrayRef(&Record[CurIdx], ActiveWords), ValueBitWidth);
CurIdx += ActiveWords;
// FIXME: It is not clear whether values in the range should be
@@ -3881,12 +3850,12 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
// Old SwitchInst format without case ranges.
if (Record.size() < 3 || (Record.size() & 1) == 0)
- return Error("Invalid record");
+ return error("Invalid record");
Type *OpTy = getTypeByID(Record[0]);
Value *Cond = getValue(Record, 1, NextValueNo, OpTy);
BasicBlock *Default = getBasicBlock(Record[2]);
if (!OpTy || !Cond || !Default)
- return Error("Invalid record");
+ return error("Invalid record");
unsigned NumCases = (Record.size()-3)/2;
SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases);
InstructionList.push_back(SI);
@@ -3896,7 +3865,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]);
if (!CaseVal || !DestBB) {
delete SI;
- return Error("Invalid record");
+ return error("Invalid record");
}
SI->addCase(CaseVal, DestBB);
}
@@ -3905,11 +3874,11 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...]
if (Record.size() < 2)
- return Error("Invalid record");
+ return error("Invalid record");
Type *OpTy = getTypeByID(Record[0]);
Value *Address = getValue(Record, 1, NextValueNo, OpTy);
if (!OpTy || !Address)
- return Error("Invalid record");
+ return error("Invalid record");
unsigned NumDests = Record.size()-2;
IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests);
InstructionList.push_back(IBI);
@@ -3918,7 +3887,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
IBI->addDestination(DestBB);
} else {
delete IBI;
- return Error("Invalid record");
+ return error("Invalid record");
}
}
I = IBI;
@@ -3928,7 +3897,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_INVOKE: {
// INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...]
if (Record.size() < 4)
- return Error("Invalid record");
+ return error("Invalid record");
unsigned OpNum = 0;
AttributeSet PAL = getAttributes(Record[OpNum++]);
unsigned CCInfo = Record[OpNum++];
@@ -3938,42 +3907,42 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
FunctionType *FTy = nullptr;
if (CCInfo >> 13 & 1 &&
!(FTy = dyn_cast<FunctionType>(getTypeByID(Record[OpNum++]))))
- return Error("Explicit invoke type is not a function type");
+ return error("Explicit invoke type is not a function type");
Value *Callee;
if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
- return Error("Invalid record");
+ return error("Invalid record");
PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType());
if (!CalleeTy)
- return Error("Callee is not a pointer");
+ return error("Callee is not a pointer");
if (!FTy) {
FTy = dyn_cast<FunctionType>(CalleeTy->getElementType());
if (!FTy)
- return Error("Callee is not of pointer to function type");
+ return error("Callee is not of pointer to function type");
} else if (CalleeTy->getElementType() != FTy)
- return Error("Explicit invoke type does not match pointee type of "
+ return error("Explicit invoke type does not match pointee type of "
"callee operand");
if (Record.size() < FTy->getNumParams() + OpNum)
- return Error("Insufficient operands to call");
+ return error("Insufficient operands to call");
SmallVector<Value*, 16> Ops;
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
Ops.push_back(getValue(Record, OpNum, NextValueNo,
FTy->getParamType(i)));
if (!Ops.back())
- return Error("Invalid record");
+ return error("Invalid record");
}
if (!FTy->isVarArg()) {
if (Record.size() != OpNum)
- return Error("Invalid record");
+ return error("Invalid record");
} else {
// Read type/value pairs for varargs params.
while (OpNum != Record.size()) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op))
- return Error("Invalid record");
+ return error("Invalid record");
Ops.push_back(Op);
}
}
@@ -3989,7 +3958,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned Idx = 0;
Value *Val = nullptr;
if (getValueTypePair(Record, Idx, NextValueNo, Val))
- return Error("Invalid record");
+ return error("Invalid record");
I = ResumeInst::Create(Val);
InstructionList.push_back(I);
break;
@@ -4000,10 +3969,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
break;
case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...]
if (Record.size() < 1 || ((Record.size()-1)&1))
- return Error("Invalid record");
+ return error("Invalid record");
Type *Ty = getTypeByID(Record[0]);
if (!Ty)
- return Error("Invalid record");
+ return error("Invalid record");
PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2);
InstructionList.push_back(PN);
@@ -4019,28 +3988,42 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
V = getValue(Record, 1+i, NextValueNo, Ty);
BasicBlock *BB = getBasicBlock(Record[2+i]);
if (!V || !BB)
- return Error("Invalid record");
+ return error("Invalid record");
PN->addIncoming(V, BB);
}
I = PN;
break;
}
- case bitc::FUNC_CODE_INST_LANDINGPAD: {
+ case bitc::FUNC_CODE_INST_LANDINGPAD:
+ case bitc::FUNC_CODE_INST_LANDINGPAD_OLD: {
// LANDINGPAD: [ty, val, val, num, (id0,val0 ...)?]
unsigned Idx = 0;
- if (Record.size() < 4)
- return Error("Invalid record");
+ if (BitCode == bitc::FUNC_CODE_INST_LANDINGPAD) {
+ if (Record.size() < 3)
+ return error("Invalid record");
+ } else {
+ assert(BitCode == bitc::FUNC_CODE_INST_LANDINGPAD_OLD);
+ if (Record.size() < 4)
+ return error("Invalid record");
+ }
Type *Ty = getTypeByID(Record[Idx++]);
if (!Ty)
- return Error("Invalid record");
- Value *PersFn = nullptr;
- if (getValueTypePair(Record, Idx, NextValueNo, PersFn))
- return Error("Invalid record");
+ return error("Invalid record");
+ if (BitCode == bitc::FUNC_CODE_INST_LANDINGPAD_OLD) {
+ Value *PersFn = nullptr;
+ if (getValueTypePair(Record, Idx, NextValueNo, PersFn))
+ return error("Invalid record");
+
+ if (!F->hasPersonalityFn())
+ F->setPersonalityFn(cast<Constant>(PersFn));
+ else if (F->getPersonalityFn() != cast<Constant>(PersFn))
+ return error("Personality function mismatch");
+ }
bool IsCleanup = !!Record[Idx++];
unsigned NumClauses = Record[Idx++];
- LandingPadInst *LP = LandingPadInst::Create(Ty, PersFn, NumClauses);
+ LandingPadInst *LP = LandingPadInst::Create(Ty, NumClauses);
LP->setCleanup(IsCleanup);
for (unsigned J = 0; J != NumClauses; ++J) {
LandingPadInst::ClauseType CT =
@@ -4049,7 +4032,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, Idx, NextValueNo, Val)) {
delete LP;
- return Error("Invalid record");
+ return error("Invalid record");
}
assert((CT != LandingPadInst::Catch ||
@@ -4068,7 +4051,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
if (Record.size() != 4)
- return Error("Invalid record");
+ return error("Invalid record");
uint64_t AlignRecord = Record[3];
const uint64_t InAllocaMask = uint64_t(1) << 5;
const uint64_t ExplicitTypeMask = uint64_t(1) << 6;
@@ -4078,7 +4061,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if ((AlignRecord & ExplicitTypeMask) == 0) {
auto *PTy = dyn_cast_or_null<PointerType>(Ty);
if (!PTy)
- return Error("Old-style alloca with a non-pointer type");
+ return error("Old-style alloca with a non-pointer type");
Ty = PTy->getElementType();
}
Type *OpTy = getTypeByID(Record[1]);
@@ -4089,7 +4072,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
return EC;
}
if (!Ty || !Size)
- return Error("Invalid record");
+ return error("Invalid record");
AllocaInst *AI = new AllocaInst(Ty, Size, Align);
AI->setUsedWithInAlloca(InAlloca);
I = AI;
@@ -4101,13 +4084,13 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
(OpNum + 2 != Record.size() && OpNum + 3 != Record.size()))
- return Error("Invalid record");
+ return error("Invalid record");
Type *Ty = nullptr;
if (OpNum + 3 == Record.size())
Ty = getTypeByID(Record[OpNum++]);
if (std::error_code EC =
- TypeCheckLoadStoreInst(DiagnosticHandler, Ty, Op->getType()))
+ typeCheckLoadStoreInst(DiagnosticHandler, Ty, Op->getType()))
return EC;
if (!Ty)
Ty = cast<PointerType>(Op->getType())->getElementType();
@@ -4126,24 +4109,24 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
(OpNum + 4 != Record.size() && OpNum + 5 != Record.size()))
- return Error("Invalid record");
+ return error("Invalid record");
Type *Ty = nullptr;
if (OpNum + 5 == Record.size())
Ty = getTypeByID(Record[OpNum++]);
if (std::error_code EC =
- TypeCheckLoadStoreInst(DiagnosticHandler, Ty, Op->getType()))
+ typeCheckLoadStoreInst(DiagnosticHandler, Ty, Op->getType()))
return EC;
if (!Ty)
Ty = cast<PointerType>(Op->getType())->getElementType();
- AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
+ AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]);
if (Ordering == NotAtomic || Ordering == Release ||
Ordering == AcquireRelease)
- return Error("Invalid record");
+ return error("Invalid record");
if (Ordering != NotAtomic && Record[OpNum] == 0)
- return Error("Invalid record");
- SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
+ return error("Invalid record");
+ SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]);
unsigned Align;
if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align))
@@ -4164,9 +4147,9 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
cast<PointerType>(Ptr->getType())->getElementType(),
Val)) ||
OpNum + 2 != Record.size())
- return Error("Invalid record");
+ return error("Invalid record");
- if (std::error_code EC = TypeCheckLoadStoreInst(
+ if (std::error_code EC = typeCheckLoadStoreInst(
DiagnosticHandler, Val->getType(), Ptr->getType()))
return EC;
unsigned Align;
@@ -4188,18 +4171,18 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
cast<PointerType>(Ptr->getType())->getElementType(),
Val)) ||
OpNum + 4 != Record.size())
- return Error("Invalid record");
+ return error("Invalid record");
- if (std::error_code EC = TypeCheckLoadStoreInst(
+ if (std::error_code EC = typeCheckLoadStoreInst(
DiagnosticHandler, Val->getType(), Ptr->getType()))
return EC;
- AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
+ AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]);
if (Ordering == NotAtomic || Ordering == Acquire ||
Ordering == AcquireRelease)
- return Error("Invalid record");
- SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
+ return error("Invalid record");
+ SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]);
if (Ordering != NotAtomic && Record[OpNum] == 0)
- return Error("Invalid record");
+ return error("Invalid record");
unsigned Align;
if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align))
@@ -4222,13 +4205,13 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Cmp)) ||
popValue(Record, OpNum, NextValueNo, Cmp->getType(), New) ||
Record.size() < OpNum + 3 || Record.size() > OpNum + 5)
- return Error("Invalid record");
- AtomicOrdering SuccessOrdering = GetDecodedOrdering(Record[OpNum+1]);
+ return error("Invalid record");
+ AtomicOrdering SuccessOrdering = getDecodedOrdering(Record[OpNum + 1]);
if (SuccessOrdering == NotAtomic || SuccessOrdering == Unordered)
- return Error("Invalid record");
- SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+2]);
+ return error("Invalid record");
+ SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 2]);
- if (std::error_code EC = TypeCheckLoadStoreInst(
+ if (std::error_code EC = typeCheckLoadStoreInst(
DiagnosticHandler, Cmp->getType(), Ptr->getType()))
return EC;
AtomicOrdering FailureOrdering;
@@ -4236,7 +4219,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
FailureOrdering =
AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering);
else
- FailureOrdering = GetDecodedOrdering(Record[OpNum+3]);
+ FailureOrdering = getDecodedOrdering(Record[OpNum + 3]);
I = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, FailureOrdering,
SynchScope);
@@ -4263,15 +4246,15 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
OpNum+4 != Record.size())
- return Error("Invalid record");
- AtomicRMWInst::BinOp Operation = GetDecodedRMWOperation(Record[OpNum]);
+ return error("Invalid record");
+ AtomicRMWInst::BinOp Operation = getDecodedRMWOperation(Record[OpNum]);
if (Operation < AtomicRMWInst::FIRST_BINOP ||
Operation > AtomicRMWInst::LAST_BINOP)
- return Error("Invalid record");
- AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
+ return error("Invalid record");
+ AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]);
if (Ordering == NotAtomic || Ordering == Unordered)
- return Error("Invalid record");
- SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
+ return error("Invalid record");
+ SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]);
I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SynchScope);
cast<AtomicRMWInst>(I)->setVolatile(Record[OpNum+1]);
InstructionList.push_back(I);
@@ -4279,12 +4262,12 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, synchscope]
if (2 != Record.size())
- return Error("Invalid record");
- AtomicOrdering Ordering = GetDecodedOrdering(Record[0]);
+ return error("Invalid record");
+ AtomicOrdering Ordering = getDecodedOrdering(Record[0]);
if (Ordering == NotAtomic || Ordering == Unordered ||
Ordering == Monotonic)
- return Error("Invalid record");
- SynchronizationScope SynchScope = GetDecodedSynchScope(Record[1]);
+ return error("Invalid record");
+ SynchronizationScope SynchScope = getDecodedSynchScope(Record[1]);
I = new FenceInst(Context, Ordering, SynchScope);
InstructionList.push_back(I);
break;
@@ -4292,7 +4275,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_CALL: {
// CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...]
if (Record.size() < 3)
- return Error("Invalid record");
+ return error("Invalid record");
unsigned OpNum = 0;
AttributeSet PAL = getAttributes(Record[OpNum++]);
@@ -4301,24 +4284,24 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
FunctionType *FTy = nullptr;
if (CCInfo >> 15 & 1 &&
!(FTy = dyn_cast<FunctionType>(getTypeByID(Record[OpNum++]))))
- return Error("Explicit call type is not a function type");
+ return error("Explicit call type is not a function type");
Value *Callee;
if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
- return Error("Invalid record");
+ return error("Invalid record");
PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
if (!OpTy)
- return Error("Callee is not a pointer type");
+ return error("Callee is not a pointer type");
if (!FTy) {
FTy = dyn_cast<FunctionType>(OpTy->getElementType());
if (!FTy)
- return Error("Callee is not of pointer to function type");
+ return error("Callee is not of pointer to function type");
} else if (OpTy->getElementType() != FTy)
- return Error("Explicit call type does not match pointee type of "
+ return error("Explicit call type does not match pointee type of "
"callee operand");
if (Record.size() < FTy->getNumParams() + OpNum)
- return Error("Insufficient operands to call");
+ return error("Insufficient operands to call");
SmallVector<Value*, 16> Args;
// Read the fixed params.
@@ -4329,18 +4312,18 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Args.push_back(getValue(Record, OpNum, NextValueNo,
FTy->getParamType(i)));
if (!Args.back())
- return Error("Invalid record");
+ return error("Invalid record");
}
// Read type/value pairs for varargs params.
if (!FTy->isVarArg()) {
if (OpNum != Record.size())
- return Error("Invalid record");
+ return error("Invalid record");
} else {
while (OpNum != Record.size()) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op))
- return Error("Invalid record");
+ return error("Invalid record");
Args.push_back(Op);
}
}
@@ -4360,12 +4343,12 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty]
if (Record.size() < 3)
- return Error("Invalid record");
+ return error("Invalid record");
Type *OpTy = getTypeByID(Record[0]);
Value *Op = getValue(Record, 1, NextValueNo, OpTy);
Type *ResTy = getTypeByID(Record[2]);
if (!OpTy || !Op || !ResTy)
- return Error("Invalid record");
+ return error("Invalid record");
I = new VAArgInst(Op, ResTy);
InstructionList.push_back(I);
break;
@@ -4376,7 +4359,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
// this file.
if (!CurBB) {
delete I;
- return Error("Invalid instruction with no BB");
+ return error("Invalid instruction with no BB");
}
CurBB->getInstList().push_back(I);
@@ -4388,7 +4371,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
// Non-void values get registered in the value table for future use.
if (I && !I->getType()->isVoidTy())
- ValueList.AssignValue(I, NextValueNo++);
+ ValueList.assignValue(I, NextValueNo++);
}
OutOfRecordLoop:
@@ -4403,7 +4386,7 @@ OutOfRecordLoop:
delete A;
}
}
- return Error("Never resolved value found in function");
+ return error("Never resolved value found in function");
}
}
@@ -4418,15 +4401,15 @@ OutOfRecordLoop:
}
/// Find the function body in the bitcode stream
-std::error_code BitcodeReader::FindFunctionInStream(
+std::error_code BitcodeReader::findFunctionInStream(
Function *F,
DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator) {
while (DeferredFunctionInfoIterator->second == 0) {
if (Stream.AtEndOfStream())
- return Error("Could not find function in stream");
+ return error("Could not find function in stream");
// ParseModule will parse the next body in the stream and set its
// position in the DeferredFunctionInfo map.
- if (std::error_code EC = ParseModule(true))
+ if (std::error_code EC = parseModule(true))
return EC;
}
return std::error_code();
@@ -4451,14 +4434,14 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) {
assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
// If its position is recorded as 0, its body is somewhere in the stream
// but we haven't seen it yet.
- if (DFII->second == 0 && LazyStreamer)
- if (std::error_code EC = FindFunctionInStream(F, DFII))
+ if (DFII->second == 0 && IsStreamed)
+ if (std::error_code EC = findFunctionInStream(F, DFII))
return EC;
// Move the bit stream to the saved position of the deferred function body.
Stream.JumpToBit(DFII->second);
- if (std::error_code EC = ParseFunctionBody(F))
+ if (std::error_code EC = parseFunctionBody(F))
return EC;
F->setIsMaterializable(false);
@@ -4529,12 +4512,12 @@ std::error_code BitcodeReader::materializeModule(Module *M) {
// pointing to the END_BLOCK record after them. Now make sure the rest
// of the bits in the module have been read.
if (NextUnreadBit)
- ParseModule(true);
+ parseModule(true);
// Check that all block address forward references got resolved (as we
// promised above).
if (!BasicBlockFwdRefs.empty())
- return Error("Never resolved function from blockaddress");
+ return error("Never resolved function from blockaddress");
// Upgrade any intrinsic calls that slipped through (should not happen!) and
// delete the old functions to clean up. We can't do this unless the entire
@@ -4566,24 +4549,25 @@ std::vector<StructType *> BitcodeReader::getIdentifiedStructTypes() const {
return IdentifiedStructTypes;
}
-std::error_code BitcodeReader::InitStream() {
- if (LazyStreamer)
- return InitLazyStream();
- return InitStreamFromBuffer();
+std::error_code
+BitcodeReader::initStream(std::unique_ptr<DataStreamer> Streamer) {
+ if (Streamer)
+ return initLazyStream(std::move(Streamer));
+ return initStreamFromBuffer();
}
-std::error_code BitcodeReader::InitStreamFromBuffer() {
+std::error_code BitcodeReader::initStreamFromBuffer() {
const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart();
const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
if (Buffer->getBufferSize() & 3)
- return Error("Invalid bitcode signature");
+ return error("Invalid bitcode signature");
// If we have a wrapper header, parse it and ignore the non-bc file contents.
// The magic number is 0x0B17C0DE stored in little endian.
if (isBitcodeWrapper(BufPtr, BufEnd))
if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true))
- return Error("Invalid bitcode wrapper header");
+ return error("Invalid bitcode wrapper header");
StreamFile.reset(new BitstreamReader(BufPtr, BufEnd));
Stream.init(&*StreamFile);
@@ -4591,20 +4575,22 @@ std::error_code BitcodeReader::InitStreamFromBuffer() {
return std::error_code();
}
-std::error_code BitcodeReader::InitLazyStream() {
+std::error_code
+BitcodeReader::initLazyStream(std::unique_ptr<DataStreamer> Streamer) {
// Check and strip off the bitcode wrapper; BitstreamReader expects never to
// see it.
- auto OwnedBytes = llvm::make_unique<StreamingMemoryObject>(LazyStreamer);
+ auto OwnedBytes =
+ llvm::make_unique<StreamingMemoryObject>(std::move(Streamer));
StreamingMemoryObject &Bytes = *OwnedBytes;
StreamFile = llvm::make_unique<BitstreamReader>(std::move(OwnedBytes));
Stream.init(&*StreamFile);
unsigned char buf[16];
if (Bytes.readBytes(buf, 16, 0) != 16)
- return Error("Invalid bitcode signature");
+ return error("Invalid bitcode signature");
if (!isBitcode(buf, buf + 16))
- return Error("Invalid bitcode signature");
+ return error("Invalid bitcode signature");
if (isBitcodeWrapper(buf, buf + 4)) {
const unsigned char *bitcodeStart = buf;
@@ -4632,7 +4618,7 @@ class BitcodeErrorCategoryType : public std::error_category {
llvm_unreachable("Unknown error type!");
}
};
-}
+} // namespace
static ManagedStatic<BitcodeErrorCategoryType> ErrorCategory;
@@ -4644,83 +4630,86 @@ const std::error_category &llvm::BitcodeErrorCategory() {
// External interface
//===----------------------------------------------------------------------===//
-/// \brief Get a lazy one-at-time loading module from bitcode.
-///
-/// This isn't always used in a lazy context. In particular, it's also used by
-/// \a parseBitcodeFile(). If this is truly lazy, then we need to eagerly pull
-/// in forward-referenced functions from block address references.
-///
-/// \param[in] WillMaterializeAll Set to \c true if the caller promises to
-/// materialize everything -- in particular, if this isn't truly lazy.
-static ErrorOr<Module *>
-getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
- LLVMContext &Context, bool WillMaterializeAll,
- DiagnosticHandlerFunction DiagnosticHandler,
- bool ShouldLazyLoadMetadata = false) {
- Module *M = new Module(Buffer->getBufferIdentifier(), Context);
- BitcodeReader *R =
- new BitcodeReader(Buffer.get(), Context, DiagnosticHandler);
+static ErrorOr<std::unique_ptr<Module>>
+getBitcodeModuleImpl(std::unique_ptr<DataStreamer> Streamer, StringRef Name,
+ BitcodeReader *R, LLVMContext &Context,
+ bool MaterializeAll, bool ShouldLazyLoadMetadata) {
+ std::unique_ptr<Module> M = make_unique<Module>(Name, Context);
M->setMaterializer(R);
auto cleanupOnError = [&](std::error_code EC) {
R->releaseBuffer(); // Never take ownership on error.
- delete M; // Also deletes R.
return EC;
};
// Delay parsing Metadata if ShouldLazyLoadMetadata is true.
- if (std::error_code EC = R->ParseBitcodeInto(M, ShouldLazyLoadMetadata))
+ if (std::error_code EC = R->parseBitcodeInto(std::move(Streamer), M.get(),
+ ShouldLazyLoadMetadata))
return cleanupOnError(EC);
- if (!WillMaterializeAll)
+ if (MaterializeAll) {
+ // Read in the entire module, and destroy the BitcodeReader.
+ if (std::error_code EC = M->materializeAllPermanently())
+ return cleanupOnError(EC);
+ } else {
// Resolve forward references from blockaddresses.
if (std::error_code EC = R->materializeForwardReferencedFunctions())
return cleanupOnError(EC);
+ }
+ return std::move(M);
+}
+
+/// \brief Get a lazy one-at-time loading module from bitcode.
+///
+/// This isn't always used in a lazy context. In particular, it's also used by
+/// \a parseBitcodeFile(). If this is truly lazy, then we need to eagerly pull
+/// in forward-referenced functions from block address references.
+///
+/// \param[in] MaterializeAll Set to \c true if we should materialize
+/// everything.
+static ErrorOr<std::unique_ptr<Module>>
+getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
+ LLVMContext &Context, bool MaterializeAll,
+ DiagnosticHandlerFunction DiagnosticHandler,
+ bool ShouldLazyLoadMetadata = false) {
+ BitcodeReader *R =
+ new BitcodeReader(Buffer.get(), Context, DiagnosticHandler);
+
+ ErrorOr<std::unique_ptr<Module>> Ret =
+ getBitcodeModuleImpl(nullptr, Buffer->getBufferIdentifier(), R, Context,
+ MaterializeAll, ShouldLazyLoadMetadata);
+ if (!Ret)
+ return Ret;
Buffer.release(); // The BitcodeReader owns it now.
- return M;
+ return Ret;
}
-ErrorOr<Module *>
-llvm::getLazyBitcodeModule(std::unique_ptr<MemoryBuffer> &&Buffer,
- LLVMContext &Context,
- DiagnosticHandlerFunction DiagnosticHandler,
- bool ShouldLazyLoadMetadata) {
+ErrorOr<std::unique_ptr<Module>> llvm::getLazyBitcodeModule(
+ std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
+ DiagnosticHandlerFunction DiagnosticHandler, bool ShouldLazyLoadMetadata) {
return getLazyBitcodeModuleImpl(std::move(Buffer), Context, false,
DiagnosticHandler, ShouldLazyLoadMetadata);
}
-ErrorOr<std::unique_ptr<Module>>
-llvm::getStreamedBitcodeModule(StringRef Name, DataStreamer *Streamer,
- LLVMContext &Context,
- DiagnosticHandlerFunction DiagnosticHandler) {
+ErrorOr<std::unique_ptr<Module>> llvm::getStreamedBitcodeModule(
+ StringRef Name, std::unique_ptr<DataStreamer> Streamer,
+ LLVMContext &Context, DiagnosticHandlerFunction DiagnosticHandler) {
std::unique_ptr<Module> M = make_unique<Module>(Name, Context);
- BitcodeReader *R = new BitcodeReader(Streamer, Context, DiagnosticHandler);
- M->setMaterializer(R);
- if (std::error_code EC = R->ParseBitcodeInto(M.get()))
- return EC;
- return std::move(M);
+ BitcodeReader *R = new BitcodeReader(Context, DiagnosticHandler);
+
+ return getBitcodeModuleImpl(std::move(Streamer), Name, R, Context, false,
+ false);
}
-ErrorOr<Module *>
+ErrorOr<std::unique_ptr<Module>>
llvm::parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context,
DiagnosticHandlerFunction DiagnosticHandler) {
std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
- ErrorOr<Module *> ModuleOrErr = getLazyBitcodeModuleImpl(
- std::move(Buf), Context, true, DiagnosticHandler);
- if (!ModuleOrErr)
- return ModuleOrErr;
- Module *M = ModuleOrErr.get();
- // Read in the entire module, and destroy the BitcodeReader.
- if (std::error_code EC = M->materializeAllPermanently()) {
- delete M;
- return EC;
- }
-
+ return getLazyBitcodeModuleImpl(std::move(Buf), Context, true,
+ DiagnosticHandler);
// TODO: Restore the use-lists to the in-memory state when the bitcode was
// written. We must defer until the Module has been fully materialized.
-
- return M;
}
std::string
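The hunks above move the external entry points from a raw Module* to ErrorOr<std::unique_ptr<Module>>, so error paths no longer have to delete the module by hand. A minimal caller sketch under that interface follows; it is not part of the patch, and the header names and the null diagnostic handler are assumptions rather than anything taken from this diff.

#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
using namespace llvm;

static std::unique_ptr<Module> loadBitcode(MemoryBufferRef Buf, LLVMContext &Ctx) {
  // parseBitcodeFile now materializes the whole module and returns ownership;
  // on failure the reader has already cleaned up, so only the error code is left.
  ErrorOr<std::unique_ptr<Module>> ModOrErr =
      parseBitcodeFile(Buf, Ctx, /*DiagnosticHandler=*/nullptr);
  if (std::error_code EC = ModOrErr.getError()) {
    errs() << "bitcode parse failed: " << EC.message() << "\n";
    return nullptr;
  }
  return std::move(ModOrErr.get());
}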
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 97caefb..e79eeb0 100644
--- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -232,6 +232,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_STACK_PROTECT_REQ;
case Attribute::StackProtectStrong:
return bitc::ATTR_KIND_STACK_PROTECT_STRONG;
+ case Attribute::SafeStack:
+ return bitc::ATTR_KIND_SAFESTACK;
case Attribute::StructRet:
return bitc::ATTR_KIND_STRUCT_RET;
case Attribute::SanitizeAddress:
@@ -693,7 +695,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
for (const Function &F : *M) {
// FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment,
// section, visibility, gc, unnamed_addr, prologuedata,
- // dllstorageclass, comdat, prefixdata]
+ // dllstorageclass, comdat, prefixdata, personalityfn]
Vals.push_back(VE.getTypeID(F.getFunctionType()));
Vals.push_back(F.getCallingConv());
Vals.push_back(F.isDeclaration());
@@ -710,6 +712,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
Vals.push_back(F.hasComdat() ? VE.getComdatID(F.getComdat()) : 0);
Vals.push_back(F.hasPrefixData() ? (VE.getValueID(F.getPrefixData()) + 1)
: 0);
+ Vals.push_back(
+ F.hasPersonalityFn() ? (VE.getValueID(F.getPersonalityFn()) + 1) : 0);
unsigned AbbrevToUse = 0;
Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
@@ -1857,7 +1861,6 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
const LandingPadInst &LP = cast<LandingPadInst>(I);
Code = bitc::FUNC_CODE_INST_LANDINGPAD;
Vals.push_back(VE.getTypeID(LP.getType()));
- PushValueAndType(LP.getPersonalityFn(), InstID, Vals, VE);
Vals.push_back(LP.isCleanup());
Vals.push_back(LP.getNumClauses());
for (unsigned I = 0, E = LP.getNumClauses(); I != E; ++I) {
@@ -2403,10 +2406,7 @@ enum {
static void WriteInt32ToBuffer(uint32_t Value, SmallVectorImpl<char> &Buffer,
uint32_t &Position) {
- Buffer[Position + 0] = (unsigned char) (Value >> 0);
- Buffer[Position + 1] = (unsigned char) (Value >> 8);
- Buffer[Position + 2] = (unsigned char) (Value >> 16);
- Buffer[Position + 3] = (unsigned char) (Value >> 24);
+ support::endian::write32le(&Buffer[Position], Value);
Position += 4;
}
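The WriteInt32ToBuffer hunk above swaps four manual byte stores for support::endian::write32le; both spell out the same little-endian layout. A self-contained sketch of that layout (standard library only, not LLVM code), using the 0x0B17C0DE wrapper magic mentioned earlier in the reader:

#include <cstdint>
#include <cstdio>

// Same byte order the removed shifts produced: least significant byte first.
static void write32le(unsigned char *Out, uint32_t Value) {
  Out[0] = static_cast<unsigned char>(Value >> 0);
  Out[1] = static_cast<unsigned char>(Value >> 8);
  Out[2] = static_cast<unsigned char>(Value >> 16);
  Out[3] = static_cast<unsigned char>(Value >> 24);
}

int main() {
  unsigned char Buf[4];
  write32le(Buf, 0x0B17C0DE); // bitcode wrapper magic
  std::printf("%02x %02x %02x %02x\n", Buf[0], Buf[1], Buf[2], Buf[3]);
  // prints: de c0 17 0b
  return 0;
}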
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 3165743..c890380 100644
--- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -41,7 +41,7 @@ namespace {
return false;
}
};
-}
+} // namespace
char WriteBitcodePass::ID = 0;
diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index 6c517f5..53c3a40 100644
--- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -52,7 +52,7 @@ struct OrderMap {
IDs[V].first = ID;
}
};
-}
+} // namespace
static void orderValue(const Value *V, OrderMap &OM) {
if (OM.lookup(V).first)
@@ -93,6 +93,9 @@ static OrderMap orderModule(const Module &M) {
if (F.hasPrologueData())
if (!isa<GlobalValue>(F.getPrologueData()))
orderValue(F.getPrologueData(), OM);
+ if (F.hasPersonalityFn())
+ if (!isa<GlobalValue>(F.getPersonalityFn()))
+ orderValue(F.getPersonalityFn(), OM);
}
OM.LastGlobalConstantID = OM.size();
@@ -274,6 +277,8 @@ static UseListOrderStack predictUseListOrder(const Module &M) {
predictValueUseListOrder(F.getPrefixData(), nullptr, OM, Stack);
if (F.hasPrologueData())
predictValueUseListOrder(F.getPrologueData(), nullptr, OM, Stack);
+ if (F.hasPersonalityFn())
+ predictValueUseListOrder(F.getPersonalityFn(), nullptr, OM, Stack);
}
return Stack;
@@ -291,44 +296,45 @@ ValueEnumerator::ValueEnumerator(const Module &M,
UseListOrders = predictUseListOrder(M);
// Enumerate the global variables.
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- EnumerateValue(I);
+ for (const GlobalVariable &GV : M.globals())
+ EnumerateValue(&GV);
// Enumerate the functions.
- for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
- EnumerateValue(I);
- EnumerateAttributes(cast<Function>(I)->getAttributes());
+ for (const Function & F : M) {
+ EnumerateValue(&F);
+ EnumerateAttributes(F.getAttributes());
}
// Enumerate the aliases.
- for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E; ++I)
- EnumerateValue(I);
+ for (const GlobalAlias &GA : M.aliases())
+ EnumerateValue(&GA);
// Remember what is the cutoff between globalvalue's and other constants.
unsigned FirstConstant = Values.size();
// Enumerate the global variable initializers.
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- if (I->hasInitializer())
- EnumerateValue(I->getInitializer());
+ for (const GlobalVariable &GV : M.globals())
+ if (GV.hasInitializer())
+ EnumerateValue(GV.getInitializer());
// Enumerate the aliasees.
- for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E; ++I)
- EnumerateValue(I->getAliasee());
+ for (const GlobalAlias &GA : M.aliases())
+ EnumerateValue(GA.getAliasee());
// Enumerate the prefix data constants.
- for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (I->hasPrefixData())
- EnumerateValue(I->getPrefixData());
+ for (const Function &F : M)
+ if (F.hasPrefixData())
+ EnumerateValue(F.getPrefixData());
// Enumerate the prologue data constants.
+ for (const Function &F : M)
+ if (F.hasPrologueData())
+ EnumerateValue(F.getPrologueData());
+
+ // Enumerate the personality functions.
for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (I->hasPrologueData())
- EnumerateValue(I->getPrologueData());
+ if (I->hasPersonalityFn())
+ EnumerateValue(I->getPersonalityFn());
// Enumerate the metadata type.
//
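The writer above stores the new personalityfn slot as VE.getValueID(F.getPersonalityFn()) + 1, or 0 when the function has none, and the reader undoes it with Record[14] - 1; the +1 bias lets a plain 0 mean "no personality function recorded". A small standalone sketch of that optional-index convention; the function names here are illustrative only, not LLVM API:

#include <cassert>
#include <cstdint>

// Writer side: 0 marks "absent", any real value ID is shifted up by one.
static uint64_t encodeOptionalID(bool Present, uint64_t ValueID) {
  return Present ? ValueID + 1 : 0;
}

// Reader side: undo the bias; a zero field means nothing was recorded.
static bool decodeOptionalID(uint64_t Field, uint64_t &ValueID) {
  if (Field == 0)
    return false;
  ValueID = Field - 1;
  return true;
}

int main() {
  uint64_t ID = 0;
  assert(!decodeOptionalID(encodeOptionalID(false, 0), ID));
  assert(decodeOptionalID(encodeOptionalID(true, 42), ID) && ID == 42);
  return 0;
}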
diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h
index 92d166e..b2daa48 100644
--- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h
@@ -203,6 +203,6 @@ private:
void EnumerateNamedMetadata(const Module &M);
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
index 18c8bb5..63d2085 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -174,6 +174,6 @@ class RegisterClassInfo;
RenameOrderType& RenameOrder,
std::map<unsigned, unsigned> &RenameMap);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
index a61a8ef..7985241 100644
--- a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
+++ b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
@@ -62,6 +62,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
index 211fc98..e0ce3f9 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -48,5 +48,5 @@ public:
void resetUsedFlag() { HasBeenUsed = false; }
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 2e3b83a..95da588 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -151,7 +151,7 @@ void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
}
StringRef AsmPrinter::getTargetTriple() const {
- return TM.getTargetTriple();
+ return TM.getTargetTriple().str();
}
/// getCurrentSection() - Return the current section we are emitting to.
@@ -172,7 +172,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
bool AsmPrinter::doInitialization(Module &M) {
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
- MMI->AnalyzeModule(M);
// Initialize TargetLoweringObjectFile.
const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
@@ -222,7 +221,8 @@ bool AsmPrinter::doInitialization(Module &M) {
// We're at the module level. Construct MCSubtarget from the default CPU
// and target triple.
std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
- TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
+ TM.getTargetTriple().str(), TM.getTargetCPU(),
+ TM.getTargetFeatureString()));
OutStreamer->AddComment("Start of file scope inline assembly");
OutStreamer->AddBlankLine();
EmitInlineAsm(M.getModuleInlineAsm()+"\n", *STI, TM.Options.MCOptions);
@@ -232,7 +232,7 @@ bool AsmPrinter::doInitialization(Module &M) {
if (MAI->doesSupportDebugInformation()) {
bool skip_dwarf = false;
- if (Triple(TM.getTargetTriple()).isKnownWindowsMSVCEnvironment()) {
+ if (TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) {
Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this),
DbgTimerName,
CodeViewLineTablesGroupName));
@@ -900,12 +900,11 @@ void AsmPrinter::EmitFunctionBody() {
if (MAI->hasDotTypeDotSizeDirective()) {
// We can get the size as difference between the function label and the
// temp label.
- const MCExpr *SizeExp =
- MCBinaryExpr::createSub(MCSymbolRefExpr::create(CurrentFnEnd, OutContext),
- MCSymbolRefExpr::create(CurrentFnSymForSize,
- OutContext),
- OutContext);
- OutStreamer->emitELFSize(cast<MCSymbolELF>(CurrentFnSym), SizeExp);
+ const MCExpr *SizeExp = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(CurrentFnEnd, OutContext),
+ MCSymbolRefExpr::create(CurrentFnSymForSize, OutContext), OutContext);
+ if (auto Sym = dyn_cast<MCSymbolELF>(CurrentFnSym))
+ OutStreamer->emitELFSize(Sym, SizeExp);
}
for (const HandlerInfo &HI : Handlers) {
@@ -1043,8 +1042,7 @@ bool AsmPrinter::doFinalization(Module &M) {
if (!ModuleFlags.empty())
TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, *Mang, TM);
- Triple TT(TM.getTargetTriple());
- if (TT.isOSBinFormatELF()) {
+ if (TM.getTargetTriple().isOSBinFormatELF()) {
MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
// Output stubs for external and common global variables.
@@ -1591,25 +1589,7 @@ void AsmPrinter::EmitInt32(int Value) const {
/// .set if it avoids relocations.
void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
unsigned Size) const {
- if (!MAI->doesDwarfUseRelocationsAcrossSections())
- if (OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size))
- return;
-
- // Get the Hi-Lo expression.
- const MCExpr *Diff =
- MCBinaryExpr::createSub(MCSymbolRefExpr::create(Hi, OutContext),
- MCSymbolRefExpr::create(Lo, OutContext),
- OutContext);
-
- if (!MAI->doesSetDirectiveSuppressesReloc()) {
- OutStreamer->EmitValue(Diff, Size);
- return;
- }
-
- // Otherwise, emit with .set (aka assignment).
- MCSymbol *SetLabel = createTempSymbol("set");
- OutStreamer->EmitAssignment(SetLabel, Diff);
- OutStreamer->EmitSymbolValue(SetLabel, Size);
+ OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size);
}
/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
@@ -1811,40 +1791,30 @@ static int isRepeatedByteSequence(const ConstantDataSequential *V) {
/// composed of a repeated sequence of identical bytes and return the
/// byte value. If it is not a repeated sequence, return -1.
static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
-
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- if (CI->getBitWidth() > 64) return -1;
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSizeInBits(V->getType());
+ assert(Size % 8 == 0);
- uint64_t Size =
- TM.getDataLayout()->getTypeAllocSize(V->getType());
- uint64_t Value = CI->getZExtValue();
+ // Extend the element to take zero padding into account.
+ APInt Value = CI->getValue().zextOrSelf(Size);
+ if (!Value.isSplat(8))
+ return -1;
- // Make sure the constant is at least 8 bits long and has a power
- // of 2 bit width. This guarantees the constant bit width is
- // always a multiple of 8 bits, avoiding issues with padding out
- // to Size and other such corner cases.
- if (CI->getBitWidth() < 8 || !isPowerOf2_64(CI->getBitWidth())) return -1;
-
- uint8_t Byte = static_cast<uint8_t>(Value);
-
- for (unsigned i = 1; i < Size; ++i) {
- Value >>= 8;
- if (static_cast<uint8_t>(Value) != Byte) return -1;
- }
- return Byte;
+ return Value.zextOrTrunc(8).getZExtValue();
}
if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
// Make sure all array elements are sequences of the same repeated
// byte.
assert(CA->getNumOperands() != 0 && "Should be a CAZ");
- int Byte = isRepeatedByteSequence(CA->getOperand(0), TM);
- if (Byte == -1) return -1;
-
- for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
- int ThisByte = isRepeatedByteSequence(CA->getOperand(i), TM);
- if (ThisByte == -1) return -1;
- if (Byte != ThisByte) return -1;
- }
+ Constant *Op0 = CA->getOperand(0);
+ int Byte = isRepeatedByteSequence(Op0, TM);
+ if (Byte == -1)
+ return -1;
+
+ // All array elements must be equal.
+ for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i)
+ if (CA->getOperand(i) != Op0)
+ return -1;
return Byte;
}
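For reference, the rewritten isRepeatedByteSequence above delegates the byte comparison to APInt: the constant is zero-extended to its alloc size in bits (so zero padding is checked too) and APInt::isSplat(8) asks whether the value is a single byte repeated. A standalone sketch of that call, assuming LLVM's ADT headers as used by the patch itself:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  // 0xABAB as a 16-bit value is the byte 0xAB repeated, so it is a splat;
  // zextOrTrunc(8) then yields the byte value the function returns.
  llvm::APInt Repeated(16, 0xABABu);
  assert(Repeated.isSplat(8));
  assert(Repeated.zextOrTrunc(8).getZExtValue() == 0xAB);

  // 0x01AB is not a splat: its two bytes differ, so the function returns -1.
  llvm::APInt Mixed(16, 0x01ABu);
  assert(!Mixed.isSplat(8));
  return 0;
}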
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 7dbfddf..8ee613b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -157,24 +157,20 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
OutStreamer->EmitIntValue(0, GetSizeOfEncodedValue(Encoding));
}
-/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its
-/// section. This can be done with a special directive if the target supports
-/// it (e.g. cygwin) or by emitting it as an offset from a label at the start
-/// of the section.
-///
-/// SectionLabel is a temporary label emitted at the start of the section that
-/// Label lives in.
-void AsmPrinter::emitSectionOffset(const MCSymbol *Label) const {
- // On COFF targets, we have to emit the special .secrel32 directive.
- if (MAI->needsDwarfSectionOffsetDirective()) {
- OutStreamer->EmitCOFFSecRel32(Label);
- return;
- }
+void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label,
+ bool ForceOffset) const {
+ if (!ForceOffset) {
+ // On COFF targets, we have to emit the special .secrel32 directive.
+ if (MAI->needsDwarfSectionOffsetDirective()) {
+ OutStreamer->EmitCOFFSecRel32(Label);
+ return;
+ }
- // If the format uses relocations with dwarf, refer to the symbol directly.
- if (MAI->doesDwarfUseRelocationsAcrossSections()) {
- OutStreamer->EmitSymbolValue(Label, 4);
- return;
+ // If the format uses relocations with dwarf, refer to the symbol directly.
+ if (MAI->doesDwarfUseRelocationsAcrossSections()) {
+ OutStreamer->EmitSymbolValue(Label, 4);
+ return;
+ }
}
// Otherwise, emit it as a label difference from the start of the section.
@@ -183,7 +179,7 @@ void AsmPrinter::emitSectionOffset(const MCSymbol *Label) const {
void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const {
if (MAI->doesDwarfUseRelocationsAcrossSections()) {
- emitSectionOffset(S.getSymbol());
+ emitDwarfSymbolReference(S.getSymbol());
return;
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 0cc829f..7a712a0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -103,6 +103,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index fa8449e..4847de4 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -618,11 +618,7 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
DwarfDebug *DD = AP->getDwarfDebug();
MCSymbol *Label = DD->getDebugLocs().getList(Index).Label;
-
- if (AP->MAI->doesDwarfUseRelocationsAcrossSections() && !DD->useSplitDwarf())
- AP->emitSectionOffset(Label);
- else
- AP->EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);
+ AP->emitDwarfSymbolReference(Label, /*ForceOffset*/ DD->useSplitDwarf());
}
#ifndef NDEBUG
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
index 1850e04..789e6dd 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -157,6 +157,6 @@ private:
AsmPrinter *AP;
DenseMap<const DIE *, unsigned> Numbering;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
index 546d1b4..5d40050 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
@@ -55,6 +55,6 @@ public:
void calculateDbgValueHistory(const MachineFunction *MF,
const TargetRegisterInfo *TRI,
DbgValueHistoryMap &Result);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 6a943c6..083228b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -175,6 +175,6 @@ inline bool operator<(const DebugLocEntry::Value &A,
B.getExpression()->getBitPieceOffset();
}
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 3001da2..1ae385d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -129,5 +129,5 @@ private:
return Entries[EI + 1].CommentOffset - Entries[EI].CommentOffset;
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 4d81441..cc677c2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -252,5 +252,5 @@ public:
void dump() { print(dbgs()); }
#endif
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 689184a..45c56fb 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -817,4 +817,4 @@ bool DwarfCompileUnit::includeMinimalInlineScopes() const {
return getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly ||
(DD->useSplitDwarf() && !Skeleton);
}
-} // end llvm namespace
+} // namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 50e4a54e..48c302b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -231,6 +231,6 @@ public:
const MCSymbol *getBaseAddress() const { return BaseAddress; }
};
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 3f6665b..fb33169 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1414,7 +1414,7 @@ void DwarfDebug::emitDebugPubSection(
Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION);
Asm->OutStreamer->AddComment("Offset of Compilation Unit Info");
- Asm->emitSectionOffset(TheU->getLabelBegin());
+ Asm->emitDwarfSymbolReference(TheU->getLabelBegin());
Asm->OutStreamer->AddComment("Compilation Unit Length");
Asm->EmitInt32(TheU->getLength());
@@ -1562,8 +1562,6 @@ void DwarfDebug::emitDebugLoc() {
Asm->OutStreamer->EmitLabel(List.Label);
const DwarfCompileUnit *CU = List.CU;
for (const auto &Entry : DebugLocs.getEntries(List)) {
- if (Entry.BeginSym == Entry.EndSym)
- continue;
// Set up the range. This range is relative to the entry point of the
// compile unit. This is a hard coded 0 for low_pc when we're emitting
// ranges, or the DW_AT_low_pc on the compile unit otherwise.
@@ -1741,7 +1739,7 @@ void DwarfDebug::emitDebugARanges() {
Asm->OutStreamer->AddComment("DWARF Arange version number");
Asm->EmitInt16(dwarf::DW_ARANGES_VERSION);
Asm->OutStreamer->AddComment("Offset Into Debug Info Section");
- Asm->emitSectionOffset(CU->getLabelBegin());
+ Asm->emitDwarfSymbolReference(CU->getLabelBegin());
Asm->OutStreamer->AddComment("Address Size (in bytes)");
Asm->EmitInt8(PtrSize);
Asm->OutStreamer->AddComment("Segment Size (in bytes)");
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index d569827..a2799b8 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -65,11 +65,6 @@ void DwarfExpression::AddShr(unsigned ShiftBy) {
EmitOp(dwarf::DW_OP_shr);
}
-void DwarfExpression::AddOpStackValue() {
- if (DwarfVersion >= 4)
- EmitOp(dwarf::DW_OP_stack_value);
-}
-
bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) {
if (isFrameRegister(MachineReg)) {
// If variable offset is based in frame register then use fbreg.
@@ -177,14 +172,16 @@ void DwarfExpression::AddSignedConstant(int Value) {
// value, so the producers and consumers started to rely on heuristics
// to disambiguate the value vs. location status of the expression.
// See PR21176 for more details.
- AddOpStackValue();
+ if (DwarfVersion >= 4)
+ EmitOp(dwarf::DW_OP_stack_value);
}
void DwarfExpression::AddUnsignedConstant(unsigned Value) {
EmitOp(dwarf::DW_OP_constu);
EmitUnsigned(Value);
// cf. comment in DwarfExpression::AddSignedConstant().
- AddOpStackValue();
+ if (DwarfVersion >= 4)
+ EmitOp(dwarf::DW_OP_stack_value);
}
static unsigned getOffsetOrZero(unsigned OffsetInBits,
@@ -215,30 +212,15 @@ bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr,
getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
}
case dwarf::DW_OP_plus: {
+ // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset].
auto N = I.getNext();
- unsigned Offset = I->getArg(0);
- // First combine all DW_OP_plus until we hit either a DW_OP_deref or a
- // DW_OP_bit_piece
- while (N != E && N->getOp() == dwarf::DW_OP_plus) {
- Offset += N->getArg(0);
- ++I;
- N = I.getNext();
- }
if (N != E && N->getOp() == dwarf::DW_OP_deref) {
- // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset].
+ unsigned Offset = I->getArg(0);
ValidReg = AddMachineRegIndirect(MachineReg, Offset);
std::advance(I, 2);
- } else {
- assert ((N == E) || (N->getOp() == dwarf::DW_OP_bit_piece));
- if (Offset == 0) {
- ValidReg = AddMachineRegPiece(MachineReg);
- } else {
- ValidReg = AddMachineRegIndirect(MachineReg, Offset);
- AddOpStackValue();
- }
- ++I;
- }
- break;
+ break;
+ } else
+ ValidReg = AddMachineRegPiece(MachineReg);
}
case dwarf::DW_OP_deref: {
// [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
@@ -255,7 +237,6 @@ bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr,
// Emit remaining elements of the expression.
AddExpression(I, E, PieceOffsetInBits);
-
return true;
}
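The simplified DW_OP_plus handling above keeps a single pattern rewrite. As a rough standalone sketch of the intent (toy names, not LLVM API; the fall-through into the DW_OP_deref case in the real code is elided): a register qualified by DW_OP_plus K, DW_OP_deref becomes a base-register location with offset K, and any other suffix leaves the register described as a piece.

#include <cstdint>
#include <cstddef>
#include <vector>

// Toy stand-ins for the DWARF operators involved.
enum class Op { Plus, Deref, BRegOffset, RegPiece };
struct Elt { Op O; int64_t Arg; };

// Handle the expression prefix that qualifies a machine register:
// [Plus K, Deref, rest...] -> emit BRegOffset K and consume both elements;
// any other prefix         -> describe the register as a piece, consume none.
static std::size_t lowerRegPrefix(const std::vector<Elt> &Expr, std::size_t I,
                                  std::vector<Elt> &Out) {
  if (I + 1 < Expr.size() && Expr[I].O == Op::Plus &&
      Expr[I + 1].O == Op::Deref) {
    Out.push_back({Op::BRegOffset, Expr[I].Arg}); // [reg, +K, deref] -> breg K
    return I + 2;
  }
  Out.push_back({Op::RegPiece, 0});
  return I; // remaining elements are appended by AddExpression afterwards
}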
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index f6249ff..154d7d9 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -83,9 +83,6 @@ public:
bool AddMachineRegPiece(unsigned MachineReg, unsigned PieceSizeInBits = 0,
unsigned PieceOffsetInBits = 0);
- /// Emit a DW_OP_stack_value
- void AddOpStackValue();
-
/// Emit a signed constant.
void AddSignedConstant(int Value);
/// Emit an unsigned constant.
@@ -134,6 +131,6 @@ public:
void EmitUnsigned(uint64_t Value) override;
bool isFrameRegister(unsigned MachineReg) override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 5ef333c..fdefb1d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -170,4 +170,4 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
Vars.push_back(Var);
return true;
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index 8402027..22759fd 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -114,5 +114,5 @@ public:
return DITypeNodeToDieMap.lookup(TypeMD);
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index 93a1684..c107258 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -45,5 +45,5 @@ public:
/// Get a reference to an entry in the string pool.
EntryRef getEntry(AsmPrinter &Asm, StringRef Str);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 907f670..f4b15ba 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -931,7 +931,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
StringRef PropertyName = Property->getName();
addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
if (Property->getType())
- addType(ElemDie, Property->getType());
+ addType(ElemDie, resolve(Property->getType()));
addSourceLine(ElemDie, Property);
StringRef GetterName = Property->getGetterName();
if (!GetterName.empty())
@@ -1449,10 +1449,8 @@ void DwarfUnit::emitHeader(bool UseOffsets) {
// start of the section. Use a relocatable offset where needed to ensure
// linking doesn't invalidate that offset.
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
- if (!UseOffsets)
- Asm->emitSectionOffset(TLOF.getDwarfAbbrevSection()->getBeginSymbol());
- else
- Asm->EmitInt32(0);
+ Asm->emitDwarfSymbolReference(TLOF.getDwarfAbbrevSection()->getBeginSymbol(),
+ UseOffsets);
Asm->OutStreamer->AddComment("Address Size (in bytes)");
Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index f56c9b4..200ddf0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -402,5 +402,5 @@ public:
}
DwarfCompileUnit &getCU() override { return CU; }
};
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
index 65973fa..128a8ad 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -132,7 +132,7 @@ public:
void beginInstruction(const MachineInstr *MI) override {}
void endInstruction() override {}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index 535b1f6..11bfe76 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -378,4 +378,4 @@ void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) {
return;
maybeRecordLocation(DL, Asm->MF);
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index f166350..1ba6060 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -50,6 +50,11 @@ WinException::~WinException() {}
/// endModule - Emit all exception information that should come after the
/// content.
void WinException::endModule() {
+ auto &OS = *Asm->OutStreamer;
+ const Module *M = MMI->getModule();
+ for (const Function &F : *M)
+ if (F.hasFnAttribute("safeseh"))
+ OS.EmitCOFFSafeSEH(Asm->getSymbol(&F));
}
void WinException::beginFunction(const MachineFunction *MF) {
@@ -144,7 +149,7 @@ void WinException::endFunction(const MachineFunction *MF) {
if (Per == EHPersonality::MSVC_Win64SEH)
emitCSpecificHandlerTable();
else if (Per == EHPersonality::MSVC_X86SEH)
- emitCSpecificHandlerTable(); // FIXME
+ emitExceptHandlerTable(MF);
else if (Per == EHPersonality::MSVC_CXX)
emitCXXFrameHandler3Table(MF);
else
@@ -444,7 +449,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
Asm->OutContext.getOrCreateParentFrameOffsetSymbol(
GlobalValue::getRealLinkageName(HT.Handler->getName()));
const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::create(
- ParentFrameOffset, MCSymbolRefExpr::VK_None, Asm->OutContext);
+ ParentFrameOffset, Asm->OutContext);
OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset
}
}
@@ -541,3 +546,103 @@ void WinException::extendIP2StateTable(const MachineFunction *MF,
}
}
}
+
+/// Emit the language-specific data that _except_handler3 and 4 expect. This is
+/// functionally equivalent to the __C_specific_handler table, except it is
+/// indexed by state number instead of IP.
+void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
+ MCStreamer &OS = *Asm->OutStreamer;
+
+ // Define the EH registration node offset label in terms of its frameescape
+ // label. The WinEHStatePass ensures that the registration node is passed to
+ // frameescape. This allows SEH filter functions to access the
+ // EXCEPTION_POINTERS field, which is filled in by the _except_handlerN.
+ const Function *F = MF->getFunction();
+ WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(F);
+ assert(FuncInfo.EHRegNodeEscapeIndex != INT_MAX &&
+ "no EH reg node frameescape index");
+ StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ MCSymbol *ParentFrameOffset =
+ Asm->OutContext.getOrCreateParentFrameOffsetSymbol(FLinkageName);
+ MCSymbol *FrameAllocSym = Asm->OutContext.getOrCreateFrameAllocSymbol(
+ FLinkageName, FuncInfo.EHRegNodeEscapeIndex);
+ const MCSymbolRefExpr *FrameAllocSymRef =
+ MCSymbolRefExpr::create(FrameAllocSym, Asm->OutContext);
+ OS.EmitAssignment(ParentFrameOffset, FrameAllocSymRef);
+
+ // Emit the __ehtable label that we use for llvm.x86.seh.lsda.
+ MCSymbol *LSDALabel = Asm->OutContext.getOrCreateLSDASymbol(FLinkageName);
+ OS.EmitLabel(LSDALabel);
+
+ const Function *Per = MMI->getPersonality();
+ StringRef PerName = Per->getName();
+ int BaseState = -1;
+ if (PerName == "_except_handler4") {
+ // The LSDA for _except_handler4 starts with this struct, followed by the
+ // scope table:
+ //
+ // struct EH4ScopeTable {
+ // int32_t GSCookieOffset;
+ // int32_t GSCookieXOROffset;
+ // int32_t EHCookieOffset;
+ // int32_t EHCookieXOROffset;
+ // ScopeTableEntry ScopeRecord[];
+ // };
+ //
+ // Only the EHCookieOffset field appears to vary, and it appears to be the
+ // offset from the final saved SP value to the retaddr.
+ OS.EmitIntValue(-2, 4);
+ OS.EmitIntValue(0, 4);
+ // FIXME: Calculate.
+ OS.EmitIntValue(9999, 4);
+ OS.EmitIntValue(0, 4);
+ BaseState = -2;
+ }
+
+ // Build a list of pointers to LandingPadInfos and then sort by WinEHState.
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ SmallVector<const LandingPadInfo *, 4> LPads;
+ LPads.reserve((PadInfos.size()));
+ for (const LandingPadInfo &LPInfo : PadInfos)
+ LPads.push_back(&LPInfo);
+ std::sort(LPads.begin(), LPads.end(),
+ [](const LandingPadInfo *L, const LandingPadInfo *R) {
+ return L->WinEHState < R->WinEHState;
+ });
+
+ // For each action in each lpad, emit one of these:
+ // struct ScopeTableEntry {
+ // int32_t EnclosingLevel;
+ // int32_t (__cdecl *Filter)();
+ // void *HandlerOrFinally;
+ // };
+ //
+ // The "outermost" action will use BaseState as its enclosing level. Each
+ // other action will refer to the previous state as its enclosing level.
+ int CurState = 0;
+ for (const LandingPadInfo *LPInfo : LPads) {
+ int EnclosingLevel = BaseState;
+ assert(CurState + int(LPInfo->SEHHandlers.size()) - 1 ==
+ LPInfo->WinEHState &&
+ "gaps in the SEH scope table");
+ for (auto I = LPInfo->SEHHandlers.rbegin(), E = LPInfo->SEHHandlers.rend();
+ I != E; ++I) {
+ const SEHHandler &Handler = *I;
+ const BlockAddress *BA = Handler.RecoverBA;
+ const Function *F = Handler.FilterOrFinally;
+ assert(F && "cannot catch all in 32-bit SEH without filter function");
+ const MCExpr *FilterOrNull =
+ create32bitRef(BA ? Asm->getSymbol(F) : nullptr);
+ const MCExpr *ExceptOrFinally = create32bitRef(
+ BA ? Asm->GetBlockAddressSymbol(BA) : Asm->getSymbol(F));
+
+ OS.EmitIntValue(EnclosingLevel, 4);
+ OS.EmitValue(FilterOrNull, 4);
+ OS.EmitValue(ExceptOrFinally, 4);
+
+ // The next state unwinds to this state.
+ EnclosingLevel = CurState;
+ CurState++;
+ }
+ }
+}
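To make the table emitted by emitExceptHandlerTable above easier to follow, this is the record layout the 32-bit _except_handler3/_except_handler4 runtime reads back, reconstructed purely from the comments and EmitIntValue/EmitValue calls in the new code (an illustrative MSVC-style declaration, not a shipped header):

#include <stdint.h>

// One record per SEH action; the loop above emits them innermost-last, with
// EnclosingLevel naming the state each action unwinds to (BaseState for the
// outermost action).
struct ScopeTableEntry {
  int32_t EnclosingLevel;       // BaseState: -1 (_except_handler3), -2 (_except_handler4)
  int32_t (__cdecl *Filter)();  // null for a __finally funclet
  void *HandlerOrFinally;       // __except block address or __finally function
};

// _except_handler4 prepends the cookie header described in the comment above;
// the pass currently emits -2, 0, 9999 (FIXME), 0 for these four fields.
// _except_handler3 starts directly at ScopeRecord.
struct EH4ScopeTable {
  int32_t GSCookieOffset;
  int32_t GSCookieXOROffset;
  int32_t EHCookieOffset;
  int32_t EHCookieXOROffset;
  struct ScopeTableEntry ScopeRecord[1]; // variable length in practice
};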
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
index 478899b..bbff3c2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
@@ -38,8 +38,15 @@ class WinException : public EHStreamer {
void emitCSpecificHandlerTable();
+ /// Emit the EH table data for 32-bit and 64-bit functions using
+ /// the __CxxFrameHandler3 personality.
void emitCXXFrameHandler3Table(const MachineFunction *MF);
+ /// Emit the EH table data for _except_handler3 and _except_handler4
+ /// personality functions. These are only used on 32-bit and do not use CFI
+ /// tables.
+ void emitExceptHandlerTable(const MachineFunction *MF);
+
void extendIP2StateTable(const MachineFunction *MF, const Function *ParentF,
WinEHFuncInfo &FuncInfo);
@@ -63,7 +70,7 @@ public:
/// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
index fa17108..0bb0fa3 100644
--- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -55,7 +55,7 @@ namespace {
bool isIdempotentRMW(AtomicRMWInst *AI);
bool simplifyIdempotentRMW(AtomicRMWInst *AI);
};
-}
+} // namespace
char AtomicExpand::ID = 0;
char &llvm::AtomicExpandID = AtomicExpand::ID;
@@ -464,7 +464,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Value *ShouldStore =
Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
- // If the the cmpxchg doesn't actually need any ordering when it fails, we can
+ // If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
index b8d9a1a..e7b7f5b 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -79,7 +79,7 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
char BranchFolderPass::ID = 0;
char &llvm::BranchFolderPassID = BranchFolderPass::ID;
@@ -273,8 +273,12 @@ static unsigned HashMachineInstr(const MachineInstr *MI) {
// Merge in bits from the operand if easy.
unsigned OperandHash = 0;
switch (Op.getType()) {
- case MachineOperand::MO_Register: OperandHash = Op.getReg(); break;
- case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break;
+ case MachineOperand::MO_Register:
+ OperandHash = Op.getReg();
+ break;
+ case MachineOperand::MO_Immediate:
+ OperandHash = Op.getImm();
+ break;
case MachineOperand::MO_MachineBasicBlock:
OperandHash = Op.getMBB()->getNumber();
break;
@@ -289,10 +293,11 @@ static unsigned HashMachineInstr(const MachineInstr *MI) {
// pull in the offset.
OperandHash = Op.getOffset();
break;
- default: break;
+ default:
+ break;
}
- Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
+ Hash += ((OperandHash << 3) | Op.getType()) << (i & 31);
}
return Hash;
}
@@ -301,13 +306,13 @@ static unsigned HashMachineInstr(const MachineInstr *MI) {
static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) {
MachineBasicBlock::const_iterator I = MBB->end();
if (I == MBB->begin())
- return 0; // Empty MBB.
+ return 0; // Empty MBB.
--I;
// Skip debug info so it will not affect codegen.
while (I->isDebugValue()) {
- if (I==MBB->begin())
- return 0; // MBB empty except for debug info.
+ if (I == MBB->begin())
+ return 0; // MBB empty except for debug info.
--I;
}
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h
index 3653a2c..d1b17dd 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.h
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.h
@@ -142,6 +142,6 @@ namespace llvm {
bool HoistCommonCode(MachineFunction &MF);
bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
};
-}
+} // namespace llvm
#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
index 034ffb3..fb29b1d 100644
--- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -37,9 +37,9 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
UsedRegs.resize((TRI.getNumRegs()+31)/32);
}
-// HandleByVal - Allocate space on the stack large enough to pass an argument
-// by value. The size and alignment information of the argument is encoded in
-// its parameter attribute.
+/// Allocate space on the stack large enough to pass an argument by value.
+/// The size and alignment information of the argument is encoded in
+/// its parameter attribute.
void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
int MinSize, int MinAlign,
@@ -57,13 +57,13 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}
-/// MarkAllocated - Mark a register and all of its aliases as allocated.
+/// Mark a register and all of its aliases as allocated.
void CCState::MarkAllocated(unsigned Reg) {
for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
UsedRegs[*AI/32] |= 1 << (*AI&31);
}
-/// AnalyzeFormalArguments - Analyze an array of argument values,
+/// Analyze an array of argument values,
/// incorporating info about the formals into this state.
void
CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -83,8 +83,8 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
}
}
-/// CheckReturn - Analyze the return values of a function, returning true if
-/// the return can be performed without sret-demotion, and false otherwise.
+/// Analyze the return values of a function, returning true if the return can
+/// be performed without sret-demotion and false otherwise.
bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
// Determine which register each value should be copied into.
@@ -97,7 +97,7 @@ bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
return true;
}
-/// AnalyzeReturn - Analyze the returned values of a return,
+/// Analyze the returned values of a return,
/// incorporating info about the result values into this state.
void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
@@ -115,7 +115,7 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
}
}
-/// AnalyzeCallOperands - Analyze the outgoing arguments to a call,
+/// Analyze the outgoing arguments to a call,
/// incorporating info about the passed values into this state.
void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
@@ -133,8 +133,7 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
}
}
-/// AnalyzeCallOperands - Same as above except it takes vectors of types
-/// and argument flags.
+/// Same as above except it takes vectors of types and argument flags.
void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
CCAssignFn Fn) {
@@ -152,8 +151,8 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
}
}
-/// AnalyzeCallResult - Analyze the return values of a call,
-/// incorporating info about the passed values into this state.
+/// Analyze the return values of a call, incorporating info about the passed
+/// values into this state.
void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
CCAssignFn Fn) {
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
@@ -169,8 +168,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
}
}
-/// AnalyzeCallResult - Same as above except it's specialized for calls which
-/// produce a single value.
+/// Same as above except it's specialized for calls that produce a single value.
void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
#ifndef NDEBUG
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
index 2c6eaf3..155c5ec 100644
--- a/contrib/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -42,6 +42,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
initializeMachineCSEPass(Registry);
+ initializeImplicitNullChecksPass(Registry);
initializeMachineCombinerPass(Registry);
initializeMachineCopyPropagationPass(Registry);
initializeMachineDominatorTreePass(Registry);
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 6a81403..247c45b 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -135,8 +135,8 @@ class TypePromotionTransaction;
/// multiple load/stores of the same address.
ValueMap<Value*, Value*> SunkAddrs;
- /// Keeps track of all truncates inserted for the current function.
- SetOfInstrs InsertedTruncsSet;
+ /// Keeps track of all instructions inserted for the current function.
+ SetOfInstrs InsertedInsts;
/// Keeps track of the type of the related instruction before their
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
@@ -189,7 +189,7 @@ class TypePromotionTransaction;
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
};
-}
+} // namespace
char CodeGenPrepare::ID = 0;
INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
@@ -205,7 +205,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
// Clear per function information.
- InsertedTruncsSet.clear();
+ InsertedInsts.clear();
PromotedInsts.clear();
ModifiedDT = false;
@@ -1406,6 +1406,9 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
return false;
// Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
ExtVal->moveBefore(CI);
+ // Mark this instruction as "inserted by CGP", so that other
+ // optimizations don't touch it.
+ InsertedInsts.insert(ExtVal);
return true;
}
}
@@ -2107,8 +2110,8 @@ class AddressingModeMatcher {
/// part of the return value of this addressing mode matching stuff.
ExtAddrMode &AddrMode;
- /// The truncate instruction inserted by other CodeGenPrepare optimizations.
- const SetOfInstrs &InsertedTruncs;
+ /// The instructions inserted by other CodeGenPrepare optimizations.
+ const SetOfInstrs &InsertedInsts;
/// A map from the instructions to their type before promotion.
InstrToOrigTy &PromotedInsts;
/// The ongoing transaction where every action should be registered.
@@ -2122,14 +2125,14 @@ class AddressingModeMatcher {
AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
const TargetMachine &TM, Type *AT, unsigned AS,
Instruction *MI, ExtAddrMode &AM,
- const SetOfInstrs &InsertedTruncs,
+ const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT)
: AddrModeInsts(AMI), TM(TM),
TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
->getTargetLowering()),
AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
- InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) {
+ InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT) {
IgnoreProfitability = false;
}
public:
@@ -2137,8 +2140,7 @@ public:
/// Match - Find the maximal addressing mode that a load/store of V can fold,
/// give an access type of AccessTy. This returns a list of involved
/// instructions in AddrModeInsts.
- /// \p InsertedTruncs The truncate instruction inserted by other
- /// CodeGenPrepare
+ /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
/// optimizations.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p The ongoing transaction where every action should be registered.
@@ -2146,13 +2148,13 @@ public:
Instruction *MemoryInst,
SmallVectorImpl<Instruction*> &AddrModeInsts,
const TargetMachine &TM,
- const SetOfInstrs &InsertedTruncs,
+ const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT) {
ExtAddrMode Result;
bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS,
- MemoryInst, Result, InsertedTruncs,
+ MemoryInst, Result, InsertedInsts,
PromotedInsts, TPT).MatchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
return Result;
@@ -2361,12 +2363,12 @@ public:
/// action to promote the operand of \p Ext instead of using Ext.
/// \return NULL if no promotable action is possible with the current
/// sign extension.
- /// \p InsertedTruncs keeps track of all the truncate instructions inserted by
- /// the others CodeGenPrepare optimizations. This information is important
+ /// \p InsertedInsts keeps track of all the instructions inserted by the
+ /// other CodeGenPrepare optimizations. This information is important
/// because we do not want to promote these instructions as CodeGenPrepare
/// will reinsert them later. Thus creating an infinite loop: create/remove.
/// \p PromotedInsts maps the instructions to their type before promotion.
- static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedTruncs,
+ static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
const TargetLowering &TLI,
const InstrToOrigTy &PromotedInsts);
};
@@ -2439,7 +2441,7 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
}
TypePromotionHelper::Action TypePromotionHelper::getAction(
- Instruction *Ext, const SetOfInstrs &InsertedTruncs,
+ Instruction *Ext, const SetOfInstrs &InsertedInsts,
const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
"Unexpected instruction type");
@@ -2455,7 +2457,7 @@ TypePromotionHelper::Action TypePromotionHelper::getAction(
// Do not promote if the operand has been added by codegenprepare.
// Otherwise, it means we are undoing an optimization that is likely to be
// redone, thus causing potential infinite loop.
- if (isa<TruncInst>(ExtOpnd) && InsertedTruncs.count(ExtOpnd))
+ if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
return nullptr;
// SExt or Trunc instructions.
@@ -2839,7 +2841,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
// Try to move this ext out of the way of the addressing mode.
// Ask for a method for doing so.
TypePromotionHelper::Action TPH =
- TypePromotionHelper::getAction(Ext, InsertedTruncs, TLI, PromotedInsts);
+ TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
if (!TPH)
return false;
@@ -3157,7 +3159,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS,
- MemoryInst, Result, InsertedTruncs,
+ MemoryInst, Result, InsertedInsts,
PromotedInsts, TPT);
Matcher.IgnoreProfitability = true;
bool Success = Matcher.MatchAddr(Address, 0);
@@ -3240,7 +3242,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,
- InsertedTruncsSet, PromotedInsts, TPT);
+ InsertedInsts, PromotedInsts, TPT);
// This check is broken into two cases with very similar code to avoid using
// getNumUses() as much as possible. Some values have a lot of uses, so
@@ -3652,7 +3654,7 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
continue;
// Get the action to perform the promotion.
TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
- I, InsertedTruncsSet, *TLI, PromotedInsts);
+ I, InsertedInsts, *TLI, PromotedInsts);
// Check if we can promote.
if (!TPH)
continue;
@@ -3828,7 +3830,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
if (!InsertedTrunc) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
- InsertedTruncsSet.insert(InsertedTrunc);
+ InsertedInsts.insert(InsertedTrunc);
}
// Replace a use of the {s|z}ext source with a use of the result.
@@ -4357,6 +4359,11 @@ bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
}
bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
+ // Bail out if we inserted the instruction to prevent optimizations from
+ // stepping on each other's toes.
+ if (InsertedInsts.count(I))
+ return false;
+
if (PHINode *P = dyn_cast<PHINode>(I)) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
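The InsertedTruncsSet to InsertedInsts rename above widens a guard that keeps CodeGenPrepare from re-optimizing instructions it created itself, which would otherwise undo and redo the same insertion forever. A minimal sketch of the pattern, independent of the LLVM classes involved (all names here are illustrative):

#include <unordered_set>

// Minimal stand-ins; the real pass works on llvm::Instruction.
struct Instr {};

struct PassSketch {
  std::unordered_set<const Instr *> InsertedInsts; // everything this pass created

  // Hypothetical transform: returns a newly created instruction, or nullptr.
  Instr *sinkOrExtend(Instr *) { return nullptr; }

  bool optimizeInst(Instr *I) {
    // Anything the pass inserted earlier is off-limits: touching it again
    // could undo the insertion and recreate it on the next visit, looping.
    if (InsertedInsts.count(I))
      return false;
    if (Instr *New = sinkOrExtend(I)) {
      InsertedInsts.insert(New); // remember it so later visits skip it
      return true;
    }
    return false;
  }
};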
diff --git a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp
index 28c97ba..0816d14 100644
--- a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp
+++ b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp
@@ -45,7 +45,7 @@ public:
return (1 == PT->getAddressSpace());
}
};
-}
+} // namespace
static GCRegistry::Add<CoreCLRGC> X("coreclr", "CoreCLR-compatible GC");
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
index af011a0..1ca5300 100644
--- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -103,6 +103,6 @@ class TargetRegisterInfo;
const TargetRegisterClass *RC,
SmallVectorImpl<unsigned> &Forbid);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
index 0a188c0..02cdb50 100644
--- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -110,7 +110,7 @@ public:
// Schedule - Actual scheduling work.
void schedule() override;
};
-}
+} // namespace llvm
DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
MachineLoopInfo &MLI, bool IsPostRA)
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 963d573..efaf47c 100644
--- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -45,7 +45,7 @@ namespace {
private:
bool isDead(const MachineInstr *MI) const;
};
-}
+} // namespace
char DeadMachineInstructionElim::ID = 0;
char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID;
diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index 42656fb..e019dfb 100644
--- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -181,27 +181,22 @@ size_t DwarfEHPrepare::pruneUnreachableResumes(
bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
SmallVector<ResumeInst*, 16> Resumes;
SmallVector<LandingPadInst*, 16> CleanupLPads;
- bool FoundLP = false;
for (BasicBlock &BB : Fn) {
if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator()))
Resumes.push_back(RI);
- if (auto *LP = BB.getLandingPadInst()) {
+ if (auto *LP = BB.getLandingPadInst())
if (LP->isCleanup())
CleanupLPads.push_back(LP);
- // Check the personality on the first landingpad. Don't do anything if
- // it's for MSVC.
- if (!FoundLP) {
- FoundLP = true;
- EHPersonality Pers = classifyEHPersonality(LP->getPersonalityFn());
- if (isMSVCEHPersonality(Pers))
- return false;
- }
- }
}
if (Resumes.empty())
return false;
+ // Check the personality, don't do anything if it's for MSVC.
+ EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn());
+ if (isMSVCEHPersonality(Pers))
+ return false;
+
LLVMContext &Ctx = Fn.getContext();
size_t ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads);
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
index d3687b9..fbc4d97 100644
--- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -479,11 +479,20 @@ void SSAIfConv::rewritePHIOperands() {
// Convert all PHIs to select instructions inserted before FirstTerm.
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
PHIInfo &PI = PHIs[i];
+ unsigned DstReg = 0;
+
DEBUG(dbgs() << "If-converting " << *PI.PHI);
- unsigned PHIDst = PI.PHI->getOperand(0).getReg();
- unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
- TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
- DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
+ if (PI.TReg == PI.FReg) {
+ // We do not need the select instruction if both incoming values are
+ // equal.
+ DstReg = PI.TReg;
+ } else {
+ unsigned PHIDst = PI.PHI->getOperand(0).getReg();
+ DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
+ TII->insertSelect(*Head, FirstTerm, HeadDL,
+ DstReg, Cond, PI.TReg, PI.FReg);
+ DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
+ }
// Rewrite PHI operands TPred -> (DstReg, Head), remove FPred.
for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) {
diff --git a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
index aea7c31..f43b2f1 100644
--- a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
+++ b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -89,7 +89,7 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
O << "}\n";
return O;
}
-}
+} // namespace llvm
/// view - Visualize the annotated bipartite CFG with Graphviz.
void EdgeBundles::view() const {
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
index 5b09cf1..dd508b3 100644
--- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -110,7 +110,7 @@ struct DomainValue {
Instrs.clear();
}
};
-}
+} // namespace
namespace {
/// Information about a live register.
@@ -201,7 +201,7 @@ private:
bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
void processUndefReads(MachineBasicBlock*);
};
-}
+} // namespace
char ExeDepsFix::ID = 0;
diff --git a/contrib/llvm/lib/CodeGen/FaultMaps.cpp b/contrib/llvm/lib/CodeGen/FaultMaps.cpp
new file mode 100644
index 0000000..0512ff9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/FaultMaps.cpp
@@ -0,0 +1,114 @@
+//===---------------------------- FaultMaps.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/FaultMaps.h"
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "faultmaps"
+
+static const int FaultMapVersion = 1;
+const char *FaultMaps::WFMP = "Fault Maps: ";
+
+FaultMaps::FaultMaps(AsmPrinter &AP) : AP(AP) {}
+
+void FaultMaps::recordFaultingOp(FaultKind FaultTy,
+ const MCSymbol *HandlerLabel) {
+ MCContext &OutContext = AP.OutStreamer->getContext();
+ MCSymbol *FaultingLabel = OutContext.createTempSymbol();
+
+ AP.OutStreamer->EmitLabel(FaultingLabel);
+
+ const MCExpr *FaultingOffset = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(FaultingLabel, OutContext),
+ MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext);
+
+ const MCExpr *HandlerOffset = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(HandlerLabel, OutContext),
+ MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext);
+
+ FunctionInfos[AP.CurrentFnSym].emplace_back(FaultTy, FaultingOffset,
+ HandlerOffset);
+}
+
+void FaultMaps::serializeToFaultMapSection() {
+ if (FunctionInfos.empty())
+ return;
+
+ MCContext &OutContext = AP.OutStreamer->getContext();
+ MCStreamer &OS = *AP.OutStreamer;
+
+ // Create the section.
+ MCSection *FaultMapSection =
+ OutContext.getObjectFileInfo()->getFaultMapSection();
+ OS.SwitchSection(FaultMapSection);
+
+ // Emit a dummy symbol to force section inclusion.
+ OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps")));
+
+ DEBUG(dbgs() << "********** Fault Map Output **********\n");
+
+ // Header
+ OS.EmitIntValue(FaultMapVersion, 1); // Version.
+ OS.EmitIntValue(0, 1); // Reserved.
+ OS.EmitIntValue(0, 2); // Reserved.
+
+ DEBUG(dbgs() << WFMP << "#functions = " << FunctionInfos.size() << "\n");
+ OS.EmitIntValue(FunctionInfos.size(), 4);
+
+ DEBUG(dbgs() << WFMP << "functions:\n");
+
+ for (const auto &FFI : FunctionInfos)
+ emitFunctionInfo(FFI.first, FFI.second);
+}
+
+void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel,
+ const FunctionFaultInfos &FFI) {
+ MCStreamer &OS = *AP.OutStreamer;
+
+ DEBUG(dbgs() << WFMP << " function addr: " << *FnLabel << "\n");
+ OS.EmitSymbolValue(FnLabel, 8);
+
+ DEBUG(dbgs() << WFMP << " #faulting PCs: " << FFI.size() << "\n");
+ OS.EmitIntValue(FFI.size(), 4);
+
+ OS.EmitIntValue(0, 4); // Reserved
+
+ for (auto &Fault : FFI) {
+ DEBUG(dbgs() << WFMP << " fault type: "
+ << faultTypeToString(Fault.Kind) << "\n");
+ OS.EmitIntValue(Fault.Kind, 4);
+
+ DEBUG(dbgs() << WFMP << " faulting PC offset: "
+ << *Fault.FaultingOffsetExpr << "\n");
+ OS.EmitValue(Fault.FaultingOffsetExpr, 4);
+
+ DEBUG(dbgs() << WFMP << " fault handler PC offset: "
+ << *Fault.HandlerOffsetExpr << "\n");
+ OS.EmitValue(Fault.HandlerOffsetExpr, 4);
+ }
+}
+
+
+const char *FaultMaps::faultTypeToString(FaultMaps::FaultKind FT) {
+ switch (FT) {
+ default:
+ llvm_unreachable("unhandled fault type!");
+
+ case FaultMaps::FaultingLoad:
+ return "FaultingLoad";
+ }
+}
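Reading serializeToFaultMapSection and emitFunctionInfo in the new file above together, the fault map section it emits can be summarized as the following packed, target-endian records (an illustrative description derived from the EmitIntValue/EmitValue/EmitSymbolValue calls, not an official header):

#include <stdint.h>

struct FaultMapHeader {
  uint8_t  Version;        // FaultMapVersion == 1
  uint8_t  Reserved0;      // 0
  uint16_t Reserved1;      // 0
  uint32_t NumFunctions;   // one FunctionInfo per function follows
};

struct FaultingPCInfo {
  uint32_t FaultKind;          // FaultMaps::FaultingLoad
  uint32_t FaultingPCOffset;   // faulting label minus function start (CurrentFnSymForSize)
  uint32_t HandlerPCOffset;    // handler label minus function start
};

struct FunctionInfo {
  uint64_t FunctionAddress;    // 8-byte relocation against the function symbol
  uint32_t NumFaultingPCs;
  uint32_t Reserved;           // 0
  // struct FaultingPCInfo Faults[NumFaultingPCs]; follows (variable length)
};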
diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
index c8116a4..cba7f5f 100644
--- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
@@ -38,7 +38,7 @@ public:
bool runOnFunction(Function &F) override;
bool doFinalization(Module &M) override;
};
-}
+} // namespace
INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
"Create Garbage Collector Module Metadata", false, false)
diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
index d8edd7e..fcef322 100644
--- a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -76,7 +76,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
};
-}
+} // namespace
// -----------------------------------------------------------------------------
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index e861ceb..963dfe7 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -264,7 +264,7 @@ namespace {
};
char IfConverter::ID = 0;
-}
+} // namespace
char &llvm::IfConverterID = IfConverter::ID;
diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
new file mode 100644
index 0000000..b1176ce
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -0,0 +1,261 @@
+//===-- ImplicitNullChecks.cpp - Fold null checks into memory accesses ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass turns explicit null checks of the form
+//
+// test %r10, %r10
+// je throw_npe
+// movl (%r10), %esi
+// ...
+//
+// to
+//
+// faulting_load_op("movl (%r10), %esi", throw_npe)
+// ...
+//
+// With the help of a runtime that understands the .fault_maps section,
+// faulting_load_op branches to throw_npe if executing movl (%r10), %esi incurs
+// a page fault.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned> PageSize("imp-null-check-page-size",
+ cl::desc("The page size of the target in "
+ "bytes"),
+ cl::init(4096));
+
+namespace {
+
+class ImplicitNullChecks : public MachineFunctionPass {
+ /// Represents one null check that can be made implicit.
+ struct NullCheck {
+ // The memory operation the null check can be folded into.
+ MachineInstr *MemOperation;
+
+ // The instruction actually doing the null check (Ptr != 0).
+ MachineInstr *CheckOperation;
+
+ // The block the check resides in.
+ MachineBasicBlock *CheckBlock;
+
+ // The block branched to if the pointer is non-null.
+ MachineBasicBlock *NotNullSucc;
+
+ // The block branched to if the pointer is null.
+ MachineBasicBlock *NullSucc;
+
+ NullCheck()
+ : MemOperation(), CheckOperation(), CheckBlock(), NotNullSucc(),
+ NullSucc() {}
+
+ explicit NullCheck(MachineInstr *memOperation, MachineInstr *checkOperation,
+ MachineBasicBlock *checkBlock,
+ MachineBasicBlock *notNullSucc,
+ MachineBasicBlock *nullSucc)
+ : MemOperation(memOperation), CheckOperation(checkOperation),
+ CheckBlock(checkBlock), NotNullSucc(notNullSucc), NullSucc(nullSucc) {
+ }
+ };
+
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineModuleInfo *MMI = nullptr;
+
+ bool analyzeBlockForNullChecks(MachineBasicBlock &MBB,
+ SmallVectorImpl<NullCheck> &NullCheckList);
+ MachineInstr *insertFaultingLoad(MachineInstr *LoadMI, MachineBasicBlock *MBB,
+ MCSymbol *HandlerLabel);
+ void rewriteNullChecks(ArrayRef<NullCheck> NullCheckList);
+
+public:
+ static char ID;
+
+ ImplicitNullChecks() : MachineFunctionPass(ID) {
+ initializeImplicitNullChecksPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // namespace
+
+bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getRegInfo().getTargetRegisterInfo();
+ MMI = &MF.getMMI();
+
+ SmallVector<NullCheck, 16> NullCheckList;
+
+ for (auto &MBB : MF)
+ analyzeBlockForNullChecks(MBB, NullCheckList);
+
+ if (!NullCheckList.empty())
+ rewriteNullChecks(NullCheckList);
+
+ return !NullCheckList.empty();
+}
+
+/// Analyze MBB to check if its terminating branch can be turned into an
+/// implicit null check. If yes, append a description of that null check to
+/// NullCheckList and return true, else return false.
+bool ImplicitNullChecks::analyzeBlockForNullChecks(
+ MachineBasicBlock &MBB, SmallVectorImpl<NullCheck> &NullCheckList) {
+ typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate;
+
+ MachineBranchPredicate MBP;
+
+ if (TII->AnalyzeBranchPredicate(MBB, MBP, true))
+ return false;
+
+ // Is the predicate comparing an integer to zero?
+ if (!(MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
+ (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
+ MBP.Predicate == MachineBranchPredicate::PRED_EQ)))
+ return false;
+
+ // If we cannot erase the test instruction itself, then making the null check
+ // implicit does not buy us much.
+ if (!MBP.SingleUseCondition)
+ return false;
+
+ MachineBasicBlock *NotNullSucc, *NullSucc;
+
+ if (MBP.Predicate == MachineBranchPredicate::PRED_NE) {
+ NotNullSucc = MBP.TrueDest;
+ NullSucc = MBP.FalseDest;
+ } else {
+ NotNullSucc = MBP.FalseDest;
+ NullSucc = MBP.TrueDest;
+ }
+
+ // We handle the simplest case for now. We can potentially do better by using
+ // the machine dominator tree.
+ if (NotNullSucc->pred_size() != 1)
+ return false;
+
+ // Starting with a code fragment like:
+ //
+ // test %RAX, %RAX
+ // jne LblNotNull
+ //
+ // LblNull:
+ // callq throw_NullPointerException
+ //
+ // LblNotNull:
+ // Def = Load (%RAX + <offset>)
+ // ...
+ //
+ //
+ // we want to end up with
+ //
+ // Def = TrappingLoad (%RAX + <offset>), LblNull
+ // jmp LblNotNull ;; explicit or fallthrough
+ //
+ // LblNotNull:
+ // ...
+ //
+ // LblNull:
+ // callq throw_NullPointerException
+ //
+
+ unsigned PointerReg = MBP.LHS.getReg();
+ MachineInstr *MemOp = &*NotNullSucc->begin();
+ unsigned BaseReg, Offset;
+ if (TII->getMemOpBaseRegImmOfs(MemOp, BaseReg, Offset, TRI))
+ if (MemOp->mayLoad() && !MemOp->isPredicable() && BaseReg == PointerReg &&
+ Offset < PageSize && MemOp->getDesc().getNumDefs() == 1) {
+ NullCheckList.emplace_back(MemOp, MBP.ConditionDef, &MBB, NotNullSucc,
+ NullSucc);
+ return true;
+ }
+
+ return false;
+}
+
+/// Wrap a machine load instruction, LoadMI, into a FAULTING_LOAD_OP machine
+/// instruction. The FAULTING_LOAD_OP instruction does the same load as LoadMI
+/// (defining the same register), and branches to HandlerLabel if the load
+/// faults. The FAULTING_LOAD_OP instruction is inserted at the end of MBB.
+MachineInstr *ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI,
+ MachineBasicBlock *MBB,
+ MCSymbol *HandlerLabel) {
+ DebugLoc DL;
+ unsigned NumDefs = LoadMI->getDesc().getNumDefs();
+ assert(NumDefs == 1 && "other cases unhandled!");
+ (void)NumDefs;
+
+ unsigned DefReg = LoadMI->defs().begin()->getReg();
+ assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 &&
+ "expected exactly one def!");
+
+ auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_LOAD_OP), DefReg)
+ .addSym(HandlerLabel)
+ .addImm(LoadMI->getOpcode());
+
+ for (auto &MO : LoadMI->uses())
+ MIB.addOperand(MO);
+
+ MIB.setMemRefs(LoadMI->memoperands_begin(), LoadMI->memoperands_end());
+
+ return MIB;
+}
+
+/// Rewrite the null checks in NullCheckList into implicit null checks.
+void ImplicitNullChecks::rewriteNullChecks(
+ ArrayRef<ImplicitNullChecks::NullCheck> NullCheckList) {
+ DebugLoc DL;
+
+ for (auto &NC : NullCheckList) {
+ MCSymbol *HandlerLabel = MMI->getContext().createTempSymbol();
+
+ // Remove the conditional branch dependent on the null check.
+ unsigned BranchesRemoved = TII->RemoveBranch(*NC.CheckBlock);
+ (void)BranchesRemoved;
+ assert(BranchesRemoved > 0 && "expected at least one branch!");
+
+ // Insert a faulting load where the conditional branch was originally. The
+ // check we did earlier ensures that this bit of code motion is legal. We
+ // do not touch the successors list for any basic block since we haven't
+ // changed control flow; we've just made it implicit.
+ insertFaultingLoad(NC.MemOperation, NC.CheckBlock, HandlerLabel);
+ NC.MemOperation->removeFromParent();
+ NC.CheckOperation->eraseFromParent();
+
+ // Insert an *unconditional* branch to the not-null successor.
+ TII->InsertBranch(*NC.CheckBlock, NC.NotNullSucc, nullptr, /*Cond=*/None,
+ DL);
+
+ // Emit the HandlerLabel as an EH_LABEL.
+ BuildMI(*NC.NullSucc, NC.NullSucc->begin(), DL,
+ TII->get(TargetOpcode::EH_LABEL)).addSym(HandlerLabel);
+ }
+}
+
+char ImplicitNullChecks::ID = 0;
+char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID;
+INITIALIZE_PASS_BEGIN(ImplicitNullChecks, "implicit-null-checks",
+ "Implicit null checks", false, false)
+INITIALIZE_PASS_END(ImplicitNullChecks, "implicit-null-checks",
+ "Implicit null checks", false, false)
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
index 9989f23..48c95c9 100644
--- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -181,7 +181,7 @@ private:
void spillAroundUses(unsigned Reg);
void spillAll();
};
-}
+} // namespace
namespace llvm {
@@ -194,7 +194,7 @@ Spiller *createInlineSpiller(MachineFunctionPass &pass,
return new InlineSpiller(pass, mf, vrm);
}
-}
+} // namespace llvm
//===----------------------------------------------------------------------===//
// Snippets
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index ff52058..b486bdc 100644
--- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -43,16 +43,17 @@ EnableFastISelOption("fast-isel", cl::Hidden,
cl::desc("Enable the \"fast\" instruction selector"));
void LLVMTargetMachine::initAsmInfo() {
- MRI = TheTarget.createMCRegInfo(getTargetTriple());
+ MRI = TheTarget.createMCRegInfo(getTargetTriple().str());
MII = TheTarget.createMCInstrInfo();
// FIXME: Having an MCSubtargetInfo on the target machine is a hack due
// to some backends having subtarget feature dependent module level
// code generation. This is similar to the hack in the AsmPrinter for
// module level assembly etc.
- STI = TheTarget.createMCSubtargetInfo(getTargetTriple(), getTargetCPU(),
+ STI = TheTarget.createMCSubtargetInfo(getTargetTriple().str(), getTargetCPU(),
getTargetFeatureString());
- MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*MRI, getTargetTriple());
+ MCAsmInfo *TmpAsmInfo =
+ TheTarget.createMCAsmInfo(*MRI, getTargetTriple().str());
// TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
// and if the old one gets included then MCAsmInfo will be NULL and
// we'll crash later.
@@ -72,12 +73,12 @@ void LLVMTargetMachine::initAsmInfo() {
LLVMTargetMachine::LLVMTargetMachine(const Target &T,
StringRef DataLayoutString,
- StringRef Triple, StringRef CPU,
+ const Triple &TT, StringRef CPU,
StringRef FS, TargetOptions Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : TargetMachine(T, DataLayoutString, Triple, CPU, FS, Options) {
- CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL);
+ : TargetMachine(T, DataLayoutString, TT, CPU, FS, Options) {
+ CodeGenInfo = T.createMCCodeGenInfo(TT.str(), RM, CM, OL);
}
TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() {
@@ -87,11 +88,11 @@ TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() {
}
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
-static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
- PassManagerBase &PM,
- bool DisableVerify,
- AnalysisID StartAfter,
- AnalysisID StopAfter) {
+static MCContext *
+addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
+ bool DisableVerify, AnalysisID StartAfter,
+ AnalysisID StopAfter,
+ MachineFunctionInitializer *MFInitializer = nullptr) {
// Add internal analysis passes from the target machine.
PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
@@ -121,7 +122,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
PM.add(MMI);
// Set up a MachineFunction for the rest of CodeGen to work on.
- PM.add(new MachineFunctionAnalysis(*TM));
+ PM.add(new MachineFunctionAnalysis(*TM, MFInitializer));
// Enable FastISel with -fast, but allow that to be overridden.
if (EnableFastISelOption == cl::BOU_TRUE ||
@@ -142,10 +143,11 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
bool LLVMTargetMachine::addPassesToEmitFile(
PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
- bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) {
+ bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter,
+ MachineFunctionInitializer *MFInitializer) {
// Add common CodeGen passes.
- MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
- StartAfter, StopAfter);
+ MCContext *Context = addPassesToGenerateCode(
+ this, PM, DisableVerify, StartAfter, StopAfter, MFInitializer);
if (!Context)
return true;
@@ -167,15 +169,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(
switch (FileType) {
case CGFT_AssemblyFile: {
MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(
- Triple(getTargetTriple()), MAI.getAssemblerDialect(), MAI, MII, MRI);
+ getTargetTriple(), MAI.getAssemblerDialect(), MAI, MII, MRI);
// Create a code emitter if asked to show the encoding.
MCCodeEmitter *MCE = nullptr;
if (Options.MCOptions.ShowMCEncoding)
MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
- MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
- TargetCPU);
+ MCAsmBackend *MAB =
+ getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU);
auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
MCStreamer *S = getTarget().createAsmStreamer(
*Context, std::move(FOut), Options.MCOptions.AsmVerbose,
@@ -188,15 +190,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
- MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
- TargetCPU);
+ MCAsmBackend *MAB =
+ getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU);
if (!MCE || !MAB)
return true;
// Don't waste memory on names of temp labels.
Context->setUseNamesOnTempLabels(false);
- Triple T(getTargetTriple());
+ Triple T(getTargetTriple().str());
AsmStreamer.reset(getTarget().createMCObjectStreamer(
T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
/*DWARFMustBeAtTheEnd*/ true));
@@ -241,12 +243,12 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
const MCRegisterInfo &MRI = *getMCRegisterInfo();
MCCodeEmitter *MCE =
getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx);
- MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
- TargetCPU);
+ MCAsmBackend *MAB =
+ getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU);
if (!MCE || !MAB)
return true;
- Triple T(getTargetTriple());
+ const Triple &T = getTargetTriple();
const MCSubtargetInfo &STI = *getMCSubtargetInfo();
std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
index 11deb81..b355393 100644
--- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -738,45 +738,22 @@ bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) {
LiveVariables::VarInfo &VI = getVarInfo(Reg);
+ SmallPtrSet<const MachineBasicBlock *, 8> Kills;
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ Kills.insert(VI.Kills[i]->getParent());
+
// Loop over all of the successors of the basic block, checking to see if
// the value is either live in the block, or if it is killed in the block.
- SmallVector<MachineBasicBlock*, 8> OpSuccBlocks;
- for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
- E = MBB.succ_end(); SI != E; ++SI) {
- MachineBasicBlock *SuccMBB = *SI;
-
+ for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
// Is it alive in this successor?
unsigned SuccIdx = SuccMBB->getNumber();
if (VI.AliveBlocks.test(SuccIdx))
return true;
- OpSuccBlocks.push_back(SuccMBB);
+ // Or is it live because there is a use in a successor that kills it?
+ if (Kills.count(SuccMBB))
+ return true;
}
- // Check to see if this value is live because there is a use in a successor
- // that kills it.
- switch (OpSuccBlocks.size()) {
- case 1: {
- MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
- for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
- if (VI.Kills[i]->getParent() == SuccMBB)
- return true;
- break;
- }
- case 2: {
- MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
- for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
- if (VI.Kills[i]->getParent() == SuccMBB1 ||
- VI.Kills[i]->getParent() == SuccMBB2)
- return true;
- break;
- }
- default:
- std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
- for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
- if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
- VI.Kills[i]->getParent()))
- return true;
- }
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/CMakeLists.txt b/contrib/llvm/lib/CodeGen/MIRParser/CMakeLists.txt
deleted file mode 100644
index 468f072..0000000
--- a/contrib/llvm/lib/CodeGen/MIRParser/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-add_llvm_library(LLVMMIRParser
- MIRParser.cpp
- )
-
-add_dependencies(LLVMMIRParser intrinsics_gen)
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/LLVMBuild.txt b/contrib/llvm/lib/CodeGen/MIRParser/LLVMBuild.txt
deleted file mode 100644
index 04ae722..0000000
--- a/contrib/llvm/lib/CodeGen/MIRParser/LLVMBuild.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-;===- ./lib/CodeGen/MIRParser/LLVMBuild.txt --------------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = MIRParser
-parent = CodeGen
-required_libraries = Core Support Target AsmParser CodeGen
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 7a51b38..1fef3f6 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -14,10 +14,17 @@
#include "llvm/CodeGen/MIRParser/MIRParser.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/AsmParser/Parser.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
@@ -27,7 +34,7 @@
using namespace llvm;
-namespace {
+namespace llvm {
/// This class implements the parsing of LLVM IR that's embedded inside a MIR
/// file.
@@ -35,29 +42,56 @@ class MIRParserImpl {
SourceMgr SM;
StringRef Filename;
LLVMContext &Context;
+ StringMap<std::unique_ptr<yaml::MachineFunction>> Functions;
public:
MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename,
LLVMContext &Context);
+ void reportDiagnostic(const SMDiagnostic &Diag);
+
+ /// Report an error with the given message at an unknown location.
+ ///
+ /// Always returns true.
+ bool error(const Twine &Message);
+
/// Try to parse the optional LLVM module and the machine functions in the MIR
/// file.
///
/// Return null if an error occurred.
- std::unique_ptr<Module> parse(SMDiagnostic &Error);
+ std::unique_ptr<Module> parse();
/// Parse the machine function in the current YAML document.
///
+ /// \param NoLLVMIR - set to true when the MIR file doesn't have LLVM IR.
+ /// A dummy IR function is created and inserted into the given module when
+ /// this parameter is true.
+ ///
+ /// Return true if an error occurred.
+ bool parseMachineFunction(yaml::Input &In, Module &M, bool NoLLVMIR);
+
+ /// Initialize the machine function to the state that's described in the MIR
+ /// file.
+ ///
+ /// Return true if an error occurred.
+ bool initializeMachineFunction(MachineFunction &MF);
+
+ /// Initialize the machine basic block using its YAML representation.
+ ///
/// Return true if an error occurred.
- bool parseMachineFunction(yaml::Input &In);
+ bool initializeMachineBasicBlock(MachineBasicBlock &MBB,
+ const yaml::MachineBasicBlock &YamlMBB);
private:
/// Return a MIR diagnostic converted from an LLVM assembly diagnostic.
SMDiagnostic diagFromLLVMAssemblyDiag(const SMDiagnostic &Error,
SMRange SourceRange);
+
+ /// Create an empty function with the given name.
+ void createDummyFunction(StringRef Name, Module &M);
};
-} // end anonymous namespace
+} // end namespace llvm
MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents,
StringRef Filename, LLVMContext &Context)
@@ -65,30 +99,54 @@ MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents,
SM.AddNewSourceBuffer(std::move(Contents), SMLoc());
}
+bool MIRParserImpl::error(const Twine &Message) {
+ Context.diagnose(DiagnosticInfoMIRParser(
+ DS_Error, SMDiagnostic(Filename, SourceMgr::DK_Error, Message.str())));
+ return true;
+}
+
+void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) {
+ DiagnosticSeverity Kind;
+ switch (Diag.getKind()) {
+ case SourceMgr::DK_Error:
+ Kind = DS_Error;
+ break;
+ case SourceMgr::DK_Warning:
+ Kind = DS_Warning;
+ break;
+ case SourceMgr::DK_Note:
+ Kind = DS_Note;
+ break;
+ }
+ Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag));
+}
+
static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) {
- *reinterpret_cast<SMDiagnostic *>(Context) = Diag;
+ reinterpret_cast<MIRParserImpl *>(Context)->reportDiagnostic(Diag);
}
-std::unique_ptr<Module> MIRParserImpl::parse(SMDiagnostic &Error) {
+std::unique_ptr<Module> MIRParserImpl::parse() {
yaml::Input In(SM.getMemoryBuffer(SM.getMainFileID())->getBuffer(),
- /*Ctxt=*/nullptr, handleYAMLDiag, &Error);
+ /*Ctxt=*/nullptr, handleYAMLDiag, this);
if (!In.setCurrentDocument()) {
- if (!Error.getMessage().empty())
+ if (In.error())
return nullptr;
// Create an empty module when the MIR file is empty.
return llvm::make_unique<Module>(Filename, Context);
}
std::unique_ptr<Module> M;
+ bool NoLLVMIR = false;
// Parse the block scalar manually so that we can return a unique pointer
// without having to go through YAML traits.
if (const auto *BSN =
dyn_cast_or_null<yaml::BlockScalarNode>(In.getCurrentNode())) {
+ SMDiagnostic Error;
M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error,
Context);
if (!M) {
- Error = diagFromLLVMAssemblyDiag(Error, BSN->getSourceRange());
+ reportDiagnostic(diagFromLLVMAssemblyDiag(Error, BSN->getSourceRange()));
return M;
}
In.nextDocument();
@@ -97,11 +155,12 @@ std::unique_ptr<Module> MIRParserImpl::parse(SMDiagnostic &Error) {
} else {
// Create a new, empty module.
M = llvm::make_unique<Module>(Filename, Context);
+ NoLLVMIR = true;
}
// Parse the machine functions.
do {
- if (parseMachineFunction(In))
+ if (parseMachineFunction(In, *M, NoLLVMIR))
return nullptr;
In.nextDocument();
} while (In.setCurrentDocument());
@@ -109,13 +168,68 @@ std::unique_ptr<Module> MIRParserImpl::parse(SMDiagnostic &Error) {
return M;
}
-bool MIRParserImpl::parseMachineFunction(yaml::Input &In) {
- yaml::MachineFunction MF;
- yaml::yamlize(In, MF, false);
+bool MIRParserImpl::parseMachineFunction(yaml::Input &In, Module &M,
+ bool NoLLVMIR) {
+ auto MF = llvm::make_unique<yaml::MachineFunction>();
+ yaml::yamlize(In, *MF, false);
if (In.error())
return true;
- // TODO: Initialize the real machine function with the state in the yaml
- // machine function later on.
+ auto FunctionName = MF->Name;
+ if (Functions.find(FunctionName) != Functions.end())
+ return error(Twine("redefinition of machine function '") + FunctionName +
+ "'");
+ Functions.insert(std::make_pair(FunctionName, std::move(MF)));
+ if (NoLLVMIR)
+ createDummyFunction(FunctionName, M);
+ else if (!M.getFunction(FunctionName))
+ return error(Twine("function '") + FunctionName +
+ "' isn't defined in the provided LLVM IR");
+ return false;
+}
+
+void MIRParserImpl::createDummyFunction(StringRef Name, Module &M) {
+ auto &Context = M.getContext();
+ Function *F = cast<Function>(M.getOrInsertFunction(
+ Name, FunctionType::get(Type::getVoidTy(Context), false)));
+ BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
+ new UnreachableInst(Context, BB);
+}
+
+bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
+ auto It = Functions.find(MF.getName());
+ if (It == Functions.end())
+ return error(Twine("no machine function information for function '") +
+ MF.getName() + "' in the MIR file");
+ // TODO: Recreate the machine function.
+ const yaml::MachineFunction &YamlMF = *It->getValue();
+ if (YamlMF.Alignment)
+ MF.setAlignment(YamlMF.Alignment);
+ MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
+ MF.setHasInlineAsm(YamlMF.HasInlineAsm);
+ const auto &F = *MF.getFunction();
+ for (const auto &YamlMBB : YamlMF.BasicBlocks) {
+ const BasicBlock *BB = nullptr;
+ if (!YamlMBB.Name.empty()) {
+ BB = dyn_cast_or_null<BasicBlock>(
+ F.getValueSymbolTable().lookup(YamlMBB.Name));
+ if (!BB)
+ return error(Twine("basic block '") + YamlMBB.Name +
+ "' is not defined in the function '" + MF.getName() + "'");
+ }
+ auto *MBB = MF.CreateMachineBasicBlock(BB);
+ MF.insert(MF.end(), MBB);
+ if (initializeMachineBasicBlock(*MBB, YamlMBB))
+ return true;
+ }
+ return false;
+}
+
+bool MIRParserImpl::initializeMachineBasicBlock(
+ MachineBasicBlock &MBB, const yaml::MachineBasicBlock &YamlMBB) {
+ MBB.setAlignment(YamlMBB.Alignment);
+ if (YamlMBB.AddressTaken)
+ MBB.setHasAddressTaken();
+ MBB.setIsLandingPad(YamlMBB.IsLandingPad);
return false;
}
@@ -150,22 +264,33 @@ SMDiagnostic MIRParserImpl::diagFromLLVMAssemblyDiag(const SMDiagnostic &Error,
Error.getFixIts());
}
-std::unique_ptr<Module> llvm::parseMIRFile(StringRef Filename,
- SMDiagnostic &Error,
- LLVMContext &Context) {
+MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl)
+ : Impl(std::move(Impl)) {}
+
+MIRParser::~MIRParser() {}
+
+std::unique_ptr<Module> MIRParser::parseLLVMModule() { return Impl->parse(); }
+
+bool MIRParser::initializeMachineFunction(MachineFunction &MF) {
+ return Impl->initializeMachineFunction(MF);
+}
+
+std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(StringRef Filename,
+ SMDiagnostic &Error,
+ LLVMContext &Context) {
auto FileOrErr = MemoryBuffer::getFile(Filename);
if (std::error_code EC = FileOrErr.getError()) {
Error = SMDiagnostic(Filename, SourceMgr::DK_Error,
"Could not open input file: " + EC.message());
- return std::unique_ptr<Module>();
+ return nullptr;
}
- return parseMIR(std::move(FileOrErr.get()), Error, Context);
+ return createMIRParser(std::move(FileOrErr.get()), Context);
}
-std::unique_ptr<Module> llvm::parseMIR(std::unique_ptr<MemoryBuffer> Contents,
- SMDiagnostic &Error,
- LLVMContext &Context) {
+std::unique_ptr<MIRParser>
+llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents,
+ LLVMContext &Context) {
auto Filename = Contents->getBufferIdentifier();
- MIRParserImpl Parser(std::move(Contents), Filename, Context);
- return Parser.parse(Error);
+ return llvm::make_unique<MIRParser>(
+ llvm::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context));
}
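This hunk retires the old parseMIRFile/parseMIR entry points, which handed back a Module directly, in favor of a MIRParser object that parses the embedded IR once and then fills in machine functions on demand. A minimal, hypothetical driver (error handling elided; only the entry points introduced in this hunk are assumed):

#include "llvm/CodeGen/MIRParser/MIRParser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"
using namespace llvm;

// Sketch only, not part of the patch.
static void loadMIR(StringRef Path, LLVMContext &Ctx) {
  SMDiagnostic Err;
  std::unique_ptr<MIRParser> Parser = createMIRParserFromFile(Path, Err, Ctx);
  if (!Parser)
    return;                                   // Err says why the open failed.
  std::unique_ptr<Module> M = Parser->parseLLVMModule(); // embedded IR, if any
  if (!M)
    return;                // Diagnostics were already routed through Ctx.
  // Later, when codegen creates a MachineFunction MF for a function in *M,
  // the parser fills in its MIR-described state:
  //   bool Failed = Parser->initializeMachineFunction(MF);
}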
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/Makefile b/contrib/llvm/lib/CodeGen/MIRParser/Makefile
deleted file mode 100644
index c02d188..0000000
--- a/contrib/llvm/lib/CodeGen/MIRParser/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-##===- lib/CodeGen/MIRParser/Makefile ----------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMMIRParser
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
new file mode 100644
index 0000000..bbf163a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -0,0 +1,96 @@
+//===- MIRPrinter.cpp - MIR serialization format printer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the class that prints out the LLVM IR and machine
+// functions using the MIR serialization format.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MIRPrinter.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/YAMLTraits.h"
+
+using namespace llvm;
+
+namespace {
+
+/// This class prints out the machine functions using the MIR serialization
+/// format.
+class MIRPrinter {
+ raw_ostream &OS;
+
+public:
+ MIRPrinter(raw_ostream &OS) : OS(OS) {}
+
+ void print(const MachineFunction &MF);
+
+ void convert(yaml::MachineBasicBlock &YamlMBB, const MachineBasicBlock &MBB);
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+namespace yaml {
+
+/// This struct serializes the LLVM IR module.
+template <> struct BlockScalarTraits<Module> {
+ static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) {
+ Mod.print(OS, nullptr);
+ }
+ static StringRef input(StringRef Str, void *Ctxt, Module &Mod) {
+ llvm_unreachable("LLVM Module is supposed to be parsed separately");
+ return "";
+ }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+void MIRPrinter::print(const MachineFunction &MF) {
+ yaml::MachineFunction YamlMF;
+ YamlMF.Name = MF.getName();
+ YamlMF.Alignment = MF.getAlignment();
+ YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
+ YamlMF.HasInlineAsm = MF.hasInlineAsm();
+ for (const auto &MBB : MF) {
+ yaml::MachineBasicBlock YamlMBB;
+ convert(YamlMBB, MBB);
+ YamlMF.BasicBlocks.push_back(YamlMBB);
+ }
+ yaml::Output Out(OS);
+ Out << YamlMF;
+}
+
+void MIRPrinter::convert(yaml::MachineBasicBlock &YamlMBB,
+ const MachineBasicBlock &MBB) {
+ // TODO: Serialize unnamed BB references.
+ if (const auto *BB = MBB.getBasicBlock())
+ YamlMBB.Name = BB->hasName() ? BB->getName() : "<unnamed bb>";
+ else
+ YamlMBB.Name = "";
+ YamlMBB.Alignment = MBB.getAlignment();
+ YamlMBB.AddressTaken = MBB.hasAddressTaken();
+ YamlMBB.IsLandingPad = MBB.isLandingPad();
+}
+
+void llvm::printMIR(raw_ostream &OS, const Module &M) {
+ yaml::Output Out(OS);
+ Out << const_cast<Module &>(M);
+}
+
+void llvm::printMIR(raw_ostream &OS, const MachineFunction &MF) {
+ MIRPrinter Printer(OS);
+ Printer.print(MF);
+}
diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.h b/contrib/llvm/lib/CodeGen/MIRPrinter.h
new file mode 100644
index 0000000..16aa903
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MIRPrinter.h
@@ -0,0 +1,33 @@
+//===- MIRPrinter.h - MIR serialization format printer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the functions that print out the LLVM IR and the machine
+// functions using the MIR serialization format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MIRPRINTER_H
+#define LLVM_LIB_CODEGEN_MIRPRINTER_H
+
+namespace llvm {
+
+class MachineFunction;
+class Module;
+class raw_ostream;
+
+/// Print LLVM IR using the MIR serialization format to the given output stream.
+void printMIR(raw_ostream &OS, const Module &M);
+
+/// Print a machine function using the MIR serialization format to the given
+/// output stream.
+void printMIR(raw_ostream &OS, const MachineFunction &MF);
+
+} // end namespace llvm
+
+#endif
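The two declarations above are the printing entry points that MIRPrintingPass delegates to in the next hunk; they can also be called directly, for example from a debugger session. A sketch (not in the patch), assuming errs() from llvm/Support/raw_ostream.h and a MachineFunction MF in scope:

printMIR(errs(), *MF.getFunction()->getParent()); // the embedded LLVM IR document
printMIR(errs(), MF);                             // one machine function document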
diff --git a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
index 5e0f4cd..13d61e6 100644
--- a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
@@ -12,54 +12,17 @@
//
//===----------------------------------------------------------------------===//
+#include "MIRPrinter.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
-#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/YAMLTraits.h"
using namespace llvm;
-namespace llvm {
-namespace yaml {
-
-/// This struct serializes the LLVM IR module.
-template <> struct BlockScalarTraits<Module> {
- static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) {
- Mod.print(OS, nullptr);
- }
- static StringRef input(StringRef Str, void *Ctxt, Module &Mod) {
- llvm_unreachable("LLVM Module is supposed to be parsed separately");
- return "";
- }
-};
-
-} // end namespace yaml
-} // end namespace llvm
-
namespace {
-/// This class prints out the machine functions using the MIR serialization
-/// format.
-class MIRPrinter {
- raw_ostream &OS;
-
-public:
- MIRPrinter(raw_ostream &OS) : OS(OS) {}
-
- void print(const MachineFunction &MF);
-};
-
-void MIRPrinter::print(const MachineFunction &MF) {
- yaml::MachineFunction YamlMF;
- YamlMF.Name = MF.getName();
- yaml::Output Out(OS);
- Out << YamlMF;
-}
-
/// This pass prints out the LLVM IR to an output stream using the MIR
/// serialization format.
struct MIRPrintingPass : public MachineFunctionPass {
@@ -80,14 +43,13 @@ struct MIRPrintingPass : public MachineFunctionPass {
virtual bool runOnMachineFunction(MachineFunction &MF) override {
std::string Str;
raw_string_ostream StrOS(Str);
- MIRPrinter(StrOS).print(MF);
+ printMIR(StrOS, MF);
MachineFunctions.append(StrOS.str());
return false;
}
virtual bool doFinalization(Module &M) override {
- yaml::Output Out(OS);
- Out << M;
+ printMIR(OS, M);
OS << MachineFunctions;
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 2969bad..141990b 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -179,7 +179,7 @@ public:
/// in-loop predecessors of this chain.
unsigned LoopPredecessors;
};
-}
+} // namespace
namespace {
class MachineBlockPlacement : public MachineFunctionPass {
@@ -267,7 +267,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
char MachineBlockPlacement::ID = 0;
char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
@@ -1185,7 +1185,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
char MachineBlockPlacementStats::ID = 0;
char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
index a4bc77e..5019e8e 100644
--- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -78,7 +78,7 @@ private:
void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
};
-}
+} // namespace
char MachineCombiner::ID = 0;
char &llvm::MachineCombinerID = MachineCombiner::ID;
@@ -223,14 +223,14 @@ bool MachineCombiner::preservesCriticalPathLen(
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
- // NewRoot is the last instruction in the \p InsInstrs vector
- // Get depth and latency of NewRoot
+ // NewRoot is the last instruction in the \p InsInstrs vector.
+ // Get depth and latency of NewRoot.
unsigned NewRootIdx = InsInstrs.size() - 1;
MachineInstr *NewRoot = InsInstrs[NewRootIdx];
unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
- // Get depth, latency and slack of Root
+ // Get depth, latency and slack of Root.
unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth;
unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
unsigned RootSlack = BlockTrace.getInstrSlack(Root);
@@ -245,7 +245,7 @@ bool MachineCombiner::preservesCriticalPathLen(
dbgs() << " RootDepth + RootLatency + RootSlack "
<< RootDepth + RootLatency + RootSlack << "\n";);
- /// True when the new sequence does not lenghten the critical path.
+ /// True when the new sequence does not lengthen the critical path.
return ((NewRootDepth + NewRootLatency) <=
(RootDepth + RootLatency + RootSlack));
}
@@ -284,7 +284,7 @@ bool MachineCombiner::preservesResourceLen(
ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);
- // Compute new resource length
+ // Compute new resource length.
unsigned ResLenAfterCombine =
BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
@@ -322,7 +322,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
auto &MI = *BlockIter++;
DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
- SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Pattern;
+ SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Patterns;
// The motivating example is:
//
// MUL Other MUL_op1 MUL_op2 Other
@@ -345,11 +345,11 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
//
// The algorithm does not try to evaluate all patterns and pick the best.
// This is only an artificial restriction though. In practice there is
- // mostly one pattern and hasPattern() can order patterns based on an
- // internal cost heuristic.
+ // mostly one pattern, and getMachineCombinerPatterns() can order patterns
+ // based on an internal cost heuristic.
- if (TII->hasPattern(MI, Pattern)) {
- for (auto P : Pattern) {
+ if (TII->getMachineCombinerPatterns(MI, Patterns)) {
+ for (auto P : Patterns) {
SmallVector<MachineInstr *, 16> InsInstrs;
SmallVector<MachineInstr *, 16> DelInstrs;
DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
@@ -373,8 +373,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
InstrIdxForVirtReg) &&
preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
for (auto *InstrPtr : InsInstrs)
- MBB->insert((MachineBasicBlock::iterator) & MI,
- (MachineInstr *)InstrPtr);
+ MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr);
for (auto *InstrPtr : DelInstrs)
InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
@@ -383,15 +382,14 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
Traces->invalidate(MBB);
Traces->verifyAnalysis();
- // Eagerly stop after the first pattern fired
+ // Eagerly stop after the first pattern fires.
break;
} else {
// Cleanup instructions of the alternative code sequence. There is no
// use for them.
- for (auto *InstrPtr : InsInstrs) {
- MachineFunction *MF = MBB->getParent();
- MF->DeleteMachineInstr((MachineInstr *)InstrPtr);
- }
+ MachineFunction *MF = MBB->getParent();
+ for (auto *InstrPtr : InsInstrs)
+ MF->DeleteMachineInstr(InstrPtr);
}
InstrIdxForVirtReg.clear();
}
diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index a686341..ec171b0 100644
--- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -55,7 +55,7 @@ namespace {
DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
bool CopyPropagateBlock(MachineBasicBlock &MBB);
};
-}
+} // namespace
char MachineCopyPropagation::ID = 0;
char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
index 09662b6..67b9d77 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionInitializer.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -41,11 +42,13 @@ using namespace llvm;
#define DEBUG_TYPE "codegen"
+void MachineFunctionInitializer::anchor() {}
+
//===----------------------------------------------------------------------===//
// MachineFunction implementation
//===----------------------------------------------------------------------===//
-// Out of line virtual method.
+// Out-of-line virtual method.
MachineFunctionInfo::~MachineFunctionInfo() {}
void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
@@ -114,8 +117,8 @@ MachineFunction::~MachineFunction() {
}
}
-/// getOrCreateJumpTableInfo - Get the JumpTableInfo for this function, if it
-/// does already exist, allocate one.
+/// Get the JumpTableInfo for this function.
+/// If it does not already exist, allocate one.
MachineJumpTableInfo *MachineFunction::
getOrCreateJumpTableInfo(unsigned EntryKind) {
if (JumpTableInfo) return JumpTableInfo;
@@ -130,11 +133,10 @@ bool MachineFunction::shouldSplitStack() {
return getFunction()->hasFnAttribute("split-stack");
}
-/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
-/// recomputes them. This guarantees that the MBB numbers are sequential,
-/// dense, and match the ordering of the blocks within the function. If a
-/// specific MachineBasicBlock is specified, only that block and those after
-/// it are renumbered.
+/// This discards all of the MachineBasicBlock numbers and recomputes them.
+/// This guarantees that the MBB numbers are sequential, dense, and match the
+/// ordering of the blocks within the function. If a specific MachineBasicBlock
+/// is specified, only that block and those after it are renumbered.
void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
if (empty()) { MBBNumbering.clear(); return; }
MachineFunction::iterator MBBI, E = end();
@@ -172,9 +174,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
MBBNumbering.resize(BlockNo);
}
-/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
-/// of `new MachineInstr'.
-///
+/// Allocate a new MachineInstr. Use this instead of `new MachineInstr'.
MachineInstr *
MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
DebugLoc DL, bool NoImp) {
@@ -182,17 +182,15 @@ MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
MachineInstr(*this, MCID, DL, NoImp);
}
-/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
-/// 'Orig' instruction, identical in all ways except the instruction
-/// has no parent, prev, or next.
-///
+/// Create a new MachineInstr which is a copy of the 'Orig' instruction,
+/// identical in all ways except the instruction has no parent, prev, or next.
MachineInstr *
MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
MachineInstr(*this, *Orig);
}
-/// DeleteMachineInstr - Delete the given MachineInstr.
+/// Delete the given MachineInstr.
///
/// This function also serves as the MachineInstr destructor - the real
/// ~MachineInstr() destructor must be empty.
@@ -208,17 +206,15 @@ MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
InstructionRecycler.Deallocate(Allocator, MI);
}
-/// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this
-/// instead of `new MachineBasicBlock'.
-///
+/// Allocate a new MachineBasicBlock. Use this instead of
+/// `new MachineBasicBlock'.
MachineBasicBlock *
MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
MachineBasicBlock(*this, bb);
}
-/// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
-///
+/// Delete the given MachineBasicBlock.
void
MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
assert(MBB->getParent() == this && "MBB parent mismatch!");
@@ -408,7 +404,7 @@ namespace llvm {
return OutStr;
}
};
-}
+} // namespace llvm
void MachineFunction::viewCFG() const
{
@@ -430,7 +426,7 @@ void MachineFunction::viewCFGOnly() const
#endif // NDEBUG
}
-/// addLiveIn - Add the specified physical register as a live-in value and
+/// Add the specified physical register as a live-in value and
/// create a corresponding virtual register for it.
unsigned MachineFunction::addLiveIn(unsigned PReg,
const TargetRegisterClass *RC) {
@@ -454,7 +450,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
return VReg;
}
-/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
+/// Return the MCSymbol for the specified non-empty jump table.
/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
/// normal 'L' label is returned.
MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
@@ -471,8 +467,7 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
return Ctx.getOrCreateSymbol(Name);
}
-/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
-/// base.
+/// Return a function-local symbol to represent the PIC base.
MCSymbol *MachineFunction::getPICBaseSymbol() const {
const DataLayout *DL = getTarget().getDataLayout();
return Ctx.getOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
@@ -483,8 +478,7 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const {
// MachineFrameInfo implementation
//===----------------------------------------------------------------------===//
-/// ensureMaxAlignment - Make sure the function is at least Align bytes
-/// aligned.
+/// Make sure the function is at least Align bytes aligned.
void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
if (!StackRealignable || !RealignOption)
assert(Align <= StackAlignment &&
@@ -492,7 +486,7 @@ void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
if (MaxAlignment < Align) MaxAlignment = Align;
}
-/// clampStackAlignment - Clamp the alignment if requested and emit a warning.
+/// Clamp the alignment if requested and emit a warning.
static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
unsigned StackAlign) {
if (!ShouldClamp || Align <= StackAlign)
@@ -503,9 +497,8 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
return StackAlign;
}
-/// CreateStackObject - Create a new statically sized stack object, returning
-/// a nonnegative identifier to represent it.
-///
+/// Create a new statically sized stack object, returning a nonnegative
+/// identifier to represent it.
int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
bool isSS, const AllocaInst *Alloca) {
assert(Size != 0 && "Cannot allocate zero size stack objects!");
@@ -519,10 +512,8 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
return Index;
}
-/// CreateSpillStackObject - Create a new statically sized stack object that
-/// represents a spill slot, returning a nonnegative identifier to represent
-/// it.
-///
+/// Create a new statically sized stack object that represents a spill slot,
+/// returning a nonnegative identifier to represent it.
int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
unsigned Alignment) {
Alignment = clampStackAlignment(!StackRealignable || !RealignOption,
@@ -533,11 +524,9 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
return Index;
}
-/// CreateVariableSizedObject - Notify the MachineFrameInfo object that a
-/// variable sized object has been created. This must be created whenever a
-/// variable sized object is created, whether or not the index returned is
-/// actually used.
-///
+/// Notify the MachineFrameInfo object that a variable sized object has been
+/// created. This must be created whenever a variable sized object is created,
+/// whether or not the index returned is actually used.
int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
const AllocaInst *Alloca) {
HasVarSizedObjects = true;
@@ -548,11 +537,10 @@ int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
return (int)Objects.size()-NumFixedObjects-1;
}
-/// CreateFixedObject - Create a new object at a fixed location on the stack.
+/// Create a new object at a fixed location on the stack.
/// All fixed objects should be created before other objects are created for
/// efficiency. By default, fixed objects are immutable. This returns an
/// index with a negative value.
-///
int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
bool Immutable, bool isAliased) {
assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
@@ -569,8 +557,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
return -++NumFixedObjects;
}
-/// CreateFixedSpillStackObject - Create a spill slot at a fixed location
-/// on the stack. Returns an index with a negative value.
+/// Create a spill slot at a fixed location on the stack.
+/// Returns an index with a negative value.
int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
int64_t SPOffset) {
unsigned Align = MinAlign(SPOffset, StackAlignment);
@@ -700,7 +688,7 @@ void MachineFrameInfo::dump(const MachineFunction &MF) const {
// MachineJumpTableInfo implementation
//===----------------------------------------------------------------------===//
-/// getEntrySize - Return the size of each entry in the jump table.
+/// Return the size of each entry in the jump table.
unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const {
// The size of a jump table entry is 4 bytes unless the entry is just the
// address of a block, in which case it is the pointer size.
@@ -719,7 +707,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const {
llvm_unreachable("Unknown jump table encoding!");
}
-/// getEntryAlignment - Return the alignment of each entry in the jump table.
+/// Return the alignment of each entry in the jump table.
unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
// The alignment of a jump table entry is the alignment of int32 unless the
// entry is just the address of a block, in which case it is the pointer
@@ -739,8 +727,7 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
llvm_unreachable("Unknown jump table encoding!");
}
-/// createJumpTableIndex - Create a new jump table entry in the jump table info.
-///
+/// Create a new jump table entry in the jump table info.
unsigned MachineJumpTableInfo::createJumpTableIndex(
const std::vector<MachineBasicBlock*> &DestBBs) {
assert(!DestBBs.empty() && "Cannot create an empty jump table!");
@@ -748,8 +735,8 @@ unsigned MachineJumpTableInfo::createJumpTableIndex(
return JumpTables.size()-1;
}
-/// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
-/// the jump tables to branch to New instead.
+/// If Old is the target of any jump tables, update the jump tables to branch
+/// to New instead.
bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
MachineBasicBlock *New) {
assert(Old != New && "Not making a change?");
@@ -759,8 +746,8 @@ bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
return MadeChange;
}
-/// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update
-/// the jump table to branch to New instead.
+/// If Old is a target of the jump tables, update the jump table to branch to
+/// New instead.
bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
MachineBasicBlock *Old,
MachineBasicBlock *New) {
@@ -858,8 +845,8 @@ MachineConstantPool::~MachineConstantPool() {
delete *I;
}
-/// CanShareConstantPoolEntry - Test whether the given two constants
-/// can be allocated the same constant pool entry.
+/// Test whether the given two constants can be allocated the same constant pool
+/// entry.
static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
const DataLayout *TD) {
// Handle the trivial case quickly.
@@ -901,10 +888,8 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
return A == B;
}
-/// getConstantPoolIndex - Create a new entry in the constant pool or return
-/// an existing one. User must specify the log2 of the minimum required
-/// alignment for the object.
-///
+/// Create a new entry in the constant pool or return an existing one.
+/// User must specify the log2 of the minimum required alignment for the object.
unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
unsigned Alignment) {
assert(Alignment && "Alignment must be specified!");
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
index f6f34ba..338cd1e 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -15,12 +15,14 @@
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionInitializer.h"
using namespace llvm;
char MachineFunctionAnalysis::ID = 0;
-MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm) :
- FunctionPass(ID), TM(tm), MF(nullptr) {
+MachineFunctionAnalysis::MachineFunctionAnalysis(
+ const TargetMachine &tm, MachineFunctionInitializer *MFInitializer)
+ : FunctionPass(ID), TM(tm), MF(nullptr), MFInitializer(MFInitializer) {
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
}
@@ -47,6 +49,8 @@ bool MachineFunctionAnalysis::runOnFunction(Function &F) {
assert(!MF && "MachineFunctionAnalysis already initialized!");
MF = new MachineFunction(&F, TM, NextFnNum++,
getAnalysis<MachineModuleInfo>());
+ if (MFInitializer)
+ MFInitializer->initializeMachineFunction(*MF);
return false;
}
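The new MFInitializer hook lets whoever constructed the pass pipeline seed each freshly created MachineFunction; the MIR parser in this change is the intended client. Only the call site and anchor() are visible here, so assuming the interface is the single virtual suggested by MIRParserImpl::initializeMachineFunction, a minimal implementation would look like:

// Hypothetical sketch; the real declaration lives in
// llvm/CodeGen/MachineFunctionInitializer.h, included above.
struct NoopMFInitializer : public MachineFunctionInitializer {
  // Returning true would signal failure, mirroring the MIR parser's convention.
  bool initializeMachineFunction(MachineFunction &MF) override {
    return false; // leave MF in its default state
  }
};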
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 790f5ac..57b7230 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -49,7 +49,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
};
char MachineFunctionPrinterPass::ID = 0;
-}
+} // namespace
char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
INITIALIZE_PASS(MachineFunctionPrinterPass, "machineinstr-printer",
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index e671028..19ba5cf 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1450,9 +1450,9 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
if (const Value *V = (*I)->getValue()) {
// If we have an AliasAnalysis, ask it whether the memory is constant.
- if (AA && AA->pointsToConstantMemory(
- AliasAnalysis::Location(V, (*I)->getSize(),
- (*I)->getAAInfo())))
+ if (AA &&
+ AA->pointsToConstantMemory(
+ MemoryLocation(V, (*I)->getSize(), (*I)->getAAInfo())))
continue;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index cce590c..e9ea5ed 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -27,7 +27,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -74,7 +74,7 @@ namespace {
const TargetRegisterInfo *TRI;
const MachineFrameInfo *MFI;
MachineRegisterInfo *MRI;
- const InstrItineraryData *InstrItins;
+ TargetSchedModel SchedModel;
bool PreRegAlloc;
// Various analyses that we use...
@@ -338,12 +338,13 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
return false;
Changed = FirstInLoop = false;
- TII = MF.getSubtarget().getInstrInfo();
- TLI = MF.getSubtarget().getTargetLowering();
- TRI = MF.getSubtarget().getRegisterInfo();
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ TII = ST.getInstrInfo();
+ TLI = ST.getTargetLowering();
+ TRI = ST.getRegisterInfo();
MFI = MF.getFrameInfo();
MRI = &MF.getRegInfo();
- InstrItins = MF.getSubtarget().getInstrItineraryData();
+ SchedModel.init(ST.getSchedModel(), &ST, TII);
PreRegAlloc = MRI->isSSA();
@@ -1046,7 +1047,7 @@ bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
/// it 'high'.
bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
unsigned DefIdx, unsigned Reg) const {
- if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))
+ if (MRI->use_nodbg_empty(Reg))
return false;
for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
@@ -1062,7 +1063,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
if (MOReg != Reg)
continue;
- if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, &UseMI, i))
+ if (TII->hasHighOperandLatency(SchedModel, MRI, &MI, DefIdx, &UseMI, i))
return true;
}
@@ -1078,8 +1079,6 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike())
return true;
- if (!InstrItins || InstrItins->isEmpty())
- return false;
bool isCheap = false;
unsigned NumDefs = MI.getDesc().getNumDefs();
@@ -1092,7 +1091,7 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
if (TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
- if (!TII->hasLowDefLatency(InstrItins, &MI, i))
+ if (!TII->hasLowDefLatency(SchedModel, &MI, i))
return false;
isCheap = true;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
index eec984f..a303426 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -97,7 +97,7 @@ public:
void UpdateForDeletedBlock(BasicBlock *BB);
void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
};
-}
+} // namespace llvm
MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
assert(BB->hasAddressTaken() &&
@@ -318,23 +318,6 @@ void MachineModuleInfo::EndFunction() {
VariableDbgInfos.clear();
}
-/// AnalyzeModule - Scan the module for global debug information.
-///
-void MachineModuleInfo::AnalyzeModule(const Module &M) {
- // Insert functions in the llvm.used array (but not llvm.compiler.used) into
- // UsedFunctions.
- const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
- if (!GV || !GV->hasInitializer()) return;
-
- // Should be an array of 'i8*'.
- const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
-
- for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
- if (const Function *F =
- dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts()))
- UsedFunctions.insert(F);
-}
-
//===- Address of Block Management ----------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
index 71a6eba..fd1bf31 100644
--- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -340,7 +340,7 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
/// for the specified BB and if so, return it. If not, construct SSA form by
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index 44107d6..dd7654b 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -347,7 +347,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
if (skipOptnoneFunction(*mf.getFunction()))
return false;
- if (!mf.getSubtarget().enablePostMachineScheduler()) {
+ if (!mf.getSubtarget().enablePostRAScheduler()) {
DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
return false;
}
@@ -1262,7 +1262,7 @@ public:
protected:
void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
};
-} // anonymous
+} // namespace
void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
ScheduleDAGMI *DAG) {
@@ -1271,7 +1271,7 @@ void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
SUnit *SU = Loads[Idx];
unsigned BaseReg;
unsigned Offset;
- if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
+ if (TII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
}
if (LoadRecords.size() < 2)
@@ -1355,7 +1355,7 @@ public:
void apply(ScheduleDAGMI *DAG) override;
};
-} // anonymous
+} // namespace
/// \brief Callback from DAG postProcessing to create cluster edges to encourage
/// fused operations.
@@ -1407,7 +1407,7 @@ public:
protected:
void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
};
-} // anonymous
+} // namespace
/// constrainLocalCopy handles two possibilities:
/// 1) Local src:
@@ -2150,7 +2150,7 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
bool IsPostRA,
SchedBoundary &CurrZone,
SchedBoundary *OtherZone) {
- // Apply preemptive heuristics based on the the total latency and resources
+ // Apply preemptive heuristics based on the total latency and resources
// inside and outside this zone. Potential stalls should be considered before
// following this policy.
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index aed0e50..1b9be50 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -73,6 +73,9 @@ namespace {
SparseBitVector<> RegsToClearKillFlags;
+ typedef std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>
+ AllSuccsCache;
+
public:
static char ID; // Pass identification
MachineSinking() : MachineFunctionPass(ID) {
@@ -120,18 +123,24 @@ namespace {
MachineBasicBlock *From,
MachineBasicBlock *To,
bool BreakPHIEdge);
- bool SinkInstruction(MachineInstr *MI, bool &SawStore);
+ bool SinkInstruction(MachineInstr *MI, bool &SawStore,
+ AllSuccsCache &AllSuccessors);
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB,
bool &BreakPHIEdge, bool &LocalUse) const;
MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB,
- bool &BreakPHIEdge);
+ bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
MachineBasicBlock *MBB,
- MachineBasicBlock *SuccToSinkTo);
+ MachineBasicBlock *SuccToSinkTo,
+ AllSuccsCache &AllSuccessors);
bool PerformTrivialForwardCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB);
+
+ SmallVector<MachineBasicBlock *, 4> &
+ GetAllSortedSuccessors(MachineInstr *MI, MachineBasicBlock *MBB,
+ AllSuccsCache &AllSuccessors) const;
};
} // end anonymous namespace
@@ -269,9 +278,8 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
// Process all basic blocks.
CEBCandidates.clear();
ToSplit.clear();
- for (MachineFunction::iterator I = MF.begin(), E = MF.end();
- I != E; ++I)
- MadeChange |= ProcessBlock(*I);
+ for (auto &MBB: MF)
+ MadeChange |= ProcessBlock(MBB);
// If we have anything we marked as toSplit, split it now.
for (auto &Pair : ToSplit) {
@@ -310,6 +318,9 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
bool MadeChange = false;
+ // Cache all successors, sorted by frequency info and loop depth.
+ AllSuccsCache AllSuccessors;
+
// Walk the basic block bottom-up. Remember if we saw a store.
MachineBasicBlock::iterator I = MBB.end();
--I;
@@ -332,7 +343,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
continue;
}
- if (SinkInstruction(MI, SawStore))
+ if (SinkInstruction(MI, SawStore, AllSuccessors))
++NumSunk, MadeChange = true;
// If we just processed the first instruction in the block, we're done.
@@ -484,7 +495,8 @@ static void collectDebugValues(MachineInstr *MI,
/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
MachineBasicBlock *MBB,
- MachineBasicBlock *SuccToSinkTo) {
+ MachineBasicBlock *SuccToSinkTo,
+ AllSuccsCache &AllSuccessors) {
assert (MI && "Invalid MachineInstr!");
assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
@@ -514,18 +526,66 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
  // can be further profitably sunk into another block in the next round.
bool BreakPHIEdge = false;
// FIXME - If finding successor is compile time expensive then cache results.
- if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge))
- return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2);
+ if (MachineBasicBlock *MBB2 =
+ FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
+ return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
// If SuccToSinkTo is final destination and it is a post dominator of current
// block then it is not profitable to sink MI into SuccToSinkTo block.
return false;
}
+/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
+/// computing it if it was not already cached.
+SmallVector<MachineBasicBlock *, 4> &
+MachineSinking::GetAllSortedSuccessors(MachineInstr *MI, MachineBasicBlock *MBB,
+ AllSuccsCache &AllSuccessors) const {
+
+ // Do we have the sorted successors in cache ?
+ auto Succs = AllSuccessors.find(MBB);
+ if (Succs != AllSuccessors.end())
+ return Succs->second;
+
+ SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->succ_begin(),
+ MBB->succ_end());
+
+ // Handle cases where sinking can happen but where the sink point isn't a
+ // successor. For example:
+ //
+ // x = computation
+ // if () {} else {}
+ // use x
+ //
+ const std::vector<MachineDomTreeNode *> &Children =
+ DT->getNode(MBB)->getChildren();
+ for (const auto &DTChild : Children)
+ // DomTree children of MBB that have MBB as immediate dominator are added.
+ if (DTChild->getIDom()->getBlock() == MI->getParent() &&
+ // Skip MBBs already added to the AllSuccs vector above.
+ !MBB->isSuccessor(DTChild->getBlock()))
+ AllSuccs.push_back(DTChild->getBlock());
+
+ // Sort Successors according to their loop depth or block frequency info.
+ std::stable_sort(
+ AllSuccs.begin(), AllSuccs.end(),
+ [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
+ uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
+ uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
+ bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
+ return HasBlockFreq ? LHSFreq < RHSFreq
+ : LI->getLoopDepth(L) < LI->getLoopDepth(R);
+ });
+
+ auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
+
+ return it.first->second;
+}
+
/// FindSuccToSinkTo - Find a successor to sink this instruction to.
MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
MachineBasicBlock *MBB,
- bool &BreakPHIEdge) {
+ bool &BreakPHIEdge,
+ AllSuccsCache &AllSuccessors) {
assert (MI && "Invalid MachineInstr!");
assert (MBB && "Invalid MachineBasicBlock!");
@@ -579,38 +639,8 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
// we should sink to. If we have reliable block frequency information
// (frequency != 0) available, give successors with smaller frequencies
// higher priority, otherwise prioritize smaller loop depths.
- SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(),
- MBB->succ_end());
-
- // Handle cases where sinking can happen but where the sink point isn't a
- // successor. For example:
- //
- // x = computation
- // if () {} else {}
- // use x
- //
- const std::vector<MachineDomTreeNode *> &Children =
- DT->getNode(MBB)->getChildren();
- for (const auto &DTChild : Children)
- // DomTree children of MBB that have MBB as immediate dominator are added.
- if (DTChild->getIDom()->getBlock() == MI->getParent() &&
- // Skip MBBs already added to the Succs vector above.
- !MBB->isSuccessor(DTChild->getBlock()))
- Succs.push_back(DTChild->getBlock());
-
- // Sort Successors according to their loop depth or block frequency info.
- std::stable_sort(
- Succs.begin(), Succs.end(),
- [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
- uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
- uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
- bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
- return HasBlockFreq ? LHSFreq < RHSFreq
- : LI->getLoopDepth(L) < LI->getLoopDepth(R);
- });
- for (SmallVectorImpl<MachineBasicBlock *>::iterator SI = Succs.begin(),
- E = Succs.end(); SI != E; ++SI) {
- MachineBasicBlock *SuccBlock = *SI;
+ for (MachineBasicBlock *SuccBlock :
+ GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
bool LocalUse = false;
if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
BreakPHIEdge, LocalUse)) {
@@ -625,7 +655,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
// If we couldn't find a block to sink to, ignore this instruction.
if (!SuccToSinkTo)
return nullptr;
- if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo))
+ if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
return nullptr;
}
}
@@ -645,7 +675,8 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
-bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore,
+ AllSuccsCache &AllSuccessors) {
// Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
// be close to the source to make it easier to coalesce.
if (AvoidsSinking(MI, MRI))
@@ -669,8 +700,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
bool BreakPHIEdge = false;
MachineBasicBlock *ParentBlock = MI->getParent();
- MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock,
- BreakPHIEdge);
+ MachineBasicBlock *SuccToSinkTo =
+ FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);
// If there are no outputs, it must have side-effects.
if (!SuccToSinkTo)
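The MachineSinking changes above thread an AllSuccsCache through the sinking helpers so the sorted successor list is computed once per block. A self-contained illustration of that memoization pattern, using plain stand-in types rather than LLVM's (Block, SuccsCache and the helper are hypothetical):

#include <algorithm>
#include <cstdint>
#include <map>
#include <vector>

struct Block {
  std::vector<Block *> Succs;
  uint64_t Freq = 0;       // 0 means "no reliable block frequency info"
  unsigned LoopDepth = 0;
};

using SuccsCache = std::map<Block *, std::vector<Block *>>;

// Return the successors of B sorted by frequency when available, otherwise by
// loop depth, caching the result so repeated queries are cheap.
static const std::vector<Block *> &getSortedSuccs(Block *B, SuccsCache &Cache) {
  auto It = Cache.find(B);
  if (It != Cache.end())
    return It->second;

  std::vector<Block *> Sorted(B->Succs);
  std::stable_sort(Sorted.begin(), Sorted.end(), [](Block *L, Block *R) {
    bool HasFreq = L->Freq != 0 && R->Freq != 0;
    return HasFreq ? L->Freq < R->Freq : L->LoopDepth < R->LoopDepth;
  });
  return Cache.emplace(B, std::move(Sorted)).first->second;
}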
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 34ac9d5..7704d14 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -306,7 +306,7 @@ public:
MinInstrCountEnsemble(MachineTraceMetrics *mtm)
: MachineTraceMetrics::Ensemble(mtm) {}
};
-}
+} // namespace
// Select the preferred predecessor for MBB.
const MachineBasicBlock*
@@ -414,7 +414,7 @@ struct LoopBounds {
const MachineLoopInfo *loops)
: Blocks(blocks), Loops(loops), Downward(false) {}
};
-}
+} // namespace
// Specialize po_iterator_storage in order to prune the post-order traversal so
// it is limited to the current loop and doesn't traverse the loop back edges.
@@ -447,7 +447,7 @@ public:
return LB.Visited.insert(To).second;
}
};
-}
+} // namespace llvm
/// Compute the trace through MBB.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
@@ -619,7 +619,7 @@ struct DataDep {
assert((++DefI).atEnd() && "Register has multiple defs");
}
};
-}
+} // namespace
// Get the input data dependencies that must be ready before UseMI can issue.
// Return true if UseMI has any physreg operands.
@@ -681,7 +681,7 @@ struct LiveRegUnit {
LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {}
};
-}
+} // namespace
// Identify physreg dependencies for UseMI, and update the live regunit
// tracking set when scanning instructions downwards.
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index ca35ec5..72a6769 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -258,7 +258,7 @@ namespace {
}
};
-}
+} // namespace
char MachineVerifierPass::ID = 0;
INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
@@ -1710,7 +1710,7 @@ namespace {
bool EntryIsSetup;
bool ExitIsSetup;
};
-}
+} // namespace
/// Make sure on every path through the CFG, a FrameSetup <n> is always followed
/// by a FrameDestroy <n>, stack adjustments are identical on all
diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
index a1042e7..9780d75 100644
--- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -55,7 +55,7 @@ namespace {
bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle);
bool OptimizeBB(MachineBasicBlock &MBB);
};
-}
+} // namespace
char OptimizePHIs::ID = 0;
char &llvm::OptimizePHIsID = OptimizePHIs::ID;
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
index d514190..471c78a 100644
--- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -88,8 +88,8 @@ namespace {
// These functions are temporary abstractions around LiveVariables and
// LiveIntervals, so they can go away when LiveVariables does.
- bool isLiveIn(unsigned Reg, MachineBasicBlock *MBB);
- bool isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB);
+ bool isLiveIn(unsigned Reg, const MachineBasicBlock *MBB);
+ bool isLiveOutPastPHIs(unsigned Reg, const MachineBasicBlock *MBB);
typedef std::pair<unsigned, unsigned> BBVRegPair;
typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
@@ -104,7 +104,7 @@ namespace {
MachineInstrExpressionTrait> LoweredPHIMap;
LoweredPHIMap LoweredPHIs;
};
-}
+} // namespace
STATISTIC(NumLowered, "Number of phis lowered");
STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
@@ -143,16 +143,16 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
// updating LiveIntervals, so we disable it.
if (!DisableEdgeSplitting && (LV || LIS)) {
MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- Changed |= SplitPHIEdges(MF, *I, MLI);
+ for (auto &MBB : MF)
+ Changed |= SplitPHIEdges(MF, MBB, MLI);
}
// Populate VRegPHIUseCount
analyzePHINodes(MF);
// Eliminate PHI instructions by inserting copies into predecessor blocks.
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- Changed |= EliminatePHINodes(MF, *I);
+ for (auto &MBB : MF)
+ Changed |= EliminatePHINodes(MF, MBB);
// Remove dead IMPLICIT_DEF instructions.
for (MachineInstr *DefMI : ImpDefs) {
@@ -623,7 +623,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
return Changed;
}
-bool PHIElimination::isLiveIn(unsigned Reg, MachineBasicBlock *MBB) {
+bool PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock *MBB) {
assert((LV || LIS) &&
"isLiveIn() requires either LiveVariables or LiveIntervals");
if (LIS)
@@ -632,7 +632,8 @@ bool PHIElimination::isLiveIn(unsigned Reg, MachineBasicBlock *MBB) {
return LV->isLiveIn(Reg, *MBB);
}
-bool PHIElimination::isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB) {
+bool PHIElimination::isLiveOutPastPHIs(unsigned Reg,
+ const MachineBasicBlock *MBB) {
assert((LV || LIS) &&
"isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals");
// LiveVariables considers uses in PHIs to be in the predecessor basic block,
@@ -642,11 +643,9 @@ bool PHIElimination::isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB) {
// out of the block.
if (LIS) {
const LiveInterval &LI = LIS->getInterval(Reg);
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI) {
- if (LI.liveAt(LIS->getMBBStartIdx(*SI)))
+ for (const MachineBasicBlock *SI : MBB->successors())
+ if (LI.liveAt(LIS->getMBBStartIdx(SI)))
return true;
- }
return false;
} else {
return LV->isLiveOut(Reg, *MBB);
diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp
index 4cd86e6..210a7a1 100644
--- a/contrib/llvm/lib/CodeGen/Passes.cpp
+++ b/contrib/llvm/lib/CodeGen/Passes.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
@@ -72,6 +73,10 @@ static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden,
cl::desc("Disable Copy Propagation pass"));
static cl::opt<bool> DisablePartialLibcallInlining("disable-partial-libcall-inlining",
cl::Hidden, cl::desc("Disable Partial Libcall Inlining"));
+static cl::opt<bool> EnableImplicitNullChecks(
+ "enable-implicit-null-checks",
+ cl::desc("Fold null checks into faulting memory operations"),
+ cl::init(false));
static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
cl::desc("Print LLVM IR produced by the loop-reduce pass"));
static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
@@ -452,6 +457,9 @@ void TargetPassConfig::addCodeGenPrepare() {
void TargetPassConfig::addISelPrepare() {
addPreISel();
+ // Add both the safe stack and the stack protection passes: each of them will
+ // only protect functions that have corresponding attributes.
+ addPass(createSafeStackPass());
addPass(createStackProtectorPass(TM));
if (PrintISelInput)
@@ -543,6 +551,9 @@ void TargetPassConfig::addMachinePasses() {
// Run pre-sched2 passes.
addPreSched2();
+ if (EnableImplicitNullChecks)
+ addPass(&ImplicitNullChecksID);
+
// Second pass scheduler.
if (getOptLevel() != CodeGenOpt::None) {
if (MISchedPostRA)
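The Passes.cpp hunks above add a cl::opt that is off by default and only schedule the ImplicitNullChecks pass when it is set. A hedged sketch of that flag-gated pattern; the flag name and pass below are placeholders, not part of the patch:

#include "llvm/Support/CommandLine.h"

static llvm::cl::opt<bool> EnableExtraPass(
    "enable-extra-pass",                              // hypothetical flag name
    llvm::cl::desc("Run an additional, optional machine pass"),
    llvm::cl::init(false));

// Inside a TargetPassConfig hook the pass is then added conditionally, as the
// hunk above does for ImplicitNullChecksID:
//   if (EnableExtraPass)
//     addPass(&SomePassID);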
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index ebe05e3..71c0a64 100644
--- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -293,7 +293,7 @@ namespace {
/// register of the last source.
unsigned getReg() const { return Reg; }
};
-}
+} // namespace
char PeepholeOptimizer::ID = 0;
char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 55f08e4..6760b5f 100644
--- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -184,7 +184,7 @@ namespace {
void dumpSchedule() const;
void emitNoop(unsigned CurCycle);
};
-}
+} // namespace
char &llvm::PostRASchedulerID = PostRAScheduler::ID;
@@ -257,7 +257,7 @@ bool PostRAScheduler::enablePostRAScheduler(
TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
Mode = ST.getAntiDepBreakMode();
ST.getCriticalPathRCs(CriticalPathRCs);
- return ST.enablePostMachineScheduler() &&
+ return ST.enablePostRAScheduler() &&
OptLevel >= ST.getOptLevelToEnablePostRAScheduler();
}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index fd3d4d7..4a46638 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -194,7 +194,7 @@ namespace {
bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
};
char RAFast::ID = 0;
-}
+} // namespace
/// getStackSpaceFor - This allocates space for the specified virtual register
/// to be held on the stack.
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index e513a4f..e2061fe 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -2633,7 +2633,8 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// "overflow bit" 32. As a workaround we drop all subregister ranges
// which means we loose some precision but are back to a well defined
// state.
- assert((CP.getNewRC()->getLaneMask() & 0x80000000u)
+ assert(TargetRegisterInfo::isImpreciseLaneMask(
+ CP.getNewRC()->getLaneMask())
&& "SubRange merge should only fail when merging into bit 32.");
DEBUG(dbgs() << "\tSubrange join aborted!\n");
LHS.clearSubRanges();
@@ -2696,7 +2697,7 @@ struct MBBPriorityInfo {
MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
: MBB(mbb), Depth(depth), IsSplit(issplit) {}
};
-}
+} // namespace
/// C-style comparator that sorts first based on the loop depth of the basic
/// block (the unsigned), and then on the MBB number.
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
index 04067a1..4ba7441 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
@@ -111,6 +111,6 @@ namespace llvm {
/// Return the register class of the coalesced register.
const TargetRegisterClass *getNewRC() const { return NewRC; }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
index a34bd63..4176686 100644
--- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -103,10 +103,6 @@ void RegScavenger::determineKillsAndDefs() {
// Find out which registers are early clobbered, killed, defined, and marked
// def-dead in this instruction.
- // FIXME: The scavenger is not predication aware. If the instruction is
- // predicated, conservatively assume "kill" markers do not actually kill the
- // register. Similarly ignores "dead" markers.
- bool isPred = TII->isPredicated(MI);
KillRegUnits.reset();
DefRegUnits.reset();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -124,7 +120,7 @@ void RegScavenger::determineKillsAndDefs() {
}
// Apply the mask.
- (isPred ? DefRegUnits : KillRegUnits) |= TmpRegUnits;
+ KillRegUnits |= TmpRegUnits;
}
if (!MO.isReg())
continue;
@@ -136,11 +132,11 @@ void RegScavenger::determineKillsAndDefs() {
// Ignore undef uses.
if (MO.isUndef())
continue;
- if (!isPred && MO.isKill())
+ if (MO.isKill())
addRegUnits(KillRegUnits, Reg);
} else {
assert(MO.isDef());
- if (!isPred && MO.isDead())
+ if (MO.isDead())
addRegUnits(KillRegUnits, Reg);
else
addRegUnits(DefRegUnits, Reg);
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index e8e47b7..ae4b935 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -574,11 +574,11 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
- AliasAnalysis::AliasResult AAResult = AA->alias(
- AliasAnalysis::Location(MMOa->getValue(), Overlapa,
- UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
- AliasAnalysis::Location(MMOb->getValue(), Overlapb,
- UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
+ AliasAnalysis::AliasResult AAResult =
+ AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
+ UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+ MemoryLocation(MMOb->getValue(), Overlapb,
+ UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
return (AAResult != AliasAnalysis::NoAlias);
}
@@ -1508,7 +1508,7 @@ public:
return getCurr()->Preds.end();
}
};
-} // anonymous
+} // namespace
static bool hasDataSucc(const SUnit *SU) {
for (SUnit::const_succ_iterator
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
index b2e4617..cdf27ae 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -72,7 +72,7 @@ namespace llvm {
return G->addCustomGraphFeatures(GW);
}
};
-}
+} // namespace llvm
std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
const ScheduleDAG *G) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a71c676..5fea52c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -255,6 +255,7 @@ namespace {
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitRotate(SDNode *N);
+ SDValue visitBSWAP(SDNode *N);
SDValue visitCTLZ(SDNode *N);
SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
@@ -387,6 +388,13 @@ namespace {
unsigned SequenceNum;
};
+ /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
+ /// constant build_vector of the stored constant values in Stores.
+ SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
+ SDLoc SL,
+ ArrayRef<MemOpLink> Stores,
+ EVT Ty) const;
+
/// This is a helper function for MergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store.
@@ -395,6 +403,13 @@ namespace {
EVT MemVT, unsigned NumElem,
bool IsConstantSrc, bool UseVector);
+ /// This is a helper function for MergeConsecutiveStores.
+ /// Stores that may be merged are placed in StoreNodes.
+ /// Loads that may alias with those stores are placed in AliasLoadNodes.
+ void getStoreMergeAndAliasCandidates(
+ StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
+ SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
@@ -444,7 +459,7 @@ namespace {
return TLI.getSetCCResultType(*DAG.getContext(), VT);
}
};
-}
+} // namespace
namespace {
@@ -460,7 +475,7 @@ public:
DC.removeFromWorklist(N);
}
};
-}
+} // namespace
//===----------------------------------------------------------------------===//
// TargetLowering::DAGCombinerInfo implementation
@@ -1335,6 +1350,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRL: return visitSRL(N);
case ISD::ROTR:
case ISD::ROTL: return visitRotate(N);
+ case ISD::BSWAP: return visitBSWAP(N);
case ISD::CTLZ: return visitCTLZ(N);
case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
@@ -1454,12 +1470,9 @@ SDValue DAGCombiner::combine(SDNode *N) {
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
SDValue Ops[] = {N1, N0};
SDNode *CSENode;
- if (const BinaryWithFlagsSDNode *BinNode =
- dyn_cast<BinaryWithFlagsSDNode>(N)) {
+ if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) {
CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
- BinNode->Flags.hasNoUnsignedWrap(),
- BinNode->Flags.hasNoSignedWrap(),
- BinNode->Flags.hasExact());
+ &BinNode->Flags);
} else {
CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
}
@@ -4764,6 +4777,19 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitBSWAP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (bswap c1) -> c2
+ if (isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
+ // fold (bswap (bswap x)) -> x
+ if (N0.getOpcode() == ISD::BSWAP)
+ return N0->getOperand(0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -5141,7 +5167,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MSC->getPointerInfo(),
+ getMachineMemOperand(MSC->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, MSC->getAAInfo(), MSC->getRanges());
@@ -5280,7 +5306,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MGT->getPointerInfo(),
+ getMachineMemOperand(MGT->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
@@ -8078,7 +8104,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
-
+
// Check 1: Make sure that the first operand of the inner multiply is NOT
// a constant. Otherwise, we may induce infinite looping.
if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
@@ -9928,7 +9954,7 @@ struct LoadedSlice {
return true;
}
};
-}
+} // namespace
/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
@@ -10576,6 +10602,18 @@ struct BaseIndexOffset {
};
} // namespace
+SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG,
+ SDLoc SL,
+ ArrayRef<MemOpLink> Stores,
+ EVT Ty) const {
+ SmallVector<SDValue, 8> BuildVector;
+
+ for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I)
+ BuildVector.push_back(cast<StoreSDNode>(Stores[I].MemNode)->getValue());
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector);
+}
+
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
unsigned NumElem, bool IsConstantSrc, bool UseVector) {
@@ -10606,12 +10644,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
if (IsConstantSrc) {
- // A vector store with a constant source implies that the constant is
- // zero; we only handle merging stores of constant zeros because the zero
- // can be materialized without a load.
- // It may be beneficial to loosen this restriction to allow non-zero
- // store merging.
- StoredVal = DAG.getConstant(0, DL, Ty);
+ StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty);
} else {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElem ; ++i) {
@@ -10631,8 +10664,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
// elements, so this path implies a store of constants.
assert(IsConstantSrc && "Merged vector elements should use vector store");
- unsigned StoreBW = NumElem * ElementSizeBytes * 8;
- APInt StoreInt(StoreBW, 0);
+ unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ APInt StoreInt(SizeInBits, 0);
// Construct a single integer constant which is made of the smaller
// constant inputs.
@@ -10641,18 +10674,18 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
SDValue Val = St->getValue();
- StoreInt <<= ElementSizeBytes*8;
+ StoreInt <<= ElementSizeBytes * 8;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
- StoreInt |= C->getAPIntValue().zext(StoreBW);
+ StoreInt |= C->getAPIntValue().zext(SizeInBits);
} else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
- StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+ StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
} else {
llvm_unreachable("Invalid constant element type");
}
}
// Create the new Load and Store operations.
- EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
}
@@ -10698,62 +10731,25 @@ static bool allowableAlignment(const SelectionDAG &DAG,
return (Align >= ABIAlignment);
}
-bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
- if (OptLevel == CodeGenOpt::None)
- return false;
-
- EVT MemVT = St->getMemoryVT();
- int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
- bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
- Attribute::NoImplicitFloat);
-
- // This function cannot currently deal with non-byte-sized memory sizes.
- if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
- return false;
-
- // Don't merge vectors into wider inputs.
- if (MemVT.isVector() || !MemVT.isSimple())
- return false;
-
- // Perform an early exit check. Do not bother looking at stored values that
- // are not constants, loads, or extracted vector elements.
- SDValue StoredVal = St->getValue();
- bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
- bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
- isa<ConstantFPSDNode>(StoredVal);
- bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
-
- if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
- return false;
-
- // Only look at ends of store sequences.
- SDValue Chain = SDValue(St, 0);
- if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
- return false;
-
+void DAGCombiner::getStoreMergeAndAliasCandidates(
+ StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
+ SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
// We must have a base and an offset.
if (!BasePtr.Base.getNode())
- return false;
+ return;
// Do not handle stores to undef base pointers.
if (BasePtr.Base.getOpcode() == ISD::UNDEF)
- return false;
-
- // Save the LoadSDNodes that we find in the chain.
- // We need to make sure that these nodes do not interfere with
- // any of the store nodes.
- SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
-
- // Save the StoreSDNodes that we find in the chain.
- SmallVector<MemOpLink, 8> StoreNodes;
+ return;
// Walk up the chain and look for nodes with offsets from the same
// base pointer. Stop when reaching an instruction with a different kind
// or instruction which has a different base pointer.
+ EVT MemVT = St->getMemoryVT();
unsigned Seq = 0;
StoreSDNode *Index = St;
while (Index) {
@@ -10810,7 +10806,51 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
}
}
}
+}
+
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ EVT MemVT = St->getMemoryVT();
+ int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+ bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat);
+
+ // This function cannot currently deal with non-byte-sized memory sizes.
+ if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
+ return false;
+
+ // Don't merge vectors into wider inputs.
+ if (MemVT.isVector() || !MemVT.isSimple())
+ return false;
+
+ // Perform an early exit check. Do not bother looking at stored values that
+ // are not constants, loads, or extracted vector elements.
+ SDValue StoredVal = St->getValue();
+ bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
+ bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
+ isa<ConstantFPSDNode>(StoredVal);
+ bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+
+ if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
+ return false;
+
+ // Only look at ends of store sequences.
+ SDValue Chain = SDValue(St, 0);
+ if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
+ return false;
+
+ // Save the LoadSDNodes that we find in the chain.
+ // We need to make sure that these nodes do not interfere with
+ // any of the store nodes.
+ SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
+
+ // Save the StoreSDNodes that we find in the chain.
+ SmallVector<MemOpLink, 8> StoreNodes;
+ getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
+
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
return false;
@@ -10876,8 +10916,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
}
// Find a legal type for the constant store.
- unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
- EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS,
FirstStoreAlign)) {
@@ -11039,8 +11079,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
}
// Find a legal type for the integer store.
- unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
- StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
+ StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign))
@@ -11094,8 +11134,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (UseVectorTy) {
JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
} else {
- unsigned StoreBW = NumElem * ElementSizeBytes * 8;
- JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
}
SDLoc LoadDL(LoadNodes[0].MemNode);
@@ -12093,7 +12133,7 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
}
// If any of the operands is a floating point scalar bitcast to a vector,
- // use floating point types throughout, and bitcast everything.
+ // use floating point types throughout, and bitcast everything.
// Replace UNDEFs by another scalar UNDEF node, of the final desired type.
if (AnyFP) {
SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
@@ -12924,7 +12964,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
SDValue RHS = N->getOperand(1);
SDLoc dl(N);
- // Make sure we're not running after operation legalization where it
+ // Make sure we're not running after operation legalization where it
// may have custom lowered the vector shuffles.
if (LegalOperations)
return SDValue();
@@ -13845,12 +13885,10 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
Op1->getSrcValueOffset() - MinOffset;
AliasAnalysis::AliasResult AAResult =
- AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(),
- Overlap1,
- UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
- AliasAnalysis::Location(Op1->getMemOperand()->getValue(),
- Overlap2,
- UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
+ AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
+ UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
+ MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
+ UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
if (AAResult == AliasAnalysis::NoAlias)
return false;
}
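Among the DAGCombiner changes above, visitBSWAP adds two folds: a byte swap of a constant folds at compile time, and bswap(bswap x) folds back to x. A standalone C++ illustration of the underlying identities (bswap32 and main are stand-ins, not DAG code):

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0x0000FF00u) |
         ((V << 8) & 0x00FF0000u) | (V << 24);
}

int main() {
  assert(bswap32(0x11223344u) == 0x44332211u);          // (bswap c1) -> c2
  assert(bswap32(bswap32(0xDEADBEEFu)) == 0xDEADBEEFu);  // (bswap (bswap x)) -> x
  return 0;
}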
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index f3d75cb..ecaa2c9 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -259,20 +259,27 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// If this is an MSVC EH personality, we need to do a bit more work.
EHPersonality Personality = EHPersonality::Unknown;
- if (!LPads.empty())
- Personality = classifyEHPersonality(LPads.back()->getPersonalityFn());
+ if (Fn->hasPersonalityFn())
+ Personality = classifyEHPersonality(Fn->getPersonalityFn());
if (!isMSVCEHPersonality(Personality))
return;
- if (Personality == EHPersonality::MSVC_Win64SEH) {
+ if (Personality == EHPersonality::MSVC_Win64SEH ||
+ Personality == EHPersonality::MSVC_X86SEH) {
addSEHHandlersForLPads(LPads);
- } else if (Personality == EHPersonality::MSVC_CXX) {
+ }
+
+ WinEHFuncInfo &EHInfo = MMI.getWinEHFuncInfo(&fn);
+ if (Personality == EHPersonality::MSVC_CXX) {
const Function *WinEHParentFn = MMI.getWinEHParent(&fn);
- WinEHFuncInfo &EHInfo = MMI.getWinEHFuncInfo(WinEHParentFn);
calculateWinCXXEHStateNumbers(WinEHParentFn, EHInfo);
+ }
- // Copy the state numbers to LandingPadInfo for the current function, which
- // could be a handler or the parent.
+ // Copy the state numbers to LandingPadInfo for the current function, which
+ // could be a handler or the parent. This should happen for 32-bit SEH and
+ // C++ EH.
+ if (Personality == EHPersonality::MSVC_CXX ||
+ Personality == EHPersonality::MSVC_X86SEH) {
for (const LandingPadInst *LP : LPads) {
MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()];
MMI.addWinEHState(LPadMBB, EHInfo.LandingPadStateMap[LP]);
@@ -539,8 +546,10 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
/// landingpad instruction and add them to the specified machine module info.
void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
MachineBasicBlock *MBB) {
- MMI.addPersonality(MBB,
- cast<Function>(I.getPersonalityFn()->stripPointerCasts()));
+ MMI.addPersonality(
+ MBB,
+ cast<Function>(
+ I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts()));
if (I.isCleanup())
MMI.addCleanup(MBB);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 7b86f7d..2a61914 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -140,6 +140,6 @@ private:
DenseMap<SDValue, unsigned> &VRBaseMap);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7d98872..37f95e5 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -198,7 +198,7 @@ public:
ReplacedNode(Old);
}
};
-}
+} // namespace
/// Return a vector shuffle operation which
/// performs the same shuffle in terms of order or result bytes, but on a type
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 9c29769..c3e3b7c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -676,7 +676,7 @@ namespace {
NodesToAnalyze.insert(N);
}
};
-}
+} // namespace
/// ReplaceValueWith - The specified value was legalized to the specified other
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index c06227b..50ad239 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1010,7 +1010,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
-}
+} // namespace
bool SelectionDAG::LegalizeVectors() {
return VectorLegalizer(*this).Run();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index c27f8de..9493532 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -119,6 +119,6 @@ public:
bool isInvalidated() const { return Invalid; }
};
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 6351fa2..4c74182 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -180,6 +180,6 @@ namespace llvm {
void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
MachineBasicBlock::iterator InsertPos);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index cf51e75..0eff930 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -400,19 +400,24 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
ID.AddInteger(Op.getResNo());
}
}
+/// Add logical or fast math flag values to FoldingSetNodeID value.
+static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode,
+ const SDNodeFlags *Flags) {
+ if (!Flags || !isBinOpWithFlags(Opcode))
+ return;
-static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, bool nuw, bool nsw,
- bool exact) {
- ID.AddBoolean(nuw);
- ID.AddBoolean(nsw);
- ID.AddBoolean(exact);
+ unsigned RawFlags = Flags->getRawFlags();
+ // If no flags are set, do not alter the ID. We must match the ID of nodes
+ // that were created without explicitly specifying flags. This also saves time
+ // and allows a gradual increase in API usage of the optional optimization
+ // flags.
+ if (RawFlags != 0)
+ ID.AddInteger(RawFlags);
}
-/// AddBinaryNodeIDCustom - Add BinarySDNodes special infos
-static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, unsigned Opcode,
- bool nuw, bool nsw, bool exact) {
- if (isBinOpWithFlags(Opcode))
- AddBinaryNodeIDCustom(ID, nuw, nsw, exact);
+static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) {
+ if (auto *Node = dyn_cast<BinaryWithFlagsSDNode>(N))
+ AddNodeIDFlags(ID, Node->getOpcode(), &Node->Flags);
}
static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
@@ -507,20 +512,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
- case ISD::SDIV:
- case ISD::UDIV:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::MUL:
- case ISD::ADD:
- case ISD::SUB:
- case ISD::SHL: {
- const BinaryWithFlagsSDNode *BinNode = cast<BinaryWithFlagsSDNode>(N);
- AddBinaryNodeIDCustom(
- ID, N->getOpcode(), BinNode->Flags.hasNoUnsignedWrap(),
- BinNode->Flags.hasNoSignedWrap(), BinNode->Flags.hasExact());
- break;
- }
case ISD::ATOMIC_CMP_SWAP:
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
case ISD::ATOMIC_SWAP:
@@ -564,6 +555,8 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
}
} // end switch (N->getOpcode())
+ AddNodeIDFlags(ID, N);
+
// Target specific memory nodes could also have address spaces to check.
if (N->isTargetMemoryOpcode())
ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
@@ -960,14 +953,16 @@ void SelectionDAG::allnodes_clear() {
BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL,
SDVTList VTs, SDValue N1,
- SDValue N2, bool nuw, bool nsw,
- bool exact) {
+ SDValue N2,
+ const SDNodeFlags *Flags) {
if (isBinOpWithFlags(Opcode)) {
+ // If no flags were passed in, use a default flags object.
+ SDNodeFlags F;
+ if (Flags == nullptr)
+ Flags = &F;
+
BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode(
- Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2);
- FN->Flags.setNoUnsignedWrap(nuw);
- FN->Flags.setNoSignedWrap(nsw);
- FN->Flags.setExact(exact);
+ Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, *Flags);
return FN;
}
@@ -2932,6 +2927,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
+ case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ:
@@ -3081,6 +3077,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
+ case ISD::BSWAP:
+ assert(VT.isInteger() && VT == Operand.getValueType() &&
+ "Invalid BSWAP!");
+ assert((VT.getScalarSizeInBits() % 16 == 0) &&
+ "BSWAP types must be a multiple of 16 bits!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
case ISD::BITCAST:
// Basic sanity checking.
assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
@@ -3260,7 +3264,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
- SDValue N2, bool nuw, bool nsw, bool exact) {
+ SDValue N2, const SDNodeFlags *Flags) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
switch (Opcode) {
@@ -3747,22 +3751,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
// Memoize this node if possible.
BinarySDNode *N;
SDVTList VTs = getVTList(VT);
- const bool BinOpHasFlags = isBinOpWithFlags(Opcode);
if (VT != MVT::Glue) {
SDValue Ops[] = {N1, N2};
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
- if (BinOpHasFlags)
- AddBinaryNodeIDCustom(ID, Opcode, nuw, nsw, exact);
+ AddNodeIDFlags(ID, Opcode, Flags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
return SDValue(E, 0);
- N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
+ N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags);
CSEMap.InsertNode(N, IP);
} else {
- N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
+ N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags);
}
InsertNode(N);
@@ -4023,10 +4025,10 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false);
}
-/// FindOptimalMemOpLowering - Determines the optimial series memory ops
-/// to replace the memset / memcpy. Return true if the number of memory ops
-/// is below the threshold. It returns the types of the sequence of
-/// memory ops to perform memset / memcpy by reference.
+/// Determines the optimal series of memory ops to replace the memset / memcpy.
+/// Return true if the number of memory ops is below the threshold (Limit).
+/// It returns the types of the sequence of memory ops to perform
+/// memset / memcpy by reference.
static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned Limit, uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
@@ -6066,13 +6068,12 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT,
/// getNodeIfExists - Get the specified node if it's already available, or
/// else return NULL.
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
- ArrayRef<SDValue> Ops, bool nuw, bool nsw,
- bool exact) {
+ ArrayRef<SDValue> Ops,
+ const SDNodeFlags *Flags) {
if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops);
- if (isBinOpWithFlags(Opcode))
- AddBinaryNodeIDCustom(ID, nuw, nsw, exact);
+ AddNodeIDFlags(ID, Opcode, Flags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP))
return E;
@@ -6133,7 +6134,7 @@ public:
: SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
};
-}
+} // namespace
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
/// This can cause recursive merging of nodes in the DAG.
@@ -6343,7 +6344,7 @@ namespace {
bool operator<(const UseMemo &L, const UseMemo &R) {
return (intptr_t)L.User < (intptr_t)R.User;
}
-}
+} // namespace
/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone. The same value
@@ -6588,7 +6589,7 @@ namespace {
VTs.push_back(MVT((MVT::SimpleValueType)i));
}
};
-}
+} // namespace
static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs;
static ManagedStatic<EVTArray> SimpleVTArray;
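The SelectionDAG.cpp rework above replaces the per-opcode nuw/nsw/exact booleans with a single optional SDNodeFlags object whose raw bits are hashed into the node's FoldingSet ID only when at least one flag is set, so flag-free nodes keep their previous CSE identity. A simplified, self-contained illustration of that "hash only if nonzero" idea (HashState stands in for FoldingSetNodeID):

#include <cstdint>

struct HashState {
  uint64_t H = 0;
  void addInteger(uint64_t V) { H = H * 1099511628211ull + V; }  // FNV-style mix
};

// Mix optional flag bits into the identity only when something is set, so a
// node built without flags hashes exactly as it did before flags existed.
static void addFlagsToID(HashState &ID, unsigned RawFlags) {
  if (RawFlags != 0)
    ID.addInteger(RawFlags);
}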
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8ba957d..8313a48 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -78,12 +78,16 @@ LimitFPPrecision("limit-float-precision",
cl::location(LimitFloatPrecision),
cl::init(0));
+static cl::opt<bool>
+EnableFMFInDAG("enable-fmf-dag", cl::init(false), cl::Hidden,
+ cl::desc("Enable fast-math-flags for DAG nodes"));
+
// Limit the width of DAG chains. This is important in general to prevent
-// prevent DAG-based analysis from blowing up. For example, alias analysis and
+// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
-// the safe approach, and will be especially important with global DAGs.
+// the safe approach and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
@@ -2148,6 +2152,8 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
bool nuw = false;
bool nsw = false;
bool exact = false;
+ FastMathFlags FMF;
+
if (const OverflowingBinaryOperator *OFBinOp =
dyn_cast<const OverflowingBinaryOperator>(&I)) {
nuw = OFBinOp->hasNoUnsignedWrap();
@@ -2156,9 +2162,22 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
if (const PossiblyExactOperator *ExactOp =
dyn_cast<const PossiblyExactOperator>(&I))
exact = ExactOp->isExact();
-
+ if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I))
+ FMF = FPOp->getFastMathFlags();
+
+ SDNodeFlags Flags;
+ Flags.setExact(exact);
+ Flags.setNoSignedWrap(nsw);
+ Flags.setNoUnsignedWrap(nuw);
+ if (EnableFMFInDAG) {
+ Flags.setAllowReciprocal(FMF.allowReciprocal());
+ Flags.setNoInfs(FMF.noInfs());
+ Flags.setNoNaNs(FMF.noNaNs());
+ Flags.setNoSignedZeros(FMF.noSignedZeros());
+ Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
+ }
SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
- Op1, Op2, nuw, nsw, exact);
+ Op1, Op2, &Flags);
setValue(&I, BinNodeValue);
}
@@ -2206,9 +2225,12 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
dyn_cast<const PossiblyExactOperator>(&I))
exact = ExactOp->isExact();
}
-
+ SDNodeFlags Flags;
+ Flags.setExact(exact);
+ Flags.setNoSignedWrap(nsw);
+ Flags.setNoUnsignedWrap(nuw);
SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
- nuw, nsw, exact);
+ &Flags);
setValue(&I, Res);
}
@@ -2892,7 +2914,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
// Serialize volatile loads with other side effects.
Root = getRoot();
else if (AA->pointsToConstantMemory(
- AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), AAInfo))) {
+ MemoryLocation(SV, AA->getTypeStoreSize(Ty), AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -2907,8 +2929,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
SmallVector<SDValue, 4> Values(NumValues);
- SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
- NumValues));
+ SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
EVT PtrVT = Ptr.getValueType();
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
@@ -2972,8 +2993,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SDValue Ptr = getValue(PtrV);
SDValue Root = getRoot();
- SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
- NumValues));
+ SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
EVT PtrVT = Ptr.getValueType();
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
@@ -3141,10 +3161,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SDValue InChain = DAG.getRoot();
- if (AA->pointsToConstantMemory(
- AliasAnalysis::Location(PtrOperand,
- AA->getTypeStoreSize(I.getType()),
- AAInfo))) {
+ if (AA->pointsToConstantMemory(MemoryLocation(
+ PtrOperand, AA->getTypeStoreSize(I.getType()), AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
InChain = DAG.getEntryNode();
}
@@ -3186,10 +3204,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
Value *BasePtr = Ptr;
bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
bool ConstantMemory = false;
- if (UniformBase && AA->pointsToConstantMemory(
- AliasAnalysis::Location(BasePtr,
- AA->getTypeStoreSize(I.getType()),
- AAInfo))) {
+ if (UniformBase &&
+ AA->pointsToConstantMemory(
+ MemoryLocation(BasePtr, AA->getTypeStoreSize(I.getType()), AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -4983,6 +5000,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
assert(Reg && "cannot get exception code on this platform");
MVT PtrVT = TLI.getPointerTy();
const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
+ assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad");
unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC);
SDValue N =
DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
@@ -7486,6 +7504,31 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
const int64_t N = Clusters.size();
const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries();
+ // TotalCases[i]: Total nbr of cases in Clusters[0..i].
+ SmallVector<unsigned, 8> TotalCases(N);
+
+ for (unsigned i = 0; i < N; ++i) {
+ APInt Hi = Clusters[i].High->getValue();
+ APInt Lo = Clusters[i].Low->getValue();
+ TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
+ if (i != 0)
+ TotalCases[i] += TotalCases[i - 1];
+ }
+
+ if (N >= MinJumpTableSize && isDense(Clusters, &TotalCases[0], 0, N - 1)) {
+ // Cheap case: the whole range might be suitable for jump table.
+ CaseCluster JTCluster;
+ if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
+ Clusters[0] = JTCluster;
+ Clusters.resize(1);
+ return;
+ }
+ }
+
+ // The algorithm below is not suitable for -O0.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return;
+
// Split Clusters into minimum number of dense partitions. The algorithm uses
// the same idea as Kannan & Proebsting "Correction to 'Producing Good Code
// for the Case Statement'" (1994), but builds the MinPartitions array in
@@ -7499,16 +7542,6 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
SmallVector<unsigned, 8> LastElement(N);
// NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1].
SmallVector<unsigned, 8> NumTables(N);
- // TotalCases[i]: Total nbr of cases in Clusters[0..i].
- SmallVector<unsigned, 8> TotalCases(N);
-
- for (unsigned i = 0; i < N; ++i) {
- APInt Hi = Clusters[i].High->getValue();
- APInt Lo = Clusters[i].Low->getValue();
- TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
- if (i != 0)
- TotalCases[i] += TotalCases[i - 1];
- }
// Base case: There is only one way to partition Clusters[N-1].
MinPartitions[N - 1] = 1;
@@ -7696,6 +7729,10 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue()));
#endif
+ // The algorithm below is not suitable for -O0.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return;
+
// If target does not have legal shift left, do not emit bit tests at all.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PTy = TLI.getPointerTy();
@@ -7959,6 +7996,18 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
}
+unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
+ CaseClusterIt First,
+ CaseClusterIt Last) {
+ return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
+ if (X.Weight != CC.Weight)
+ return X.Weight > CC.Weight;
+
+ // Ties are broken by comparing the case value.
+ return X.Low->getValue().slt(CC.Low->getValue());
+ });
+}
+
void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
const SwitchWorkListItem &W,
Value *Cond,
@@ -7988,6 +8037,48 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
RightWeight += (--FirstRight)->Weight;
I++;
}
+
+ for (;;) {
+ // Our binary search tree differs from a typical BST in that ours can have up
+ // to three values in each leaf. The pivot selection above doesn't take that
+ // into account, which means the tree might require more nodes and be less
+ // efficient. We compensate for this here.
+
+ unsigned NumLeft = LastLeft - W.FirstCluster + 1;
+ unsigned NumRight = W.LastCluster - FirstRight + 1;
+
+ if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) {
+ // If one side has less than 3 clusters, and the other has more than 3,
+ // consider taking a cluster from the other side.
+
+ if (NumLeft < NumRight) {
+ // Consider moving the first cluster on the right to the left side.
+ CaseCluster &CC = *FirstRight;
+ unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
+ unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
+ if (LeftSideRank <= RightSideRank) {
+ // Moving the cluster to the left does not demote it.
+ ++LastLeft;
+ ++FirstRight;
+ continue;
+ }
+ } else {
+ assert(NumRight < NumLeft);
+ // Consider moving the last element on the left to the right side.
+ CaseCluster &CC = *LastLeft;
+ unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
+ unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
+ if (RightSideRank <= LeftSideRank) {
+ // Moving the cluster to the right does not demote it.
+ --LastLeft;
+ --FirstRight;
+ continue;
+ }
+ }
+ }
+ break;
+ }
+
assert(LastLeft + 1 == FirstRight);
assert(LastLeft >= W.FirstCluster);
assert(FirstRight <= W.LastCluster);
@@ -8111,11 +8202,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
return;
}
- if (TM.getOptLevel() != CodeGenOpt::None) {
- findJumpTables(Clusters, &SI, DefaultMBB);
- findBitTestClusters(Clusters, &SI);
- }
-
+ findJumpTables(Clusters, &SI, DefaultMBB);
+ findBitTestClusters(Clusters, &SI);
DEBUG({
dbgs() << "Case clusters: ";
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index f0c03af..f225d54 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -342,6 +342,11 @@ private:
};
typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList;
+ /// Determine the rank by weight of CC in [First,Last]. If CC has more weight
+ /// than each cluster in the range, its rank is 0.
+ static unsigned caseClusterRank(const CaseCluster &CC, CaseClusterIt First,
+ CaseClusterIt Last);
+
/// Emit comparison and split W into two subtrees.
void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W,
Value *Cond, MachineBasicBlock *SwitchMBB);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 22f592a..c5562cd 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -307,7 +307,7 @@ namespace llvm {
"Unknown sched type!");
return createILPListDAGScheduler(IS, OptLevel);
}
-}
+} // namespace llvm
// EmitInstrWithCustomInserter - This method should be implemented by targets
// that mark instructions with the 'usesCustomInserter' flag. These
@@ -938,8 +938,10 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
// pad into several BBs.
const BasicBlock *LLVMBB = MBB->getBasicBlock();
const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst();
- MF->getMMI().addPersonality(
- MBB, cast<Function>(LPadInst->getPersonalityFn()->stripPointerCasts()));
+ MF->getMMI().addPersonality(MBB, cast<Function>(LPadInst->getParent()
+ ->getParent()
+ ->getPersonalityFn()
+ ->stripPointerCasts()));
EHPersonality Personality = MF->getMMI().getPersonalityType();
if (isMSVCEHPersonality(Personality)) {
@@ -2540,7 +2542,7 @@ public:
J.setNode(E);
}
};
-}
+} // namespace
SDNode *SelectionDAGISel::
SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 4df5ede..19b5d16 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -132,7 +132,7 @@ namespace llvm {
"color=blue,style=dashed");
}
};
-}
+} // namespace llvm
std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
const SelectionDAG *G) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 8bbfa01..a6b3fc6 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -113,84 +113,137 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
llvm_unreachable("infinite loop?");
}
+/// Utility function for reservePreviousStackSlotForValue. Tries to find the
+/// stack slot index to which we have spilled a value for previous statepoints.
+/// LookUpDepth specifies the maximum DFS depth this function is allowed to look.
+static Optional<int> findPreviousSpillSlot(const Value *Val,
+ SelectionDAGBuilder &Builder,
+ int LookUpDepth) {
+ // Cannot look any further - give up now
+ if (LookUpDepth <= 0)
+ return Optional<int>();
+
+ // Spill location is known for gc relocates
+ if (isGCRelocate(Val)) {
+ GCRelocateOperands RelocOps(cast<Instruction>(Val));
+
+ FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
+ Builder.FuncInfo.StatepointRelocatedValues[RelocOps.getStatepoint()];
+
+ auto It = SpillMap.find(RelocOps.getDerivedPtr());
+ if (It == SpillMap.end())
+ return Optional<int>();
+
+ return It->second;
+ }
+
+ // Look through bitcast instructions.
+ if (const BitCastInst *Cast = dyn_cast<BitCastInst>(Val)) {
+ return findPreviousSpillSlot(Cast->getOperand(0), Builder, LookUpDepth - 1);
+ }
+
+ // Look through phi nodes.
+ // All incoming values should have the same known stack slot, otherwise the
+ // result is unknown.
+ if (const PHINode *Phi = dyn_cast<PHINode>(Val)) {
+ Optional<int> MergedResult = None;
+
+ for (auto &IncomingValue : Phi->incoming_values()) {
+ Optional<int> SpillSlot =
+ findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1);
+ if (!SpillSlot.hasValue())
+ return Optional<int>();
+
+ if (MergedResult.hasValue() && *MergedResult != *SpillSlot)
+ return Optional<int>();
+
+ MergedResult = SpillSlot;
+ }
+ return MergedResult;
+ }
+
+ // TODO: We can do better for PHI nodes. In cases like this:
+ // ptr = phi(relocated_pointer, not_relocated_pointer)
+ // statepoint(ptr)
+ // We will report that the stack slot for ptr is unknown, and later we might
+ // assign different stack slots for ptr and relocated_pointer. This limits
+ // llvm's ability to remove redundant stores.
+ // Unfortunately this is hard to accomplish with the current infrastructure:
+ // we use this function to eliminate the spill store completely, whereas in
+ // the example above we would still need to emit a store, just to a specific
+ // "preferred" location rather than to an arbitrary one.
+
+ // TODO: handle simple updates. If a value is modified and the original
+ // value is no longer live, it would be nice to put the modified value in the
+ // same slot. This allows folding of the memory accesses for some
+ // instructions types (like an increment).
+ // statepoint (i)
+ // i1 = i+1
+ // statepoint (i1)
+ // However we need to be careful for cases like this:
+ // statepoint(i)
+ // i1 = i+1
+ // statepoint(i, i1)
+ // Here we want to reserve a spill slot for 'i', but not for 'i+1'. If we just
+ // put handling of simple modifications in this function like it's done
+ // for bitcasts, we might end up reserving i's slot for 'i+1' because the
+ // order in which we visit values is unspecified.
+
+ // Don't know any information about this instruction
+ return Optional<int>();
+}
+
/// Try to find existing copies of the incoming values in stack slots used for
/// statepoint spilling. If we can find a spill slot for the incoming value,
/// mark that slot as allocated, and reuse the same slot for this safepoint.
/// This helps to avoid a series of loads and stores that only serve to reshuffle
/// values on the stack between calls.
-static void reservePreviousStackSlotForValue(SDValue Incoming,
+static void reservePreviousStackSlotForValue(const Value *IncomingValue,
SelectionDAGBuilder &Builder) {
+ SDValue Incoming = Builder.getValue(IncomingValue);
+
if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
// We won't need to spill this, so no need to check for previously
// allocated stack slots
return;
}
- SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
- if (Loc.getNode()) {
+ SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming);
+ if (OldLocation.getNode())
// duplicates in input
return;
- }
-
- // Search back for the load from a stack slot pattern to find the original
- // slot we allocated for this value. We could extend this to deal with
- // simple modification patterns, but simple dealing with trivial load/store
- // sequences helps a lot already.
- if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Incoming)) {
- if (auto *FI = dyn_cast<FrameIndexSDNode>(Load->getBasePtr())) {
- const int Index = FI->getIndex();
- auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(),
- Builder.FuncInfo.StatepointStackSlots.end(), Index);
- if (Itr == Builder.FuncInfo.StatepointStackSlots.end()) {
- // not one of the lowering stack slots, can't reuse!
- // TODO: Actually, we probably could reuse the stack slot if the value
- // hasn't changed at all, but we'd need to look for intervening writes
- return;
- } else {
- // This is one of our dedicated lowering slots
- const int Offset =
- std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr);
- if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) {
- // stack slot already assigned to someone else, can't use it!
- // TODO: currently we reserve space for gc arguments after doing
- // normal allocation for deopt arguments. We should reserve for
- // _all_ deopt and gc arguments, then start allocating. This
- // will prevent some moves being inserted when vm state changes,
- // but gc state doesn't between two calls.
- return;
- }
- // Reserve this stack slot
- Builder.StatepointLowering.reserveStackSlot(Offset);
- }
- // Cache this slot so we find it when going through the normal
- // assignment loop.
- SDValue Loc =
- Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType());
+ const int LookUpDepth = 6;
+ Optional<int> Index =
+ findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth);
+ if (!Index.hasValue())
+ return;
- Builder.StatepointLowering.setLocation(Incoming, Loc);
- }
+ auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(),
+ Builder.FuncInfo.StatepointStackSlots.end(), *Index);
+ assert(Itr != Builder.FuncInfo.StatepointStackSlots.end() &&
+ "value spilled to the unknown stack slot");
+
+ // This is one of our dedicated lowering slots
+ const int Offset =
+ std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr);
+ if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) {
+ // stack slot already assigned to someone else, can't use it!
+ // TODO: currently we reserve space for gc arguments after doing
+ // normal allocation for deopt arguments. We should reserve for
+ // _all_ deopt and gc arguments, then start allocating. This
+ // will prevent some moves being inserted when vm state changes,
+ // but gc state doesn't between two calls.
+ return;
}
+ // Reserve this stack slot
+ Builder.StatepointLowering.reserveStackSlot(Offset);
- // TODO: handle case where a reloaded value flows through a phi to
- // another safepoint. e.g.
- // bb1:
- // a' = relocated...
- // bb2: % pred: bb1, bb3, bb4, etc.
- // a_phi = phi(a', ...)
- // statepoint ... a_phi
- // NOTE: This will require reasoning about cross basic block values. This is
- // decidedly non trivial and this might not be the right place to do it. We
- // don't really have the information we need here...
-
- // TODO: handle simple updates. If a value is modified and the original
- // value is no longer live, it would be nice to put the modified value in the
- // same slot. This allows folding of the memory accesses for some
- // instructions types (like an increment).
- // statepoint (i)
- // i1 = i+1
- // statepoint (i1)
+ // Cache this slot so we find it when going through the normal
+ // assignment loop.
+ SDValue Loc = Builder.DAG.getTargetFrameIndex(*Index, Incoming.getValueType());
+ Builder.StatepointLowering.setLocation(Incoming, Loc);
}
/// Remove any duplicate (as SDValues) from the derived pointer pairs. This
@@ -319,8 +372,7 @@ static void getIncomingStatepointGCValues(
SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Ptrs,
SmallVectorImpl<const Value *> &Relocs, ImmutableStatepoint StatepointSite,
SelectionDAGBuilder &Builder) {
- for (GCRelocateOperands relocateOpers :
- StatepointSite.getRelocates(StatepointSite)) {
+ for (GCRelocateOperands relocateOpers : StatepointSite.getRelocates()) {
Relocs.push_back(relocateOpers.getUnderlyingCallSite().getInstruction());
Bases.push_back(relocateOpers.getBasePtr());
Ptrs.push_back(relocateOpers.getDerivedPtr());
@@ -458,15 +510,11 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// doesn't change semantics at all. It is important for performance that we
// reserve slots for both deopt and gc values before lowering either.
for (const Value *V : StatepointSite.vm_state_args()) {
- SDValue Incoming = Builder.getValue(V);
- reservePreviousStackSlotForValue(Incoming, Builder);
+ reservePreviousStackSlotForValue(V, Builder);
}
for (unsigned i = 0; i < Bases.size(); ++i) {
- const Value *Base = Bases[i];
- reservePreviousStackSlotForValue(Builder.getValue(Base), Builder);
-
- const Value *Ptr = Ptrs[i];
- reservePreviousStackSlotForValue(Builder.getValue(Ptr), Builder);
+ reservePreviousStackSlotForValue(Bases[i], Builder);
+ reservePreviousStackSlotForValue(Ptrs[i], Builder);
}
// First, prefix the list with the number of unique values to be
@@ -524,8 +572,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr];
- for (GCRelocateOperands RelocateOpers :
- StatepointSite.getRelocates(StatepointSite)) {
+ for (GCRelocateOperands RelocateOpers : StatepointSite.getRelocates()) {
const Value *V = RelocateOpers.getDerivedPtr();
SDValue SDV = Builder.getValue(V);
SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9daf2a5..c70c3a2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2671,8 +2671,9 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl,
// TODO: For UDIV use SRL instead of SRA.
SDValue Amt =
DAG.getConstant(ShAmt, dl, getShiftAmountTy(Op1.getValueType()));
- Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, false, false,
- true);
+ SDNodeFlags Flags;
+ Flags.setExact(true);
+ Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags);
d = d.ashr(ShAmt);
}
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index 7c0b2bb..d60e5f9 100644
--- a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -59,7 +59,7 @@ private:
Type *Ty, Value *BasePtr, int Idx1, int Idx2,
const char *Name);
};
-}
+} // namespace
INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, "shadow-stack-gc-lowering",
"Shadow Stack GC Lowering", false, false)
@@ -144,10 +144,14 @@ public:
BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
Type *ExnTy =
StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr);
- Constant *PersFn = F.getParent()->getOrInsertFunction(
- "__gcc_personality_v0", FunctionType::get(Type::getInt32Ty(C), true));
+ if (!F.hasPersonalityFn()) {
+ Constant *PersFn = F.getParent()->getOrInsertFunction(
+ "__gcc_personality_v0",
+ FunctionType::get(Type::getInt32Ty(C), true));
+ F.setPersonalityFn(PersFn);
+ }
LandingPadInst *LPad =
- LandingPadInst::Create(ExnTy, PersFn, 1, "cleanup.lpad", CleanupBB);
+ LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB);
LPad->setCleanup(true);
ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
@@ -185,7 +189,7 @@ public:
}
}
};
-}
+} // namespace
Constant *ShadowStackGCLowering::GetFrameMap(Function &F) {
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 42d277e..116fd5b 100644
--- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -227,7 +227,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
// Personality function
IRBuilder<> Builder(EntryBB->getTerminator());
if (!PersonalityFn)
- PersonalityFn = LPads[0]->getPersonalityFn();
+ PersonalityFn = F.getPersonalityFn();
Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(
FunctionContextTy, FuncCtx, 0, 3, "pers_fn_gep");
Builder.CreateStore(
diff --git a/contrib/llvm/lib/CodeGen/Spiller.h b/contrib/llvm/lib/CodeGen/Spiller.h
index 08f99ec..b1019c1 100644
--- a/contrib/llvm/lib/CodeGen/Spiller.h
+++ b/contrib/llvm/lib/CodeGen/Spiller.h
@@ -37,6 +37,6 @@ namespace llvm {
MachineFunction &mf,
VirtRegMap &vrm);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
index a062763..4eaf03e 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -466,6 +466,6 @@ public:
unsigned IntvOut, SlotIndex EnterAfter);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp
index 95dfd75..b9523e5 100644
--- a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp
+++ b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp
@@ -45,7 +45,7 @@ public:
return (1 == PT->getAddressSpace());
}
};
-}
+} // namespace
static GCRegistry::Add<StatepointGC> X("statepoint-example",
"an example strategy for statepoint");
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
index 23f41c8..164badd 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -125,7 +125,7 @@ namespace {
};
char TailDuplicatePass::ID = 0;
-}
+} // namespace
char &llvm::TailDuplicateID = TailDuplicatePass::ID;
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
index c809087..97ca025 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
@@ -219,9 +220,8 @@ TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
return !isPredicated(MI);
}
-
-bool TargetInstrInfo::PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const {
+bool TargetInstrInfo::PredicateInstruction(
+ MachineInstr *MI, ArrayRef<MachineOperand> Pred) const {
bool MadeChange = false;
assert(!MI->isBundle() &&
@@ -802,9 +802,10 @@ getInstrLatency(const InstrItineraryData *ItinData,
return ItinData->getStageLatency(MI->getDesc().getSchedClass());
}
-bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
+bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
const MachineInstr *DefMI,
unsigned DefIdx) const {
+ const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
if (!ItinData || ItinData->isEmpty())
return false;
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index d393e10..5c54cdb 100644
--- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -51,7 +51,7 @@ namespace {
AU.addPreserved<DominatorTreeWrapperPass>();
}
};
-}
+} // namespace
char UnreachableBlockElim::ID = 0;
INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim",
"Remove unreachable blocks from the CFG", false, false)
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index 32d5100..2912bdd 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -167,6 +167,7 @@ class VirtRegRewriter : public MachineFunctionPass {
void rewrite();
void addMBBLiveIns();
+ bool readsUndefSubreg(const MachineOperand &MO) const;
public:
static char ID;
VirtRegRewriter() : MachineFunctionPass(ID) {}
@@ -288,6 +289,31 @@ void VirtRegRewriter::addMBBLiveIns() {
MBB.sortUniqueLiveIns();
}
+/// Returns true if the given machine operand \p MO only reads undefined lanes.
+/// The function only works for use operands with a subregister set.
+bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
+ // Shortcut if the operand is already marked undef.
+ if (MO.isUndef())
+ return true;
+
+ unsigned Reg = MO.getReg();
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex BaseIndex = LIS->getInstructionIndex(&MI);
+ // This code is only meant to handle reading undefined subregisters which
+ // we couldn't properly detect before.
+ assert(LI.liveAt(BaseIndex) &&
+ "Reads of completely dead register should be marked undef already");
+ unsigned SubRegIdx = MO.getSubReg();
+ unsigned UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
+ // See if any of the relevant subregister live ranges is defined at this point.
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((SR.LaneMask & UseMask) != 0 && SR.liveAt(BaseIndex))
+ return false;
+ }
+ return true;
+}
+
void VirtRegRewriter::rewrite() {
bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
SmallVector<unsigned, 8> SuperDeads;
@@ -367,32 +393,51 @@ void VirtRegRewriter::rewrite() {
assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
// Preserve semantics of sub-register operands.
- if (MO.getSubReg()) {
- // A virtual register kill refers to the whole register, so we may
- // have to add <imp-use,kill> operands for the super-register. A
- // partial redef always kills and redefines the super-register.
- if (NoSubRegLiveness && MO.readsReg()
- && (MO.isDef() || MO.isKill()))
- SuperKills.push_back(PhysReg);
-
- if (MO.isDef()) {
- // The <def,undef> flag only makes sense for sub-register defs, and
- // we are substituting a full physreg. An <imp-use,kill> operand
- // from the SuperKills list will represent the partial read of the
- // super-register.
- MO.setIsUndef(false);
-
- // Also add implicit defs for the super-register.
- if (NoSubRegLiveness) {
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg != 0) {
+ if (NoSubRegLiveness) {
+ // A virtual register kill refers to the whole register, so we may
+ // have to add <imp-use,kill> operands for the super-register. A
+ // partial redef always kills and redefines the super-register.
+ if (MO.readsReg() && (MO.isDef() || MO.isKill()))
+ SuperKills.push_back(PhysReg);
+
+ if (MO.isDef()) {
+ // Also add implicit defs for the super-register.
if (MO.isDead())
SuperDeads.push_back(PhysReg);
else
SuperDefs.push_back(PhysReg);
}
+ } else {
+ if (MO.isUse()) {
+ if (readsUndefSubreg(MO))
+ // We need to add an <undef> flag if the subregister is
+ // completely undefined (and we are not adding super-register
+ // defs).
+ MO.setIsUndef(true);
+ } else if (!MO.isDead()) {
+ assert(MO.isDef());
+ // Things get tricky when we run out of lane mask bits and
+ // merge multiple lanes into the overflow bit: in this case
+ // our subregister liveness tracking isn't precise and we can't
+ // know which subregister parts are undefined, so fall back to
+ // the implicit super-register def.
+ unsigned LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
+ if (TargetRegisterInfo::isImpreciseLaneMask(LaneMask))
+ SuperDefs.push_back(PhysReg);
+ }
}
+ // The <def,undef> flag only makes sense for sub-register defs, and
+ // we are substituting a full physreg. An <imp-use,kill> operand
+ // from the SuperKills list will represent the partial read of the
+ // super-register.
+ if (MO.isDef())
+ MO.setIsUndef(false);
+
// PhysReg operands cannot have subregister indexes.
- PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg());
+ PhysReg = TRI->getSubReg(PhysReg, SubReg);
assert(PhysReg && "Invalid SubReg for physical register");
MO.setSubReg(0);
}
diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
index c2b3d84..8c932cf 100644
--- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -110,7 +111,7 @@ private:
bool outlineHandler(ActionHandler *Action, Function *SrcFn,
LandingPadInst *LPad, BasicBlock *StartBB,
FrameVarInfoMap &VarInfo);
- void addStubInvokeToHandlerIfNeeded(Function *Handler, Value *PersonalityFn);
+ void addStubInvokeToHandlerIfNeeded(Function *Handler);
void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions);
CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB,
@@ -124,6 +125,7 @@ private:
// All fields are reset by runOnFunction.
DominatorTree *DT = nullptr;
+ const TargetLibraryInfo *LibInfo = nullptr;
EHPersonality Personality = EHPersonality::Unknown;
CatchHandlerMapTy CatchHandlerMap;
CleanupHandlerMapTy CleanupHandlerMap;
@@ -377,13 +379,14 @@ bool WinEHPrepare::runOnFunction(Function &Fn) {
return false;
// Classify the personality to see what kind of preparation we need.
- Personality = classifyEHPersonality(LPads.back()->getPersonalityFn());
+ Personality = classifyEHPersonality(Fn.getPersonalityFn());
// Do nothing if this is not an MSVC personality.
if (!isMSVCEHPersonality(Personality))
return false;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
// If there were any landing pads, prepareExceptionHandlers will make changes.
prepareExceptionHandlers(Fn, LPads);
@@ -394,6 +397,7 @@ bool WinEHPrepare::doFinalization(Module &M) { return false; }
void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler,
@@ -1016,10 +1020,17 @@ bool WinEHPrepare::prepareExceptionHandlers(
Builder.CreateCall(FrameEscapeFn, AllocasToEscape);
if (SEHExceptionCodeSlot) {
- if (SEHExceptionCodeSlot->hasNUses(0))
- SEHExceptionCodeSlot->eraseFromParent();
- else if (isAllocaPromotable(SEHExceptionCodeSlot))
+ if (isAllocaPromotable(SEHExceptionCodeSlot)) {
+ SmallPtrSet<BasicBlock *, 4> UserBlocks;
+ for (User *U : SEHExceptionCodeSlot->users()) {
+ if (auto *Inst = dyn_cast<Instruction>(U))
+ UserBlocks.insert(Inst->getParent());
+ }
PromoteMemToReg(SEHExceptionCodeSlot, *DT);
+ // After the promotion, kill off dead instructions.
+ for (BasicBlock *BB : UserBlocks)
+ SimplifyInstructionsInBlock(BB, LibInfo);
+ }
}
// Clean up the handler action maps we created for this function
@@ -1029,6 +1040,7 @@ bool WinEHPrepare::prepareExceptionHandlers(
CleanupHandlerMap.clear();
HandlerToParentFP.clear();
DT = nullptr;
+ LibInfo = nullptr;
SEHExceptionCodeSlot = nullptr;
EHBlocks.clear();
NormalBlocks.clear();
@@ -1143,7 +1155,6 @@ void WinEHPrepare::completeNestedLandingPad(Function *ParentFn,
++II;
// The instruction after the landing pad should now be a call to eh.actions.
const Instruction *Recover = II;
- assert(match(Recover, m_Intrinsic<Intrinsic::eh_actions>()));
const IntrinsicInst *EHActions = cast<IntrinsicInst>(Recover);
// Remap the return target in the nested handler.
@@ -1254,8 +1265,7 @@ static bool isCatchBlock(BasicBlock *BB) {
return false;
}
-static BasicBlock *createStubLandingPad(Function *Handler,
- Value *PersonalityFn) {
+static BasicBlock *createStubLandingPad(Function *Handler) {
// FIXME: Finish this!
LLVMContext &Context = Handler->getContext();
BasicBlock *StubBB = BasicBlock::Create(Context, "stub");
@@ -1264,7 +1274,7 @@ static BasicBlock *createStubLandingPad(Function *Handler,
LandingPadInst *LPad = Builder.CreateLandingPad(
llvm::StructType::get(Type::getInt8PtrTy(Context),
Type::getInt32Ty(Context), nullptr),
- PersonalityFn, 0);
+ 0);
// Insert a call to llvm.eh.actions so that we don't try to outline this lpad.
Function *ActionIntrin =
Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::eh_actions);
@@ -1279,8 +1289,7 @@ static BasicBlock *createStubLandingPad(Function *Handler,
// landing pad if none is found. The code that generates the .xdata tables for
// the handler needs at least one landing pad to identify the parent function's
// personality.
-void WinEHPrepare::addStubInvokeToHandlerIfNeeded(Function *Handler,
- Value *PersonalityFn) {
+void WinEHPrepare::addStubInvokeToHandlerIfNeeded(Function *Handler) {
ReturnInst *Ret = nullptr;
UnreachableInst *Unreached = nullptr;
for (BasicBlock &BB : *Handler) {
@@ -1312,7 +1321,7 @@ void WinEHPrepare::addStubInvokeToHandlerIfNeeded(Function *Handler,
// parent block. We want to replace that with an invoke call, so we can
// erase it now.
OldRetBB->getTerminator()->eraseFromParent();
- BasicBlock *StubLandingPad = createStubLandingPad(Handler, PersonalityFn);
+ BasicBlock *StubLandingPad = createStubLandingPad(Handler);
Function *F =
Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::donothing);
InvokeInst::Create(F, NewRetBB, StubLandingPad, None, "", OldRetBB);
@@ -1368,6 +1377,7 @@ bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn,
Handler = createHandlerFunc(Type::getVoidTy(Context),
SrcFn->getName() + ".cleanup", M, ParentFP);
}
+ Handler->setPersonalityFn(SrcFn->getPersonalityFn());
HandlerToParentFP[Handler] = ParentFP;
Handler->addFnAttr("wineh-parent", SrcFn->getName());
BasicBlock *Entry = &Handler->getEntryBlock();
@@ -1445,7 +1455,7 @@ bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn,
ClonedEntryBB->eraseFromParent();
// Make sure we can identify the handler's personality later.
- addStubInvokeToHandlerIfNeeded(Handler, LPad->getPersonalityFn());
+ addStubInvokeToHandlerIfNeeded(Handler);
if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
WinEHCatchDirector *CatchDirector =
@@ -2286,7 +2296,7 @@ void WinEHPrepare::findCleanupHandlers(LandingPadActions &Actions,
// value for this block but the value is a nullptr. This means that
// we have previously analyzed the block and determined that it did
// not contain any cleanup code. Based on the earlier analysis, we
- // know the the block must end in either an unconditional branch, a
+ // know the block must end in either an unconditional branch, a
// resume or a conditional branch that is predicated on a comparison
// with a selector. Either the resume or the selector dispatch
// would terminate the search for cleanup code, so the unconditional
@@ -2454,6 +2464,8 @@ void WinEHPrepare::findCleanupHandlers(LandingPadActions &Actions,
void llvm::parseEHActions(
const IntrinsicInst *II,
SmallVectorImpl<std::unique_ptr<ActionHandler>> &Actions) {
+ assert(II->getIntrinsicID() == Intrinsic::eh_actions &&
+ "attempted to parse non eh.actions intrinsic");
for (unsigned I = 0, E = II->getNumArgOperands(); I != E;) {
uint64_t ActionKind =
cast<ConstantInt>(II->getArgOperand(I))->getZExtValue();
@@ -2506,7 +2518,7 @@ struct WinEHNumbering {
void calculateStateNumbers(const Function &F);
void findActionRootLPads(const Function &F);
};
-}
+} // namespace
void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) {
WinEHUnwindMapEntry UME;
@@ -2766,7 +2778,6 @@ void WinEHNumbering::calculateStateNumbers(const Function &F) {
auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
if (!ActionsCall)
continue;
- assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions);
parseEHActions(ActionsCall, ActionList);
if (ActionList.empty())
continue;
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 8ae0543..fd33c7d 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -129,4 +129,4 @@ void DWARFAcceleratorTable::dump(raw_ostream &OS) const {
}
}
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index baab387..32654f8 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -674,7 +674,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
uint64_t SymAddr = 0;
uint64_t SectionLoadAddress = 0;
object::symbol_iterator Sym = Reloc.getSymbol();
- object::section_iterator RSec = Reloc.getSection();
+ object::section_iterator RSec = Obj.section_end();
// First calculate the address of the symbol or section as it appears
// in the object file
@@ -682,8 +682,13 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
Sym->getAddress(SymAddr);
// Also remember what section this symbol is in for later
Sym->getSection(RSec);
- } else if (RSec != Obj.section_end())
+ } else if (auto *MObj = dyn_cast<MachOObjectFile>(&Obj)) {
+ // MachO also has relocations that point to sections and
+ // scattered relocations.
+ // FIXME: We are not handling scattered relocations, do we have to?
+ RSec = MObj->getRelocationSection(Reloc.getRawDataRefImpl());
SymAddr = RSec->getAddress();
+ }
// If we are given load addresses for the sections, we need to adjust:
// SymAddr = (Address of Symbol Or Section in File) -
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 53a676e..48e1d55 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -61,7 +61,7 @@ ArrayRef<uint8_t> makeFixedFormSizesArrayRef() {
};
return makeArrayRef(sizes);
}
-}
+} // namespace
ArrayRef<uint8_t> DWARFFormValue::getFixedFormSizes(uint8_t AddrSize,
uint16_t Version) {
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.h b/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.h
index 946a313..84afd37 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.h
+++ b/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.h
@@ -32,8 +32,8 @@ public:
llvm::raw_ostream& get() { return OS; }
operator llvm::raw_ostream& () { return OS; }
};
-}
-}
-}
+} // namespace syntax
+} // namespace dwarf
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
index 0aff327..8f56de8 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
@@ -80,7 +80,7 @@ private:
ArgListType Args;
ArgListType::const_iterator CurIter;
};
-}
+} // namespace
PDBSymbolFunc::PDBSymbolFunc(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
index af3563f..fcee182 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
@@ -63,7 +63,7 @@ private:
const IPDBSession &Session;
std::unique_ptr<ArgEnumeratorType> Enumerator;
};
-}
+} // namespace
PDBSymbolTypeFunctionSig::PDBSymbolTypeFunctionSig(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
index 9e71b10..94e8090 100644
--- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -153,6 +153,14 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
return nullptr;
}
+GlobalVariable *ExecutionEngine::FindGlobalVariableNamed(const char *Name, bool AllowInternal) {
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i) {
+ GlobalVariable *GV = Modules[i]->getGlobalVariable(Name,AllowInternal);
+ if (GV && !GV->isDeclaration())
+ return GV;
+ }
+ return nullptr;
+}
uint64_t ExecutionEngineState::RemoveMapping(StringRef Name) {
GlobalAddressMapTy::iterator I = GlobalAddressMap.find(Name);
@@ -376,7 +384,7 @@ void ExecutionEngine::runStaticConstructorsDestructors(Module &module,
// Execute the ctor/dtor function!
if (Function *F = dyn_cast<Function>(FP))
- runFunction(F, std::vector<GenericValue>());
+ runFunction(F, None);
// FIXME: It is marginally lame that we just do nothing here if we see an
// entry we don't recognize. It might not be unreasonable for the verifier
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
index 39a8027..dbfa37e 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -2073,8 +2073,7 @@ GenericValue Interpreter::getOperandValue(Value *V, ExecutionContext &SF) {
//===----------------------------------------------------------------------===//
// callFunction - Execute the specified function...
//
-void Interpreter::callFunction(Function *F,
- const std::vector<GenericValue> &ArgVals) {
+void Interpreter::callFunction(Function *F, ArrayRef<GenericValue> ArgVals) {
assert((ECStack.empty() || !ECStack.back().Caller.getInstruction() ||
ECStack.back().Caller.arg_size() == ArgVals.size()) &&
"Incorrect number of arguments passed into function call!");
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index e2fe065..9b44042 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -49,8 +49,7 @@ using namespace llvm;
static ManagedStatic<sys::Mutex> FunctionsLock;
-typedef GenericValue (*ExFunc)(FunctionType *,
- const std::vector<GenericValue> &);
+typedef GenericValue (*ExFunc)(FunctionType *, ArrayRef<GenericValue>);
static ManagedStatic<std::map<const Function *, ExFunc> > ExportedFunctions;
static ManagedStatic<std::map<std::string, ExFunc> > FuncNames;
@@ -178,8 +177,7 @@ static void *ffiValueFor(Type *Ty, const GenericValue &AV,
return NULL;
}
-static bool ffiInvoke(RawFunc Fn, Function *F,
- const std::vector<GenericValue> &ArgVals,
+static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef<GenericValue> ArgVals,
const DataLayout *TD, GenericValue &Result) {
ffi_cif cif;
FunctionType *FTy = F->getFunctionType();
@@ -245,7 +243,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F,
#endif // USE_LIBFFI
GenericValue Interpreter::callExternalFunction(Function *F,
- const std::vector<GenericValue> &ArgVals) {
+ ArrayRef<GenericValue> ArgVals) {
TheInterpreter = this;
unique_lock<sys::Mutex> Guard(*FunctionsLock);
@@ -298,9 +296,8 @@ GenericValue Interpreter::callExternalFunction(Function *F,
//
// void atexit(Function*)
-static
-GenericValue lle_X_atexit(FunctionType *FT,
- const std::vector<GenericValue> &Args) {
+static GenericValue lle_X_atexit(FunctionType *FT,
+ ArrayRef<GenericValue> Args) {
assert(Args.size() == 1);
TheInterpreter->addAtExitHandler((Function*)GVTOP(Args[0]));
GenericValue GV;
@@ -309,17 +306,13 @@ GenericValue lle_X_atexit(FunctionType *FT,
}
// void exit(int)
-static
-GenericValue lle_X_exit(FunctionType *FT,
- const std::vector<GenericValue> &Args) {
+static GenericValue lle_X_exit(FunctionType *FT, ArrayRef<GenericValue> Args) {
TheInterpreter->exitCalled(Args[0]);
return GenericValue();
}
// void abort(void)
-static
-GenericValue lle_X_abort(FunctionType *FT,
- const std::vector<GenericValue> &Args) {
+static GenericValue lle_X_abort(FunctionType *FT, ArrayRef<GenericValue> Args) {
//FIXME: should we report or raise here?
//report_fatal_error("Interpreted program raised SIGABRT");
raise (SIGABRT);
@@ -328,9 +321,8 @@ GenericValue lle_X_abort(FunctionType *FT,
// int sprintf(char *, const char *, ...) - a very rough implementation to make
// output useful.
-static
-GenericValue lle_X_sprintf(FunctionType *FT,
- const std::vector<GenericValue> &Args) {
+static GenericValue lle_X_sprintf(FunctionType *FT,
+ ArrayRef<GenericValue> Args) {
char *OutputBuffer = (char *)GVTOP(Args[0]);
const char *FmtStr = (const char *)GVTOP(Args[1]);
unsigned ArgNo = 2;
@@ -411,9 +403,8 @@ GenericValue lle_X_sprintf(FunctionType *FT,
// int printf(const char *, ...) - a very rough implementation to make output
// useful.
-static
-GenericValue lle_X_printf(FunctionType *FT,
- const std::vector<GenericValue> &Args) {
+static GenericValue lle_X_printf(FunctionType *FT,
+ ArrayRef<GenericValue> Args) {
char Buffer[10000];
std::vector<GenericValue> NewArgs;
NewArgs.push_back(PTOGV((void*)&Buffer[0]));
@@ -424,9 +415,8 @@ GenericValue lle_X_printf(FunctionType *FT,
}
// int sscanf(const char *format, ...);
-static
-GenericValue lle_X_sscanf(FunctionType *FT,
- const std::vector<GenericValue> &args) {
+static GenericValue lle_X_sscanf(FunctionType *FT,
+ ArrayRef<GenericValue> args) {
assert(args.size() < 10 && "Only handle up to 10 args to sscanf right now!");
char *Args[10];
@@ -440,9 +430,7 @@ GenericValue lle_X_sscanf(FunctionType *FT,
}
// int scanf(const char *format, ...);
-static
-GenericValue lle_X_scanf(FunctionType *FT,
- const std::vector<GenericValue> &args) {
+static GenericValue lle_X_scanf(FunctionType *FT, ArrayRef<GenericValue> args) {
assert(args.size() < 10 && "Only handle up to 10 args to scanf right now!");
char *Args[10];
@@ -457,9 +445,8 @@ GenericValue lle_X_scanf(FunctionType *FT,
// int fprintf(FILE *, const char *, ...) - a very rough implementation to make
// output useful.
-static
-GenericValue lle_X_fprintf(FunctionType *FT,
- const std::vector<GenericValue> &Args) {
+static GenericValue lle_X_fprintf(FunctionType *FT,
+ ArrayRef<GenericValue> Args) {
assert(Args.size() >= 2);
char Buffer[10000];
std::vector<GenericValue> NewArgs;
@@ -472,7 +459,7 @@ GenericValue lle_X_fprintf(FunctionType *FT,
}
static GenericValue lle_X_memset(FunctionType *FT,
- const std::vector<GenericValue> &Args) {
+ ArrayRef<GenericValue> Args) {
int val = (int)Args[1].IntVal.getSExtValue();
size_t len = (size_t)Args[2].IntVal.getZExtValue();
memset((void *)GVTOP(Args[0]), val, len);
@@ -484,7 +471,7 @@ static GenericValue lle_X_memset(FunctionType *FT,
}
static GenericValue lle_X_memcpy(FunctionType *FT,
- const std::vector<GenericValue> &Args) {
+ ArrayRef<GenericValue> Args) {
memcpy(GVTOP(Args[0]), GVTOP(Args[1]),
(size_t)(Args[2].IntVal.getLimitedValue()));
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp
index 8562981..f103c09 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -67,7 +67,7 @@ Interpreter::~Interpreter() {
void Interpreter::runAtExitHandlers () {
while (!AtExitHandlers.empty()) {
- callFunction(AtExitHandlers.back(), std::vector<GenericValue>());
+ callFunction(AtExitHandlers.back(), None);
AtExitHandlers.pop_back();
run();
}
@@ -75,9 +75,8 @@ void Interpreter::runAtExitHandlers () {
/// run - Start execution with the specified function and arguments.
///
-GenericValue
-Interpreter::runFunction(Function *F,
- const std::vector<GenericValue> &ArgValues) {
+GenericValue Interpreter::runFunction(Function *F,
+ ArrayRef<GenericValue> ArgValues) {
assert (F && "Function *F was null at entry to run()");
// Try extra hard not to pass extra args to a function that isn't
@@ -87,10 +86,9 @@ Interpreter::runFunction(Function *F,
// parameters than it is declared to take. This does not attempt to
// take into account gratuitous differences in declared types,
// though.
- std::vector<GenericValue> ActualArgs;
- const unsigned ArgCount = F->getFunctionType()->getNumParams();
- for (unsigned i = 0; i < ArgCount; ++i)
- ActualArgs.push_back(ArgValues[i]);
+ const size_t ArgCount = F->getFunctionType()->getNumParams();
+ ArrayRef<GenericValue> ActualArgs =
+ ArgValues.slice(0, std::min(ArgValues.size(), ArgCount));
// Set up the function call.
callFunction(F, ActualArgs);
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
index 0dc0463..f6cac58 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -127,7 +127,7 @@ public:
/// run - Start execution with the specified function and arguments.
///
GenericValue runFunction(Function *F,
- const std::vector<GenericValue> &ArgValues) override;
+ ArrayRef<GenericValue> ArgValues) override;
void *getPointerToNamedFunction(StringRef Name,
bool AbortOnFailure = true) override {
@@ -137,7 +137,7 @@ public:
// Methods used to execute code:
// Place a call on the stack
- void callFunction(Function *F, const std::vector<GenericValue> &ArgVals);
+ void callFunction(Function *F, ArrayRef<GenericValue> ArgVals);
void run(); // Execute instructions until nothing left to do
// Opcode Implementations
@@ -194,7 +194,7 @@ public:
}
GenericValue callExternalFunction(Function *F,
- const std::vector<GenericValue> &ArgVals);
+ ArrayRef<GenericValue> ArgVals);
void exitCalled(GenericValue GV);
void addAtExitHandler(Function *F) {
@@ -251,6 +251,6 @@ private: // Helper functions
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 7e37afe..87243e4 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -429,6 +429,19 @@ Function *MCJIT::FindFunctionNamedInModulePtrSet(const char *FnName,
return nullptr;
}
+GlobalVariable *MCJIT::FindGlobalVariableNamedInModulePtrSet(const char *Name,
+ bool AllowInternal,
+ ModulePtrSet::iterator I,
+ ModulePtrSet::iterator E) {
+ for (; I != E; ++I) {
+ GlobalVariable *GV = (*I)->getGlobalVariable(Name, AllowInternal);
+ if (GV && !GV->isDeclaration())
+ return GV;
+ }
+ return nullptr;
+}
+
+
Function *MCJIT::FindFunctionNamed(const char *FnName) {
Function *F = FindFunctionNamedInModulePtrSet(
FnName, OwnedModules.begin_added(), OwnedModules.end_added());
@@ -441,8 +454,19 @@ Function *MCJIT::FindFunctionNamed(const char *FnName) {
return F;
}
-GenericValue MCJIT::runFunction(Function *F,
- const std::vector<GenericValue> &ArgValues) {
+GlobalVariable *MCJIT::FindGlobalVariableNamed(const char *Name, bool AllowInternal) {
+ GlobalVariable *GV = FindGlobalVariableNamedInModulePtrSet(
+ Name, AllowInternal, OwnedModules.begin_added(), OwnedModules.end_added());
+ if (!GV)
+ GV = FindGlobalVariableNamedInModulePtrSet(Name, AllowInternal, OwnedModules.begin_loaded(),
+ OwnedModules.end_loaded());
+ if (!GV)
+ GV = FindGlobalVariableNamedInModulePtrSet(Name, AllowInternal, OwnedModules.begin_finalized(),
+ OwnedModules.end_finalized());
+ return GV;
+}
+
+GenericValue MCJIT::runFunction(Function *F, ArrayRef<GenericValue> ArgValues) {
assert(F && "Function *F was null at entry to run()");
void *FPtr = getPointerToFunction(F);
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
index 59e9949..7fda1e0 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -200,6 +200,11 @@ class MCJIT : public ExecutionEngine {
ModulePtrSet::iterator I,
ModulePtrSet::iterator E);
+ GlobalVariable *FindGlobalVariableNamedInModulePtrSet(const char *Name,
+ bool AllowInternal,
+ ModulePtrSet::iterator I,
+ ModulePtrSet::iterator E);
+
void runStaticConstructorsDestructorsInModulePtrSet(bool isDtors,
ModulePtrSet::iterator I,
ModulePtrSet::iterator E);
@@ -215,10 +220,15 @@ public:
void addArchive(object::OwningBinary<object::Archive> O) override;
bool removeModule(Module *M) override;
- /// FindFunctionNamed - Search all of the active modules to find the one that
+ /// FindFunctionNamed - Search all of the active modules to find the function that
/// defines FnName. This is a very slow operation and shouldn't be used for
/// general code.
- Function *FindFunctionNamed(const char *FnName) override;
+ virtual Function *FindFunctionNamed(const char *FnName) override;
+
+ /// FindGlobalVariableNamed - Search all of the active modules to find the global variable
+ /// that defines Name. This is a very slow operation and shouldn't be used for
+ /// general code.
+ virtual GlobalVariable *FindGlobalVariableNamed(const char *Name, bool AllowInternal = false) override;
/// Sets the object manager that MCJIT should use to avoid compilation.
void setObjectCache(ObjectCache *manager) override;
@@ -251,7 +261,7 @@ public:
void *getPointerToFunction(Function *F) override;
GenericValue runFunction(Function *F,
- const std::vector<GenericValue> &ArgValues) override;
+ ArrayRef<GenericValue> ArgValues) override;
/// getPointerToNamedFunction - This method returns the address of the
/// specified function by using the dlsym function call. As such it is only
@@ -325,6 +335,6 @@ protected:
bool CheckFunctionsOnly);
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 4ed8730..b439810 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -30,8 +30,6 @@ Constant* createIRTypedAddress(FunctionType &FT, TargetAddress Addr) {
GlobalVariable* createImplPointer(PointerType &PT, Module &M,
const Twine &Name, Constant *Initializer) {
- if (!Initializer)
- Initializer = Constant::getNullValue(&PT);
auto IP = new GlobalVariable(M, &PT, false, GlobalValue::ExternalLinkage,
Initializer, Name, nullptr,
GlobalValue::NotThreadLocal, 0, true);
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
index 48fd31e..b7a68e0 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
@@ -25,7 +25,7 @@ namespace orc {
GenericValue
OrcMCJITReplacement::runFunction(Function *F,
- const std::vector<GenericValue> &ArgValues) {
+ ArrayRef<GenericValue> ArgValues) {
assert(F && "Function *F was null at entry to run()");
void *FPtr = getPointerToFunction(F);
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index 4023344..eb39798 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -229,7 +229,7 @@ public:
}
GenericValue runFunction(Function *F,
- const std::vector<GenericValue> &ArgValues) override;
+ ArrayRef<GenericValue> ArgValues) override;
void setObjectCache(ObjectCache *NewCache) override {
CompileLayer.setObjectCache(NewCache);
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
index 2a5e4f8..044eee4 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
@@ -41,8 +41,8 @@ RTDyldMemoryManager::~RTDyldMemoryManager() {}
#endif
#if HAVE_EHTABLE_SUPPORT
-extern "C" void __register_frame(void*);
-extern "C" void __deregister_frame(void*);
+extern "C" void __register_frame(void *);
+extern "C" void __deregister_frame(void *);
#else
// The building compiler does not have __(de)register_frame but
// it may be found at runtime in a dynamically-loaded library.
@@ -50,28 +50,28 @@ extern "C" void __deregister_frame(void*);
// but using the MingW runtime.
void __register_frame(void *p) {
static bool Searched = false;
- static void *rf = 0;
+ static void((*rf)(void *)) = 0;
if (!Searched) {
Searched = true;
- rf = llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(
- "__register_frame");
+ *(void **)&rf =
+ llvm::sys::DynamicLibrary::SearchForAddressOfSymbol("__register_frame");
}
if (rf)
- ((void (*)(void *))rf)(p);
+ rf(p);
}
void __deregister_frame(void *p) {
static bool Searched = false;
- static void *df = 0;
+ static void((*df)(void *)) = 0;
if (!Searched) {
Searched = true;
- df = llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(
- "__deregister_frame");
+ *(void **)&df = llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(
+ "__deregister_frame");
}
if (df)
- ((void (*)(void *))df)(p);
+ df(p);
}
#endif
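The hunk above replaces an untyped void* cache and a per-call cast with a function pointer of the right type, filled in once through the symbol lookup. The same pattern in isolation, as a hedged sketch (dlsym stands in for DynamicLibrary::SearchForAddressOfSymbol):

    // Sketch: lazy, typed lookup of an optional runtime symbol.
    #include <dlfcn.h>

    void callRegisterFrame(void *p) {
      static bool Searched = false;
      static void (*rf)(void *) = nullptr;      // typed slot, no cast at the call
      if (!Searched) {
        Searched = true;
        *(void **)&rf = dlsym(RTLD_DEFAULT, "__register_frame");
      }
      if (rf)
        rf(p);                                  // direct call through the pointer
    }
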
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
index c8d3d22..9f80e5a 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -36,7 +36,7 @@ public:
return OwningBinary<ObjectFile>();
}
};
-}
+} // namespace
namespace llvm {
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index 957571b..c8c2516 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -673,7 +673,7 @@ private:
return (S == MCDisassembler::Success);
}
};
-}
+} // namespace llvm
RuntimeDyldCheckerImpl::RuntimeDyldCheckerImpl(RuntimeDyld &RTDyld,
MCDisassembler *Disassembler,
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
index 69d2a7d..a0a1118 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
@@ -72,6 +72,6 @@ private:
StubMap Stubs;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index b4a34e8..967d7c0 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -519,7 +519,8 @@ void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section,
}
void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) {
- if (!StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) {
+ if (Arch == Triple::UnknownArch ||
+ !StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) {
IsMipsO32ABI = false;
IsMipsN64ABI = false;
return;
@@ -717,7 +718,7 @@ void RuntimeDyldELF::applyMIPS64Relocation(uint8_t *TargetPtr,
}
// Return the .TOC. section and offset.
-void RuntimeDyldELF::findPPC64TOCSection(const ObjectFile &Obj,
+void RuntimeDyldELF::findPPC64TOCSection(const ELFObjectFileBase &Obj,
ObjSectionToIDMap &LocalSections,
RelocationValueRef &Rel) {
// Set a default SectionID in case we do not find a TOC section below.
@@ -750,7 +751,7 @@ void RuntimeDyldELF::findPPC64TOCSection(const ObjectFile &Obj,
// Returns the sections and offset associated with the ODP entry referenced
// by Symbol.
-void RuntimeDyldELF::findOPDEntrySection(const ObjectFile &Obj,
+void RuntimeDyldELF::findOPDEntrySection(const ELFObjectFileBase &Obj,
ObjSectionToIDMap &LocalSections,
RelocationValueRef &Rel) {
// Get the ELF symbol value (st_value) to compare with Relocation offset in
@@ -781,8 +782,10 @@ void RuntimeDyldELF::findOPDEntrySection(const ObjectFile &Obj,
uint64_t TargetSymbolOffset;
symbol_iterator TargetSymbol = i->getSymbol();
check(i->getOffset(TargetSymbolOffset));
- int64_t Addend;
- check(getELFRelocationAddend(*i, Addend));
+ ErrorOr<int64_t> AddendOrErr =
+ Obj.getRelocationAddend(i->getRawDataRefImpl());
+ Check(AddendOrErr.getError());
+ int64_t Addend = *AddendOrErr;
++i;
if (i == e)
@@ -1055,14 +1058,14 @@ void RuntimeDyldELF::processSimpleRelocation(unsigned SectionID, uint64_t Offset
}
relocation_iterator RuntimeDyldELF::processRelocationRef(
- unsigned SectionID, relocation_iterator RelI,
- const ObjectFile &Obj,
- ObjSectionToIDMap &ObjSectionToID,
- StubMap &Stubs) {
+ unsigned SectionID, relocation_iterator RelI, const ObjectFile &O,
+ ObjSectionToIDMap &ObjSectionToID, StubMap &Stubs) {
+ const auto &Obj = cast<ELFObjectFileBase>(O);
uint64_t RelType;
Check(RelI->getType(RelType));
- int64_t Addend;
- Check(getELFRelocationAddend(*RelI, Addend));
+ int64_t Addend = 0;
+ if (Obj.hasRelocationAddend(RelI->getRawDataRefImpl()))
+ Addend = *Obj.getRelocationAddend(RelI->getRawDataRefImpl());
symbol_iterator Symbol = RelI->getSymbol();
// Obtain the symbol name which is referenced in the relocation
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index 3a377a2..1a2552d 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -87,10 +87,10 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
void setMipsABI(const ObjectFile &Obj) override;
- void findPPC64TOCSection(const ObjectFile &Obj,
+ void findPPC64TOCSection(const ELFObjectFileBase &Obj,
ObjSectionToIDMap &LocalSections,
RelocationValueRef &Rel);
- void findOPDEntrySection(const ObjectFile &Obj,
+ void findOPDEntrySection(const ELFObjectFileBase &Obj,
ObjSectionToIDMap &LocalSections,
RelocationValueRef &Rel);
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index d4a680d..f7a4fcc 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -39,7 +39,7 @@ public:
}
};
-}
+} // namespace
namespace llvm {
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
index 99fd6e3..5149d01 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
@@ -400,7 +400,7 @@ private:
addRelocationForSection(TargetRE, RE.SectionID);
}
};
-}
+} // namespace llvm
#undef DEBUG_TYPE
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index 09e51f2..8600763 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -272,7 +272,7 @@ private:
}
};
-}
+} // namespace llvm
#undef DEBUG_TYPE
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
index dd454ae..f36f940 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
@@ -254,7 +254,7 @@ private:
}
};
-}
+} // namespace llvm
#undef DEBUG_TYPE
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
index 4b3b01b..419b27a 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
@@ -131,7 +131,7 @@ private:
resolveRelocation(TargetRE, (uint64_t)Addr);
}
};
-}
+} // namespace llvm
#undef DEBUG_TYPE
diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp
index 0744fdf..bc35cb3 100644
--- a/contrib/llvm/lib/IR/AsmWriter.cpp
+++ b/contrib/llvm/lib/IR/AsmWriter.cpp
@@ -67,7 +67,7 @@ struct OrderMap {
IDs[V].first = ID;
}
};
-}
+} // namespace
static void orderValue(const Value *V, OrderMap &OM) {
if (OM.lookup(V).first)
@@ -109,6 +109,10 @@ static OrderMap orderModule(const Module *M) {
if (!isa<GlobalValue>(F.getPrologueData()))
orderValue(F.getPrologueData(), OM);
+ if (F.hasPersonalityFn())
+ if (!isa<GlobalValue>(F.getPersonalityFn()))
+ orderValue(F.getPersonalityFn(), OM);
+
orderValue(&F, OM);
if (F.isDeclaration())
@@ -725,33 +729,33 @@ void SlotTracker::processModule() {
ST_DEBUG("begin processModule!\n");
// Add all of the unnamed global variables to the value table.
- for (Module::const_global_iterator I = TheModule->global_begin(),
- E = TheModule->global_end(); I != E; ++I) {
- if (!I->hasName())
- CreateModuleSlot(I);
+ for (const GlobalVariable &Var : TheModule->globals()) {
+ if (!Var.hasName())
+ CreateModuleSlot(&Var);
+ }
+
+ for (const GlobalAlias &A : TheModule->aliases()) {
+ if (!A.hasName())
+ CreateModuleSlot(&A);
}
// Add metadata used by named metadata.
- for (Module::const_named_metadata_iterator
- I = TheModule->named_metadata_begin(),
- E = TheModule->named_metadata_end(); I != E; ++I) {
- const NamedMDNode *NMD = I;
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
- CreateMetadataSlot(NMD->getOperand(i));
+ for (const NamedMDNode &NMD : TheModule->named_metadata()) {
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
+ CreateMetadataSlot(NMD.getOperand(i));
}
- for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
- I != E; ++I) {
- if (!I->hasName())
+ for (const Function &F : *TheModule) {
+ if (!F.hasName())
// Add all the unnamed functions to the table.
- CreateModuleSlot(I);
+ CreateModuleSlot(&F);
if (ShouldInitializeAllMetadata)
- processFunctionMetadata(*I);
+ processFunctionMetadata(F);
// Add all the function attributes to the table.
// FIXME: Add attributes of other objects?
- AttributeSet FnAttrs = I->getAttributes().getFnAttributes();
+ AttributeSet FnAttrs = F.getAttributes().getFnAttributes();
if (FnAttrs.hasAttributes(AttributeSet::FunctionIndex))
CreateAttributeSetSlot(FnAttrs);
}
@@ -2169,23 +2173,21 @@ void AssemblyWriter::printModule(const Module *M) {
// Output all globals.
if (!M->global_empty()) Out << '\n';
- for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
- I != E; ++I) {
- printGlobal(I); Out << '\n';
+ for (const GlobalVariable &GV : M->globals()) {
+ printGlobal(&GV); Out << '\n';
}
// Output all aliases.
if (!M->alias_empty()) Out << "\n";
- for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
- I != E; ++I)
- printAlias(I);
+ for (const GlobalAlias &GA : M->aliases())
+ printAlias(&GA);
// Output global use-lists.
printUseLists(nullptr);
// Output all of the functions.
- for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
- printFunction(I);
+ for (const Function &F : *M)
+ printFunction(&F);
assert(UseListOrders.empty() && "All use-lists should have been consumed");
// Output all attribute groups.
@@ -2197,9 +2199,8 @@ void AssemblyWriter::printModule(const Module *M) {
// Output named metadata.
if (!M->named_metadata_empty()) Out << '\n';
- for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
- E = M->named_metadata_end(); I != E; ++I)
- printNamedMDNode(I);
+ for (const NamedMDNode &Node : M->named_metadata())
+ printNamedMDNode(&Node);
// Output metadata.
if (!Machine.mdn_empty()) {
@@ -2364,13 +2365,9 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
if (GA->isMaterializable())
Out << "; Materializable\n";
- // Don't crash when dumping partially built GA
- if (!GA->hasName())
- Out << "<<nameless>> = ";
- else {
- PrintLLVMName(Out, GA);
- Out << " = ";
- }
+ WriteAsOperandInternal(Out, GA, &TypePrinter, &Machine, GA->getParent());
+ Out << " = ";
+
PrintLinkage(GA->getLinkage(), Out);
PrintVisibility(GA->getVisibility(), Out);
PrintDLLStorageClass(GA->getDLLStorageClass(), Out);
@@ -2547,6 +2544,10 @@ void AssemblyWriter::printFunction(const Function *F) {
Out << " prologue ";
writeOperand(F->getPrologueData(), true);
}
+ if (F->hasPersonalityFn()) {
+ Out << " personality ";
+ writeOperand(F->getPersonalityFn(), /*PrintType=*/true);
+ }
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
F->getAllMetadata(MDs);
@@ -2789,8 +2790,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
} else if (const LandingPadInst *LPI = dyn_cast<LandingPadInst>(&I)) {
Out << ' ';
TypePrinter.print(I.getType(), Out);
- Out << " personality ";
- writeOperand(I.getOperand(0), true); Out << '\n';
+ if (LPI->isCleanup() || LPI->getNumClauses() != 0)
+ Out << '\n';
if (LPI->isCleanup())
Out << " cleanup";
diff --git a/contrib/llvm/lib/IR/AttributeImpl.h b/contrib/llvm/lib/IR/AttributeImpl.h
index dbd7d63..8159dce 100644
--- a/contrib/llvm/lib/IR/AttributeImpl.h
+++ b/contrib/llvm/lib/IR/AttributeImpl.h
@@ -181,6 +181,9 @@ public:
AttrList[I].Profile(ID);
}
};
+static_assert(
+ AlignOf<AttributeSetNode>::Alignment >= AlignOf<Attribute>::Alignment,
+ "Alignment is insufficient for objects appended to AttributeSetNode");
//===----------------------------------------------------------------------===//
/// \class
@@ -189,9 +192,11 @@ public:
class AttributeSetImpl : public FoldingSetNode {
friend class AttributeSet;
- LLVMContext &Context;
-
+public:
typedef std::pair<unsigned, AttributeSetNode*> IndexAttrPair;
+
+private:
+ LLVMContext &Context;
unsigned NumAttrs; ///< Number of entries in this set.
/// \brief Return a pointer to the IndexAttrPair for the specified slot.
@@ -206,6 +211,7 @@ public:
AttributeSetImpl(LLVMContext &C,
ArrayRef<std::pair<unsigned, AttributeSetNode *> > Attrs)
: Context(C), NumAttrs(Attrs.size()) {
+
#ifndef NDEBUG
if (Attrs.size() >= 2) {
for (const std::pair<unsigned, AttributeSetNode *> *i = Attrs.begin() + 1,
@@ -267,7 +273,11 @@ public:
void dump() const;
};
+static_assert(
+ AlignOf<AttributeSetImpl>::Alignment >=
+ AlignOf<AttributeSetImpl::IndexAttrPair>::Alignment,
+ "Alignment is insufficient for objects appended to AttributeSetImpl");
-} // end llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp
index fef05c8..c3032f4 100644
--- a/contrib/llvm/lib/IR/Attributes.cpp
+++ b/contrib/llvm/lib/IR/Attributes.cpp
@@ -252,6 +252,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "sspreq";
if (hasAttribute(Attribute::StackProtectStrong))
return "sspstrong";
+ if (hasAttribute(Attribute::SafeStack))
+ return "safestack";
if (hasAttribute(Attribute::StructRet))
return "sret";
if (hasAttribute(Attribute::SanitizeThread))
@@ -437,6 +439,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
case Attribute::NonNull: return 1ULL << 44;
case Attribute::JumpTable: return 1ULL << 45;
case Attribute::Convergent: return 1ULL << 46;
+ case Attribute::SafeStack: return 1ULL << 47;
case Attribute::Dereferenceable:
llvm_unreachable("dereferenceable attribute not supported in raw format");
break;
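With the new enumerator wired into getAsString and getAttrMask, the attribute can be attached like any other function attribute; a one-line sketch (F is an assumed Function*):

    F->addFnAttr(Attribute::SafeStack); // prints as "safestack" in textual IR
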
diff --git a/contrib/llvm/lib/IR/BasicBlock.cpp b/contrib/llvm/lib/IR/BasicBlock.cpp
index 70ae3c3..77cb10d 100644
--- a/contrib/llvm/lib/IR/BasicBlock.cpp
+++ b/contrib/llvm/lib/IR/BasicBlock.cpp
@@ -362,12 +362,15 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
BasicBlock *New = BasicBlock::Create(getContext(), BBName,
getParent(), InsertBefore);
+ // Save DebugLoc of split point before invalidating iterator.
+ DebugLoc Loc = I->getDebugLoc();
// Move all of the specified instructions from the original basic block into
// the new basic block.
New->getInstList().splice(New->end(), this->getInstList(), I, end());
// Add a branch instruction to the newly formed basic block.
- BranchInst::Create(New, this);
+ BranchInst *BI = BranchInst::Create(New, this);
+ BI->setDebugLoc(Loc);
// Now we must loop through all of the successors of the New block (which
// _were_ the successors of the 'this' block), and update any PHI nodes in
diff --git a/contrib/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm/lib/IR/ConstantFold.cpp
index 2efc612..46bb20e 100644
--- a/contrib/llvm/lib/IR/ConstantFold.cpp
+++ b/contrib/llvm/lib/IR/ConstantFold.cpp
@@ -2163,11 +2163,11 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C,
// Check to see if any array indices are not within the corresponding
// notional array or vector bounds. If so, try to determine if they can be
// factored out into preceding dimensions.
- bool Unknown = false;
SmallVector<Constant *, 8> NewIdxs;
- Type *Ty = C->getType();
- Type *Prev = nullptr;
- for (unsigned i = 0, e = Idxs.size(); i != e;
+ Type *Ty = PointeeTy;
+ Type *Prev = C->getType();
+ bool Unknown = !isa<ConstantInt>(Idxs[0]);
+ for (unsigned i = 1, e = Idxs.size(); i != e;
Prev = Ty, Ty = cast<CompositeType>(Ty)->getTypeAtIndex(Idxs[i]), ++i) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Idxs[i])) {
if (isa<ArrayType>(Ty) || isa<VectorType>(Ty))
diff --git a/contrib/llvm/lib/IR/ConstantFold.h b/contrib/llvm/lib/IR/ConstantFold.h
index 42a9c6b..715c429 100644
--- a/contrib/llvm/lib/IR/ConstantFold.h
+++ b/contrib/llvm/lib/IR/ConstantFold.h
@@ -55,6 +55,6 @@ namespace llvm {
ArrayRef<Constant *> Idxs);
Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C, bool inBounds,
ArrayRef<Value *> Idxs);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/IR/Constants.cpp b/contrib/llvm/lib/IR/Constants.cpp
index fb83ebb..76c55b6 100644
--- a/contrib/llvm/lib/IR/Constants.cpp
+++ b/contrib/llvm/lib/IR/Constants.cpp
@@ -2389,6 +2389,7 @@ GetElementPtrConstantExpr::GetElementPtrConstantExpr(
IdxList.size() + 1),
SrcElementTy(SrcElementTy) {
Op<0>() = C;
+ Use *OperandList = getOperandList();
for (unsigned i = 0, E = IdxList.size(); i != E; ++i)
OperandList[i+1] = IdxList[i];
}
@@ -2851,6 +2852,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
// Keep track of whether all the values in the array are "ToC".
bool AllSame = true;
+ Use *OperandList = getOperandList();
for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
Constant *Val = cast<Constant>(O->get());
if (Val == From) {
@@ -2887,6 +2889,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
Constant *ToC = cast<Constant>(To);
+ Use *OperandList = getOperandList();
unsigned OperandToUpdate = U-OperandList;
assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!");
@@ -2955,6 +2958,7 @@ void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
}
// Update to the new value.
+ Use *OperandList = getOperandList();
if (Constant *C = getContext().pImpl->VectorConstants.replaceOperandsInPlace(
Values, this, From, ToC, NumUpdated, U - OperandList))
replaceUsesOfWithOnConstantImpl(C);
@@ -2983,6 +2987,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
}
// Update to the new value.
+ Use *OperandList = getOperandList();
if (Constant *C = getContext().pImpl->ExprConstants.replaceOperandsInPlace(
NewOps, this, From, To, NumUpdated, U - OperandList))
replaceUsesOfWithOnConstantImpl(C);
diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp
index d476434..23e923d 100644
--- a/contrib/llvm/lib/IR/Core.cpp
+++ b/contrib/llvm/lib/IR/Core.cpp
@@ -2249,11 +2249,8 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
}
LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty,
- LLVMValueRef PersFn, unsigned NumClauses,
- const char *Name) {
- return wrap(unwrap(B)->CreateLandingPad(unwrap(Ty),
- cast<Function>(unwrap(PersFn)),
- NumClauses, Name));
+ unsigned NumClauses, const char *Name) {
+ return wrap(unwrap(B)->CreateLandingPad(unwrap(Ty), NumClauses, Name));
}
LLVMValueRef LLVMBuildResume(LLVMBuilderRef B, LLVMValueRef Exn) {
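The C API follows the same move: LLVMBuildLandingPad loses its PersFn parameter, and the personality is expected to be attached to the containing function via the setter added to Core.h in this import (LLVMSetPersonalityFn; name assumed from the header changes). A hedged sketch:

    LLVMSetPersonalityFn(Fn, PersFn);   /* assumed new Core.h entry point */
    LLVMValueRef LP =
        LLVMBuildLandingPad(Builder, LPadTy, /*NumClauses=*/1, "lpad");
    LLVMSetCleanup(LP, 1);
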
diff --git a/contrib/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm/lib/IR/DIBuilder.cpp
index b1925ea..c41d844 100644
--- a/contrib/llvm/lib/IR/DIBuilder.cpp
+++ b/contrib/llvm/lib/IR/DIBuilder.cpp
@@ -55,7 +55,7 @@ public:
return HeaderBuilder().concat("0x" + Twine::utohexstr(Tag));
}
};
-}
+} // namespace
DIBuilder::DIBuilder(Module &m, bool AllowUnresolvedNodes)
: M(m), VMContext(M.getContext()), TempEnumTypes(nullptr),
@@ -327,7 +327,8 @@ DIBuilder::createObjCProperty(StringRef Name, DIFile *File, unsigned LineNumber,
StringRef GetterName, StringRef SetterName,
unsigned PropertyAttributes, DIType *Ty) {
return DIObjCProperty::get(VMContext, Name, File, LineNumber, GetterName,
- SetterName, PropertyAttributes, Ty);
+ SetterName, PropertyAttributes,
+ DITypeRef::get(Ty));
}
DITemplateTypeParameter *
diff --git a/contrib/llvm/lib/IR/DiagnosticInfo.cpp b/contrib/llvm/lib/IR/DiagnosticInfo.cpp
index 45be100..5de9289 100644
--- a/contrib/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/contrib/llvm/lib/IR/DiagnosticInfo.cpp
@@ -22,9 +22,9 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/Atomic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Regex.h"
+#include <atomic>
#include <string>
using namespace llvm;
@@ -84,11 +84,11 @@ PassRemarksAnalysis(
"the given regular expression"),
cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired,
cl::ZeroOrMore);
-}
+} // namespace
int llvm::getNextAvailablePluginDiagnosticKind() {
- static sys::cas_flag PluginKindID = DK_FirstPluginKind;
- return (int)sys::AtomicIncrement(&PluginKindID);
+ static std::atomic<int> PluginKindID(DK_FirstPluginKind);
+ return ++PluginKindID;
}
DiagnosticInfoInlineAsm::DiagnosticInfoInlineAsm(const Instruction &I,
@@ -170,6 +170,10 @@ bool DiagnosticInfoOptimizationRemarkAnalysis::isEnabled() const {
PassRemarksAnalysisOptLoc.Pattern->match(getPassName());
}
+void DiagnosticInfoMIRParser::print(DiagnosticPrinter &DP) const {
+ DP << Diagnostic;
+}
+
void llvm::emitOptimizationRemark(LLVMContext &Ctx, const char *PassName,
const Function &Fn, const DebugLoc &DLoc,
const Twine &Msg) {
diff --git a/contrib/llvm/lib/IR/DiagnosticPrinter.cpp b/contrib/llvm/lib/IR/DiagnosticPrinter.cpp
index f25fc20..659ff49 100644
--- a/contrib/llvm/lib/IR/DiagnosticPrinter.cpp
+++ b/contrib/llvm/lib/IR/DiagnosticPrinter.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SourceMgr.h"
using namespace llvm;
@@ -105,3 +106,12 @@ DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(const Module &M) {
Stream << M.getModuleIdentifier();
return *this;
}
+
+// Other types.
+DiagnosticPrinter &DiagnosticPrinterRawOStream::
+operator<<(const SMDiagnostic &Diag) {
+ // We don't have to print the SMDiagnostic kind, as the diagnostic severity
+ // is printed by the diagnostic handler.
+ Diag.print("", Stream, /*ShowColors=*/true, /*ShowKindLabel=*/false);
+ return *this;
+}
diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp
index cf8e3ed..b50ad12 100644
--- a/contrib/llvm/lib/IR/Function.cpp
+++ b/contrib/llvm/lib/IR/Function.cpp
@@ -154,10 +154,8 @@ bool Argument::hasNoCaptureAttr() const {
/// it in its containing function.
bool Argument::hasStructRetAttr() const {
if (!getType()->isPointerTy()) return false;
- if (this != getParent()->arg_begin())
- return false; // StructRet param must be first param
return getParent()->getAttributes().
- hasAttribute(1, Attribute::StructRet);
+ hasAttribute(getArgNo()+1, Attribute::StructRet);
}
/// hasReturnedAttr - Return true if this argument has the returned attribute on
@@ -250,8 +248,8 @@ void Function::eraseFromParent() {
Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name,
Module *ParentModule)
- : GlobalObject(PointerType::getUnqual(Ty), Value::FunctionVal, nullptr, 0,
- Linkage, name),
+ : GlobalObject(PointerType::getUnqual(Ty), Value::FunctionVal,
+ OperandTraits<Function>::op_begin(this), 0, Linkage, name),
Ty(Ty) {
assert(FunctionType::isValidReturnType(getReturnType()) &&
"invalid return type");
@@ -281,6 +279,9 @@ Function::~Function() {
// Remove the function from the on-the-side GC table.
clearGC();
+
+ // FIXME: needed by operator delete
+ setFunctionNumOperands(1);
}
void Function::BuildLazyArguments() const {
@@ -333,6 +334,8 @@ void Function::dropAllReferences() {
// Metadata is stored in a side-table.
clearMetadata();
+
+ setPersonalityFn(nullptr);
}
void Function::addAttribute(unsigned i, Attribute::AttrKind attr) {
@@ -428,6 +431,10 @@ void Function::copyAttributesFrom(const GlobalValue *Src) {
setPrologueData(SrcF->getPrologueData());
else
setPrologueData(nullptr);
+ if (SrcF->hasPersonalityFn())
+ setPersonalityFn(SrcF->getPersonalityFn());
+ else
+ setPersonalityFn(nullptr);
}
/// \brief This does the actual lookup of an intrinsic ID which
@@ -839,6 +846,18 @@ bool Intrinsic::isOverloaded(ID id) {
#undef GET_INTRINSIC_OVERLOAD_TABLE
}
+bool Intrinsic::isLeaf(ID id) {
+ switch (id) {
+ default:
+ return true;
+
+ case Intrinsic::experimental_gc_statepoint:
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ return false;
+ }
+}
+
/// This defines the "Intrinsic::getAttributes(ID id)" method.
#define GET_INTRINSIC_ATTRIBUTES
#include "llvm/IR/Intrinsics.gen"
@@ -978,3 +997,22 @@ Optional<uint64_t> Function::getEntryCount() const {
}
return None;
}
+
+void Function::setPersonalityFn(Constant *C) {
+ if (!C) {
+ if (hasPersonalityFn()) {
+ // Note, the num operands is used to compute the offset of the operand, so
+ // the order here matters. Clearing the operand then clearing the num
+ // operands ensures we have the correct offset to the operand.
+ Op<0>().set(nullptr);
+ setFunctionNumOperands(0);
+ }
+ } else {
+ // Note, the num operands is used to compute the offset of the operand, so
+ // the order here matters. We need to set num operands to 1 first so that
+ // we get the correct offset to the first operand when we set it.
+ if (!hasPersonalityFn())
+ setFunctionNumOperands(1);
+ Op<0>().set(C);
+ }
+}
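The hasStructRetAttr change above means sret can now be detected on any pointer argument, not only the first one. A small sketch (F is an assumed Function*):

    // Sketch: report every sret argument, whatever its position.
    for (Argument &A : F->args())
      if (A.hasStructRetAttr())
        errs() << "sret argument #" << A.getArgNo() << "\n";
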
diff --git a/contrib/llvm/lib/IR/GCOV.cpp b/contrib/llvm/lib/IR/GCOV.cpp
index 6ed58913..a0a3db4 100644
--- a/contrib/llvm/lib/IR/GCOV.cpp
+++ b/contrib/llvm/lib/IR/GCOV.cpp
@@ -496,7 +496,7 @@ public:
OS << format("%5u:", LineNum) << Line << "\n";
}
};
-}
+} // namespace
/// Convert a path to a gcov filename. If PreservePaths is true, this
/// translates "/" to "#", ".." to "^", and drops ".", to match gcov.
diff --git a/contrib/llvm/lib/IR/Globals.cpp b/contrib/llvm/lib/IR/Globals.cpp
index 1028e33..79a458c 100644
--- a/contrib/llvm/lib/IR/Globals.cpp
+++ b/contrib/llvm/lib/IR/Globals.cpp
@@ -214,14 +214,20 @@ void GlobalVariable::replaceUsesOfWithOnConstant(Value *From, Value *To,
void GlobalVariable::setInitializer(Constant *InitVal) {
if (!InitVal) {
if (hasInitializer()) {
+ // Note, the num operands is used to compute the offset of the operand, so
+ // the order here matters. Clearing the operand then clearing the num
+ // operands ensures we have the correct offset to the operand.
Op<0>().set(nullptr);
- NumOperands = 0;
+ setGlobalVariableNumOperands(0);
}
} else {
assert(InitVal->getType() == getType()->getElementType() &&
"Initializer type must match GlobalVariable type");
+ // Note, the num operands is used to compute the offset of the operand, so
+ // the order here matters. We need to set num operands to 1 first so that
+ // we get the correct offset to the first operand when we set it.
if (!hasInitializer())
- NumOperands = 1;
+ setGlobalVariableNumOperands(1);
Op<0>().set(InitVal);
}
}
diff --git a/contrib/llvm/lib/IR/IRBuilder.cpp b/contrib/llvm/lib/IR/IRBuilder.cpp
index 335cf36..bddb278 100644
--- a/contrib/llvm/lib/IR/IRBuilder.cpp
+++ b/contrib/llvm/lib/IR/IRBuilder.cpp
@@ -25,13 +25,15 @@ using namespace llvm;
/// specified. If Name is specified, it is the name of the global variable
/// created.
GlobalVariable *IRBuilderBase::CreateGlobalString(StringRef Str,
- const Twine &Name) {
+ const Twine &Name,
+ unsigned AddressSpace) {
Constant *StrConstant = ConstantDataArray::getString(Context, Str);
Module &M = *BB->getParent()->getParent();
GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(),
true, GlobalValue::PrivateLinkage,
- StrConstant);
- GV->setName(Name);
+ StrConstant, Name, nullptr,
+ GlobalVariable::NotThreadLocal,
+ AddressSpace);
GV->setUnnamedAddr(true);
return GV;
}
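CreateGlobalString now takes the target address space directly instead of always creating the global in address space 0. Usage sketch (InsertBB is assumed; address space 3 is only an example value):

    // Sketch: emit a private, unnamed_addr constant string into address space 3.
    IRBuilder<> B(InsertBB);
    GlobalVariable *Msg = B.CreateGlobalString("hello", "msg", /*AddressSpace=*/3);
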
diff --git a/contrib/llvm/lib/IR/IRPrintingPasses.cpp b/contrib/llvm/lib/IR/IRPrintingPasses.cpp
index c1ac336..03e7d55 100644
--- a/contrib/llvm/lib/IR/IRPrintingPasses.cpp
+++ b/contrib/llvm/lib/IR/IRPrintingPasses.cpp
@@ -103,7 +103,7 @@ public:
}
};
-}
+} // namespace
char PrintModulePassWrapper::ID = 0;
INITIALIZE_PASS(PrintModulePassWrapper, "print-module",
diff --git a/contrib/llvm/lib/IR/Instruction.cpp b/contrib/llvm/lib/IR/Instruction.cpp
index 45bb296..af42638 100644
--- a/contrib/llvm/lib/IR/Instruction.cpp
+++ b/contrib/llvm/lib/IR/Instruction.cpp
@@ -26,9 +26,9 @@ Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
// If requested, insert this instruction into a basic block...
if (InsertBefore) {
- assert(InsertBefore->getParent() &&
- "Instruction to insert before is not in a basic block!");
- InsertBefore->getParent()->getInstList().insert(InsertBefore, this);
+ BasicBlock *BB = InsertBefore->getParent();
+ assert(BB && "Instruction to insert before is not in a basic block!");
+ BB->getInstList().insert(InsertBefore, this);
}
}
diff --git a/contrib/llvm/lib/IR/Instructions.cpp b/contrib/llvm/lib/IR/Instructions.cpp
index 1478bff..d45b511 100644
--- a/contrib/llvm/lib/IR/Instructions.cpp
+++ b/contrib/llvm/lib/IR/Instructions.cpp
@@ -85,30 +85,14 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) {
//===----------------------------------------------------------------------===//
PHINode::PHINode(const PHINode &PN)
- : Instruction(PN.getType(), Instruction::PHI,
- allocHungoffUses(PN.getNumOperands()), PN.getNumOperands()),
- ReservedSpace(PN.getNumOperands()) {
+ : Instruction(PN.getType(), Instruction::PHI, nullptr, PN.getNumOperands()),
+ ReservedSpace(PN.getNumOperands()) {
+ allocHungoffUses(PN.getNumOperands());
std::copy(PN.op_begin(), PN.op_end(), op_begin());
std::copy(PN.block_begin(), PN.block_end(), block_begin());
SubclassOptionalData = PN.SubclassOptionalData;
}
-PHINode::~PHINode() {
- dropHungoffUses();
-}
-
-Use *PHINode::allocHungoffUses(unsigned N) const {
- // Allocate the array of Uses of the incoming values, followed by a pointer
- // (with bottom bit set) to the User, followed by the array of pointers to
- // the incoming basic blocks.
- size_t size = N * sizeof(Use) + sizeof(Use::UserRef)
- + N * sizeof(BasicBlock*);
- Use *Begin = static_cast<Use*>(::operator new(size));
- Use *End = Begin + N;
- (void) new(End) Use::UserRef(const_cast<PHINode*>(this), 1);
- return Use::initTags(Begin, End);
-}
-
// removeIncomingValue - Remove an incoming value. This is useful if a
// predecessor basic block is deleted.
Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
@@ -124,7 +108,7 @@ Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
// Nuke the last value.
Op<-1>().set(nullptr);
- --NumOperands;
+ setNumHungOffUseOperands(getNumOperands() - 1);
// If the PHI node is dead, because it has zero entries, nuke it now.
if (getNumOperands() == 0 && DeletePHIIfEmpty) {
@@ -144,16 +128,8 @@ void PHINode::growOperands() {
unsigned NumOps = e + e / 2;
if (NumOps < 2) NumOps = 2; // 2 op PHI nodes are VERY common.
- Use *OldOps = op_begin();
- BasicBlock **OldBlocks = block_begin();
-
ReservedSpace = NumOps;
- OperandList = allocHungoffUses(ReservedSpace);
-
- std::copy(OldOps, OldOps + e, op_begin());
- std::copy(OldBlocks, OldBlocks + e, block_begin());
-
- Use::zap(OldOps, OldOps + e, true);
+ growHungoffUses(ReservedSpace, /* IsPhi */ true);
}
/// hasConstantValue - If the specified PHI node always merges together the same
@@ -177,57 +153,47 @@ Value *PHINode::hasConstantValue() const {
// LandingPadInst Implementation
//===----------------------------------------------------------------------===//
-LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedValues, const Twine &NameStr,
- Instruction *InsertBefore)
- : Instruction(RetTy, Instruction::LandingPad, nullptr, 0, InsertBefore) {
- init(PersonalityFn, 1 + NumReservedValues, NameStr);
+LandingPadInst::LandingPadInst(Type *RetTy, unsigned NumReservedValues,
+ const Twine &NameStr, Instruction *InsertBefore)
+ : Instruction(RetTy, Instruction::LandingPad, nullptr, 0, InsertBefore) {
+ init(NumReservedValues, NameStr);
}
-LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedValues, const Twine &NameStr,
- BasicBlock *InsertAtEnd)
- : Instruction(RetTy, Instruction::LandingPad, nullptr, 0, InsertAtEnd) {
- init(PersonalityFn, 1 + NumReservedValues, NameStr);
+LandingPadInst::LandingPadInst(Type *RetTy, unsigned NumReservedValues,
+ const Twine &NameStr, BasicBlock *InsertAtEnd)
+ : Instruction(RetTy, Instruction::LandingPad, nullptr, 0, InsertAtEnd) {
+ init(NumReservedValues, NameStr);
}
LandingPadInst::LandingPadInst(const LandingPadInst &LP)
- : Instruction(LP.getType(), Instruction::LandingPad,
- allocHungoffUses(LP.getNumOperands()), LP.getNumOperands()),
- ReservedSpace(LP.getNumOperands()) {
- Use *OL = OperandList, *InOL = LP.OperandList;
+ : Instruction(LP.getType(), Instruction::LandingPad, nullptr,
+ LP.getNumOperands()),
+ ReservedSpace(LP.getNumOperands()) {
+ allocHungoffUses(LP.getNumOperands());
+ Use *OL = getOperandList();
+ const Use *InOL = LP.getOperandList();
for (unsigned I = 0, E = ReservedSpace; I != E; ++I)
OL[I] = InOL[I];
setCleanup(LP.isCleanup());
}
-LandingPadInst::~LandingPadInst() {
- dropHungoffUses();
-}
-
-LandingPadInst *LandingPadInst::Create(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedClauses,
+LandingPadInst *LandingPadInst::Create(Type *RetTy, unsigned NumReservedClauses,
const Twine &NameStr,
Instruction *InsertBefore) {
- return new LandingPadInst(RetTy, PersonalityFn, NumReservedClauses, NameStr,
- InsertBefore);
+ return new LandingPadInst(RetTy, NumReservedClauses, NameStr, InsertBefore);
}
-LandingPadInst *LandingPadInst::Create(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedClauses,
+LandingPadInst *LandingPadInst::Create(Type *RetTy, unsigned NumReservedClauses,
const Twine &NameStr,
BasicBlock *InsertAtEnd) {
- return new LandingPadInst(RetTy, PersonalityFn, NumReservedClauses, NameStr,
- InsertAtEnd);
+ return new LandingPadInst(RetTy, NumReservedClauses, NameStr, InsertAtEnd);
}
-void LandingPadInst::init(Value *PersFn, unsigned NumReservedValues,
- const Twine &NameStr) {
+void LandingPadInst::init(unsigned NumReservedValues, const Twine &NameStr) {
ReservedSpace = NumReservedValues;
- NumOperands = 1;
- OperandList = allocHungoffUses(ReservedSpace);
- Op<0>() = PersFn;
+ setNumHungOffUseOperands(0);
+ allocHungoffUses(ReservedSpace);
setName(NameStr);
setCleanup(false);
}
@@ -237,23 +203,16 @@ void LandingPadInst::init(Value *PersFn, unsigned NumReservedValues,
void LandingPadInst::growOperands(unsigned Size) {
unsigned e = getNumOperands();
if (ReservedSpace >= e + Size) return;
- ReservedSpace = (e + Size / 2) * 2;
-
- Use *NewOps = allocHungoffUses(ReservedSpace);
- Use *OldOps = OperandList;
- for (unsigned i = 0; i != e; ++i)
- NewOps[i] = OldOps[i];
-
- OperandList = NewOps;
- Use::zap(OldOps, OldOps + e, true);
+ ReservedSpace = (std::max(e, 1U) + Size / 2) * 2;
+ growHungoffUses(ReservedSpace);
}
void LandingPadInst::addClause(Constant *Val) {
unsigned OpNo = getNumOperands();
growOperands(1);
assert(OpNo < ReservedSpace && "Growing didn't work!");
- ++NumOperands;
- OperandList[OpNo] = Val;
+ setNumHungOffUseOperands(getNumOperands() + 1);
+ getOperandList()[OpNo] = Val;
}
//===----------------------------------------------------------------------===//
@@ -266,7 +225,7 @@ CallInst::~CallInst() {
void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
const Twine &NameStr) {
this->FTy = FTy;
- assert(NumOperands == Args.size() + 1 && "NumOperands not set up?");
+ assert(getNumOperands() == Args.size() + 1 && "NumOperands not set up?");
Op<-1>() = Func;
#ifndef NDEBUG
@@ -287,7 +246,7 @@ void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
void CallInst::init(Value *Func, const Twine &NameStr) {
FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
- assert(NumOperands == 1 && "NumOperands not set up?");
+ assert(getNumOperands() == 1 && "NumOperands not set up?");
Op<-1>() = Func;
assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
@@ -542,7 +501,7 @@ void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal,
const Twine &NameStr) {
this->FTy = FTy;
- assert(NumOperands == 3 + Args.size() && "NumOperands not set up?");
+ assert(getNumOperands() == 3 + Args.size() && "NumOperands not set up?");
Op<-3>() = Fn;
Op<-2>() = IfNormal;
Op<-1>() = IfException;
@@ -1238,7 +1197,8 @@ FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
void GetElementPtrInst::init(Value *Ptr, ArrayRef<Value *> IdxList,
const Twine &Name) {
- assert(NumOperands == 1 + IdxList.size() && "NumOperands not initialized?");
+ assert(getNumOperands() == 1 + IdxList.size() &&
+ "NumOperands not initialized?");
Op<0>() = Ptr;
std::copy(IdxList.begin(), IdxList.end(), op_begin() + 1);
setName(Name);
@@ -1551,7 +1511,7 @@ void ShuffleVectorInst::getShuffleMask(Constant *Mask,
void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
const Twine &Name) {
- assert(NumOperands == 2 && "NumOperands not initialized?");
+ assert(getNumOperands() == 2 && "NumOperands not initialized?");
// There's no fundamental reason why we require at least one index
// (other than weirdness with &*IdxBegin being invalid; see
@@ -1582,7 +1542,7 @@ InsertValueInst::InsertValueInst(const InsertValueInst &IVI)
//===----------------------------------------------------------------------===//
void ExtractValueInst::init(ArrayRef<unsigned> Idxs, const Twine &Name) {
- assert(NumOperands == 1 && "NumOperands not initialized?");
+ assert(getNumOperands() == 1 && "NumOperands not initialized?");
// There's no fundamental reason why we require at least one index.
// But there's no present need to support it.
@@ -3296,8 +3256,8 @@ bool CmpInst::isFalseWhenEqual(unsigned short predicate) {
void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumReserved) {
assert(Value && Default && NumReserved);
ReservedSpace = NumReserved;
- NumOperands = 2;
- OperandList = allocHungoffUses(ReservedSpace);
+ setNumHungOffUseOperands(2);
+ allocHungoffUses(ReservedSpace);
Op<0>() = Value;
Op<1>() = Default;
@@ -3328,8 +3288,9 @@ SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
SwitchInst::SwitchInst(const SwitchInst &SI)
: TerminatorInst(SI.getType(), Instruction::Switch, nullptr, 0) {
init(SI.getCondition(), SI.getDefaultDest(), SI.getNumOperands());
- NumOperands = SI.getNumOperands();
- Use *OL = OperandList, *InOL = SI.OperandList;
+ setNumHungOffUseOperands(SI.getNumOperands());
+ Use *OL = getOperandList();
+ const Use *InOL = SI.getOperandList();
for (unsigned i = 2, E = SI.getNumOperands(); i != E; i += 2) {
OL[i] = InOL[i];
OL[i+1] = InOL[i+1];
@@ -3337,21 +3298,17 @@ SwitchInst::SwitchInst(const SwitchInst &SI)
SubclassOptionalData = SI.SubclassOptionalData;
}
-SwitchInst::~SwitchInst() {
- dropHungoffUses();
-}
-
/// addCase - Add an entry to the switch instruction...
///
void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
- unsigned NewCaseIdx = getNumCases();
- unsigned OpNo = NumOperands;
+ unsigned NewCaseIdx = getNumCases();
+ unsigned OpNo = getNumOperands();
if (OpNo+2 > ReservedSpace)
growOperands(); // Get more space!
// Initialize some new operands.
assert(OpNo+1 < ReservedSpace && "Growing didn't work!");
- NumOperands = OpNo+2;
+ setNumHungOffUseOperands(OpNo+2);
CaseIt Case(this, NewCaseIdx);
Case.setValue(OnVal);
Case.setSuccessor(Dest);
@@ -3365,7 +3322,7 @@ void SwitchInst::removeCase(CaseIt i) {
assert(2 + idx*2 < getNumOperands() && "Case index out of range!!!");
unsigned NumOps = getNumOperands();
- Use *OL = OperandList;
+ Use *OL = getOperandList();
// Overwrite this case with the end of the list.
if (2 + (idx + 1) * 2 != NumOps) {
@@ -3376,7 +3333,7 @@ void SwitchInst::removeCase(CaseIt i) {
// Nuke the last value.
OL[NumOps-2].set(nullptr);
OL[NumOps-2+1].set(nullptr);
- NumOperands = NumOps-2;
+ setNumHungOffUseOperands(NumOps-2);
}
/// growOperands - grow operands - This grows the operand list in response
@@ -3387,13 +3344,7 @@ void SwitchInst::growOperands() {
unsigned NumOps = e*3;
ReservedSpace = NumOps;
- Use *NewOps = allocHungoffUses(NumOps);
- Use *OldOps = OperandList;
- for (unsigned i = 0; i != e; ++i) {
- NewOps[i] = OldOps[i];
- }
- OperandList = NewOps;
- Use::zap(OldOps, OldOps + e, true);
+ growHungoffUses(ReservedSpace);
}
@@ -3415,9 +3366,9 @@ void IndirectBrInst::init(Value *Address, unsigned NumDests) {
assert(Address && Address->getType()->isPointerTy() &&
"Address of indirectbr must be a pointer");
ReservedSpace = 1+NumDests;
- NumOperands = 1;
- OperandList = allocHungoffUses(ReservedSpace);
-
+ setNumHungOffUseOperands(1);
+ allocHungoffUses(ReservedSpace);
+
Op<0>() = Address;
}
@@ -3430,12 +3381,7 @@ void IndirectBrInst::growOperands() {
unsigned NumOps = e*2;
ReservedSpace = NumOps;
- Use *NewOps = allocHungoffUses(NumOps);
- Use *OldOps = OperandList;
- for (unsigned i = 0; i != e; ++i)
- NewOps[i] = OldOps[i];
- OperandList = NewOps;
- Use::zap(OldOps, OldOps + e, true);
+ growHungoffUses(ReservedSpace);
}
IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
@@ -3453,29 +3399,26 @@ IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
}
IndirectBrInst::IndirectBrInst(const IndirectBrInst &IBI)
- : TerminatorInst(Type::getVoidTy(IBI.getContext()), Instruction::IndirectBr,
- allocHungoffUses(IBI.getNumOperands()),
- IBI.getNumOperands()) {
- Use *OL = OperandList, *InOL = IBI.OperandList;
+ : TerminatorInst(Type::getVoidTy(IBI.getContext()), Instruction::IndirectBr,
+ nullptr, IBI.getNumOperands()) {
+ allocHungoffUses(IBI.getNumOperands());
+ Use *OL = getOperandList();
+ const Use *InOL = IBI.getOperandList();
for (unsigned i = 0, E = IBI.getNumOperands(); i != E; ++i)
OL[i] = InOL[i];
SubclassOptionalData = IBI.SubclassOptionalData;
}
-IndirectBrInst::~IndirectBrInst() {
- dropHungoffUses();
-}
-
/// addDestination - Add a destination.
///
void IndirectBrInst::addDestination(BasicBlock *DestBB) {
- unsigned OpNo = NumOperands;
+ unsigned OpNo = getNumOperands();
if (OpNo+1 > ReservedSpace)
growOperands(); // Get more space!
// Initialize some new operands.
assert(OpNo < ReservedSpace && "Growing didn't work!");
- NumOperands = OpNo+1;
- OperandList[OpNo] = DestBB;
+ setNumHungOffUseOperands(OpNo+1);
+ getOperandList()[OpNo] = DestBB;
}
/// removeDestination - This method removes the specified successor from the
@@ -3484,14 +3427,14 @@ void IndirectBrInst::removeDestination(unsigned idx) {
assert(idx < getNumOperands()-1 && "Successor index out of range!");
unsigned NumOps = getNumOperands();
- Use *OL = OperandList;
+ Use *OL = getOperandList();
// Replace this value with the last one.
OL[idx+1] = OL[NumOps-1];
// Nuke the last value.
OL[NumOps-1].set(nullptr);
- NumOperands = NumOps-1;
+ setNumHungOffUseOperands(NumOps-1);
}
BasicBlock *IndirectBrInst::getSuccessorV(unsigned idx) const {
diff --git a/contrib/llvm/lib/IR/LLVMContext.cpp b/contrib/llvm/lib/IR/LLVMContext.cpp
index 7bcd829..6d799e4 100644
--- a/contrib/llvm/lib/IR/LLVMContext.cpp
+++ b/contrib/llvm/lib/IR/LLVMContext.cpp
@@ -199,6 +199,20 @@ static bool isDiagnosticEnabled(const DiagnosticInfo &DI) {
return true;
}
+static const char *getDiagnosticMessagePrefix(DiagnosticSeverity Severity) {
+ switch (Severity) {
+ case DS_Error:
+ return "error";
+ case DS_Warning:
+ return "warning";
+ case DS_Remark:
+ return "remark";
+ case DS_Note:
+ return "note";
+ }
+ llvm_unreachable("Unknown DiagnosticSeverity");
+}
+
void LLVMContext::diagnose(const DiagnosticInfo &DI) {
// If there is a report handler, use it.
if (pImpl->DiagnosticHandler) {
@@ -211,25 +225,12 @@ void LLVMContext::diagnose(const DiagnosticInfo &DI) {
return;
// Otherwise, print the message with a prefix based on the severity.
- std::string MsgStorage;
- raw_string_ostream Stream(MsgStorage);
- DiagnosticPrinterRawOStream DP(Stream);
+ DiagnosticPrinterRawOStream DP(errs());
+ errs() << getDiagnosticMessagePrefix(DI.getSeverity()) << ": ";
DI.print(DP);
- Stream.flush();
- switch (DI.getSeverity()) {
- case DS_Error:
- errs() << "error: " << MsgStorage << "\n";
+ errs() << "\n";
+ if (DI.getSeverity() == DS_Error)
exit(1);
- case DS_Warning:
- errs() << "warning: " << MsgStorage << "\n";
- break;
- case DS_Remark:
- errs() << "remark: " << MsgStorage << "\n";
- break;
- case DS_Note:
- errs() << "note: " << MsgStorage << "\n";
- break;
- }
}
void LLVMContext::emitError(unsigned LocCookie, const Twine &ErrorStr) {
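After this change the default path prints the severity prefix to errs() and terminates on DS_Error. Embedders that want different behaviour can keep installing a handler, which diagnose() consults before falling back; a sketch (Context is an assumed LLVMContext&):

    // Sketch: capture diagnostics instead of the errs()/exit(1) default.
    static void MyDiagHandler(const DiagnosticInfo &DI, void *) {
      std::string Msg;
      raw_string_ostream OS(Msg);
      DiagnosticPrinterRawOStream DP(OS);
      DI.print(DP);
      OS.flush();
      // forward Msg to the host application's own logging
    }

    Context.setDiagnosticHandler(MyDiagHandler);
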
diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.cpp b/contrib/llvm/lib/IR/LLVMContextImpl.cpp
index 1e20807..d3d2fcd 100644
--- a/contrib/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/contrib/llvm/lib/IR/LLVMContextImpl.cpp
@@ -65,7 +65,7 @@ struct DropFirst {
P.first->dropAllReferences();
}
};
-}
+} // namespace
LLVMContextImpl::~LLVMContextImpl() {
// NOTE: We need to delete the contents of OwnedModules, but Module's dtor
@@ -199,7 +199,7 @@ namespace llvm {
/// does not cause MDOperand to be transparent. In particular, a bare pointer
/// doesn't get hashed before it's combined, whereas \a MDOperand would.
static const Metadata *get_hashable_data(const MDOperand &X) { return X.get(); }
-}
+} // namespace llvm
unsigned MDNodeOpsKey::calculateHash(MDNode *N, unsigned Offset) {
unsigned Hash = hash_combine_range(N->op_begin() + Offset, N->op_end());
diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm/lib/IR/LLVMContextImpl.h
index 3a57336..41a898b 100644
--- a/contrib/llvm/lib/IR/LLVMContextImpl.h
+++ b/contrib/llvm/lib/IR/LLVMContextImpl.h
@@ -1025,6 +1025,6 @@ public:
void dropTriviallyDeadConstantArrays();
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm/lib/IR/LegacyPassManager.cpp
index 27d98a2..881d780 100644
--- a/contrib/llvm/lib/IR/LegacyPassManager.cpp
+++ b/contrib/llvm/lib/IR/LegacyPassManager.cpp
@@ -275,8 +275,8 @@ public:
void FunctionPassManagerImpl::anchor() {}
char FunctionPassManagerImpl::ID = 0;
-} // End of legacy namespace
-} // End of llvm namespace
+} // namespace legacy
+} // namespace llvm
namespace {
//===----------------------------------------------------------------------===//
@@ -439,8 +439,8 @@ public:
void PassManagerImpl::anchor() {}
char PassManagerImpl::ID = 0;
-} // End of legacy namespace
-} // End of llvm namespace
+} // namespace legacy
+} // namespace llvm
namespace {
@@ -486,7 +486,7 @@ public:
}
};
-} // End of anon namespace
+} // namespace
static TimingInfo *TheTimeInfo;
diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp
index 75b4046..1abcf0d 100644
--- a/contrib/llvm/lib/IR/Metadata.cpp
+++ b/contrib/llvm/lib/IR/Metadata.cpp
@@ -381,20 +381,35 @@ StringRef MDString::getString() const {
// MDNode implementation.
//
+// Assert that the MDNode types will not be unaligned by the objects
+// prepended to them.
+#define HANDLE_MDNODE_LEAF(CLASS) \
+ static_assert( \
+ llvm::AlignOf<uint64_t>::Alignment >= llvm::AlignOf<CLASS>::Alignment, \
+ "Alignment is insufficient after objects prepended to " #CLASS);
+#include "llvm/IR/Metadata.def"
+
void *MDNode::operator new(size_t Size, unsigned NumOps) {
- void *Ptr = ::operator new(Size + NumOps * sizeof(MDOperand));
+ size_t OpSize = NumOps * sizeof(MDOperand);
+ // uint64_t is the most aligned type we need support (ensured by static_assert
+ // above)
+ OpSize = RoundUpToAlignment(OpSize, llvm::alignOf<uint64_t>());
+ void *Ptr = reinterpret_cast<char *>(::operator new(OpSize + Size)) + OpSize;
MDOperand *O = static_cast<MDOperand *>(Ptr);
- for (MDOperand *E = O + NumOps; O != E; ++O)
- (void)new (O) MDOperand;
- return O;
+ for (MDOperand *E = O - NumOps; O != E; --O)
+ (void)new (O - 1) MDOperand;
+ return Ptr;
}
void MDNode::operator delete(void *Mem) {
MDNode *N = static_cast<MDNode *>(Mem);
+ size_t OpSize = N->NumOperands * sizeof(MDOperand);
+ OpSize = RoundUpToAlignment(OpSize, llvm::alignOf<uint64_t>());
+
MDOperand *O = static_cast<MDOperand *>(Mem);
for (MDOperand *E = O - N->NumOperands; O != E; --O)
(O - 1)->~MDOperand();
- ::operator delete(O);
+ ::operator delete(reinterpret_cast<char *>(Mem) - OpSize);
}
MDNode::MDNode(LLVMContext &Context, unsigned ID, StorageType Storage,
diff --git a/contrib/llvm/lib/IR/Operator.cpp b/contrib/llvm/lib/IR/Operator.cpp
index 77dc680..bea1f80 100644
--- a/contrib/llvm/lib/IR/Operator.cpp
+++ b/contrib/llvm/lib/IR/Operator.cpp
@@ -41,4 +41,4 @@ bool GEPOperator::accumulateConstantOffset(const DataLayout &DL,
}
return true;
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/IR/Pass.cpp b/contrib/llvm/lib/IR/Pass.cpp
index df45460..2fa1e7c 100644
--- a/contrib/llvm/lib/IR/Pass.cpp
+++ b/contrib/llvm/lib/IR/Pass.cpp
@@ -249,7 +249,7 @@ namespace {
CFGOnlyList.push_back(P->getTypeInfo());
}
};
-}
+} // namespace
// setPreservesCFG - This function should be called to by the pass, iff they do
// not:
diff --git a/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h b/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h
index a18f982..f94def7 100644
--- a/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h
+++ b/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h
@@ -113,6 +113,6 @@ void SymbolTableListTraits<ValueSubClass,ItemParentClass>
}
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/IR/TypeFinder.cpp b/contrib/llvm/lib/IR/TypeFinder.cpp
index 1d2b808..7accc5b 100644
--- a/contrib/llvm/lib/IR/TypeFinder.cpp
+++ b/contrib/llvm/lib/IR/TypeFinder.cpp
@@ -50,6 +50,9 @@ void TypeFinder::run(const Module &M, bool onlyNamed) {
if (FI->hasPrologueData())
incorporateValue(FI->getPrologueData());
+ if (FI->hasPersonalityFn())
+ incorporateValue(FI->getPersonalityFn());
+
// First incorporate the arguments.
for (Function::const_arg_iterator AI = FI->arg_begin(),
AE = FI->arg_end(); AI != AE; ++AI)
diff --git a/contrib/llvm/lib/IR/Use.cpp b/contrib/llvm/lib/IR/Use.cpp
index cae845d..fd06fdb 100644
--- a/contrib/llvm/lib/IR/Use.cpp
+++ b/contrib/llvm/lib/IR/Use.cpp
@@ -124,4 +124,4 @@ const Use *Use::getImpliedUser() const {
}
}
-} // End llvm namespace
+} // namespace llvm
diff --git a/contrib/llvm/lib/IR/User.cpp b/contrib/llvm/lib/IR/User.cpp
index ee83eacf..21f4849 100644
--- a/contrib/llvm/lib/IR/User.cpp
+++ b/contrib/llvm/lib/IR/User.cpp
@@ -13,6 +13,7 @@
#include "llvm/IR/Operator.h"
namespace llvm {
+class BasicBlock;
//===----------------------------------------------------------------------===//
// User Class
@@ -39,41 +40,100 @@ void User::replaceUsesOfWith(Value *From, Value *To) {
// User allocHungoffUses Implementation
//===----------------------------------------------------------------------===//
-Use *User::allocHungoffUses(unsigned N) const {
+void User::allocHungoffUses(unsigned N, bool IsPhi) {
+ assert(HasHungOffUses && "alloc must have hung off uses");
+
+ static_assert(AlignOf<Use>::Alignment >= AlignOf<Use::UserRef>::Alignment,
+ "Alignment is insufficient for 'hung-off-uses' pieces");
+ static_assert(AlignOf<Use::UserRef>::Alignment >=
+ AlignOf<BasicBlock *>::Alignment,
+ "Alignment is insufficient for 'hung-off-uses' pieces");
+
// Allocate the array of Uses, followed by a pointer (with bottom bit set) to
// the User.
size_t size = N * sizeof(Use) + sizeof(Use::UserRef);
+ if (IsPhi)
+ size += N * sizeof(BasicBlock *);
Use *Begin = static_cast<Use*>(::operator new(size));
Use *End = Begin + N;
(void) new(End) Use::UserRef(const_cast<User*>(this), 1);
- return Use::initTags(Begin, End);
+ setOperandList(Use::initTags(Begin, End));
+}
+
+void User::growHungoffUses(unsigned NewNumUses, bool IsPhi) {
+ assert(HasHungOffUses && "realloc must have hung off uses");
+
+ unsigned OldNumUses = getNumOperands();
+
+ // We don't support shrinking the number of uses. We wouldn't have enough
+ // space to copy the old uses in to the new space.
+ assert(NewNumUses > OldNumUses && "realloc must grow num uses");
+
+ Use *OldOps = getOperandList();
+ allocHungoffUses(NewNumUses, IsPhi);
+ Use *NewOps = getOperandList();
+
+ // Now copy from the old operands list to the new one.
+ std::copy(OldOps, OldOps + OldNumUses, NewOps);
+
+ // If this is a Phi, then we need to copy the BB pointers too.
+ if (IsPhi) {
+ auto *OldPtr =
+ reinterpret_cast<char *>(OldOps + OldNumUses) + sizeof(Use::UserRef);
+ auto *NewPtr =
+ reinterpret_cast<char *>(NewOps + NewNumUses) + sizeof(Use::UserRef);
+ std::copy(OldPtr, OldPtr + (OldNumUses * sizeof(BasicBlock *)), NewPtr);
+ }
+ Use::zap(OldOps, OldOps + OldNumUses, true);
}
//===----------------------------------------------------------------------===//
// User operator new Implementations
//===----------------------------------------------------------------------===//
-void *User::operator new(size_t s, unsigned Us) {
- void *Storage = ::operator new(s + sizeof(Use) * Us);
+void *User::operator new(size_t Size, unsigned Us) {
+ assert(Us < (1u << NumUserOperandsBits) && "Too many operands");
+ void *Storage = ::operator new(Size + sizeof(Use) * Us);
Use *Start = static_cast<Use*>(Storage);
Use *End = Start + Us;
User *Obj = reinterpret_cast<User*>(End);
- Obj->OperandList = Start;
- Obj->NumOperands = Us;
+ Obj->NumUserOperands = Us;
+ Obj->HasHungOffUses = false;
Use::initTags(Start, End);
return Obj;
}
+void *User::operator new(size_t Size) {
+ // Allocate space for a single Use*
+ void *Storage = ::operator new(Size + sizeof(Use *));
+ Use **HungOffOperandList = static_cast<Use **>(Storage);
+ User *Obj = reinterpret_cast<User *>(HungOffOperandList + 1);
+ Obj->NumUserOperands = 0;
+ Obj->HasHungOffUses = true;
+ *HungOffOperandList = nullptr;
+ return Obj;
+}
+
//===----------------------------------------------------------------------===//
// User operator delete Implementation
//===----------------------------------------------------------------------===//
void User::operator delete(void *Usr) {
- User *Start = static_cast<User*>(Usr);
- Use *Storage = static_cast<Use*>(Usr) - Start->NumOperands;
- // If there were hung-off uses, they will have been freed already and
- // NumOperands reset to 0, so here we just free the User itself.
- ::operator delete(Storage);
+ // Hung off uses use a single Use* before the User, while other subclasses
+ // use a Use[] allocated prior to the user.
+ User *Obj = static_cast<User *>(Usr);
+ if (Obj->HasHungOffUses) {
+ Use **HungOffOperandList = static_cast<Use **>(Usr) - 1;
+ // drop the hung off uses.
+ Use::zap(*HungOffOperandList, *HungOffOperandList + Obj->NumUserOperands,
+ /* Delete */ true);
+ ::operator delete(HungOffOperandList);
+ } else {
+ Use *Storage = static_cast<Use *>(Usr) - Obj->NumUserOperands;
+ Use::zap(Storage, Storage + Obj->NumUserOperands,
+ /* Delete */ false);
+ ::operator delete(Storage);
+ }
}
//===----------------------------------------------------------------------===//
@@ -84,4 +144,4 @@ Operator::~Operator() {
llvm_unreachable("should never destroy an Operator");
}
-} // End llvm namespace
+} // namespace llvm
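The User.cpp hunks above move the operand bookkeeping out of Value: normal users co-allocate their Use[] immediately before the object, while hung-off users keep a single Use* slot in front of the User and regrow it on demand. A rough, self-contained sketch of the co-allocation idea only, using invented Slot/Node names rather than LLVM's classes:

#include <new>

struct Slot { void *Val; };

struct Node {
  unsigned NumSlots;
  // The slots live directly before 'this' in the same allocation.
  Slot *slots() { return reinterpret_cast<Slot *>(this) - NumSlots; }

  static Node *create(unsigned N) {
    void *Mem = ::operator new(sizeof(Slot) * N + sizeof(Node));
    Slot *Begin = static_cast<Slot *>(Mem);
    for (unsigned I = 0; I != N; ++I)
      new (Begin + I) Slot{nullptr};     // construct the operand slots
    return new (Begin + N) Node{N};      // the object sits after its slots
  }

  static void destroy(Node *Obj) {
    ::operator delete(Obj->slots());     // one allocation frees everything
  }
};

growHungoffUses then reduces to: allocate a larger block, copy the old slots (plus the BasicBlock pointers for PHIs), and zap the old block, exactly as in the hunk.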
diff --git a/contrib/llvm/lib/IR/Value.cpp b/contrib/llvm/lib/IR/Value.cpp
index dcf0ad5..eb5c225 100644
--- a/contrib/llvm/lib/IR/Value.cpp
+++ b/contrib/llvm/lib/IR/Value.cpp
@@ -39,6 +39,7 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
// Value Class
//===----------------------------------------------------------------------===//
+const unsigned Value::NumUserOperandsBits;
static inline Type *checkType(Type *Ty) {
assert(Ty && "Value defined with a null type: Error!");
@@ -48,7 +49,7 @@ static inline Type *checkType(Type *Ty) {
Value::Value(Type *ty, unsigned scid)
: VTy(checkType(ty)), UseList(nullptr), SubclassID(scid),
HasValueHandle(0), SubclassOptionalData(0), SubclassData(0),
- NumOperands(0), IsUsedByMD(false), HasName(false) {
+ NumUserOperands(0), IsUsedByMD(false), HasName(false) {
// FIXME: Why isn't this in the subclass gunk??
// Note, we cannot call isa<CallInst> before the CallInst has been
// constructed.
diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp
index 5ed137a..19b11b4 100644
--- a/contrib/llvm/lib/IR/Verifier.cpp
+++ b/contrib/llvm/lib/IR/Verifier.cpp
@@ -181,11 +181,6 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
/// \brief Track unresolved string-based type references.
SmallDenseMap<const MDString *, const MDNode *, 32> UnresolvedTypeRefs;
- /// \brief The personality function referenced by the LandingPadInsts.
- /// All LandingPadInsts within the same function must use the same
- /// personality function.
- const Value *PersonalityFn;
-
/// \brief Whether we've seen a call to @llvm.frameescape in this function
/// already.
bool SawFrameEscape;
@@ -196,8 +191,7 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
public:
explicit Verifier(raw_ostream &OS)
- : VerifierSupport(OS), Context(nullptr), PersonalityFn(nullptr),
- SawFrameEscape(false) {}
+ : VerifierSupport(OS), Context(nullptr), SawFrameEscape(false) {}
bool verify(const Function &F) {
M = F.getParent();
@@ -231,7 +225,6 @@ public:
// FIXME: We strip const here because the inst visitor strips const.
visit(const_cast<Function &>(F));
InstsInThisBlock.clear();
- PersonalityFn = nullptr;
SawFrameEscape = false;
return !Broken;
@@ -584,7 +577,6 @@ void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias*> &Visited,
}
void Verifier::visitGlobalAlias(const GlobalAlias &GA) {
- Assert(!GA.getName().empty(), "Alias name cannot be empty!", &GA);
Assert(GlobalAlias::isValidLinkage(GA.getLinkage()),
"Alias should have private, internal, linkonce, weak, linkonce_odr, "
"weak_odr, or external linkage!",
@@ -1086,7 +1078,7 @@ void Verifier::visitDIExpression(const DIExpression &N) {
void Verifier::visitDIObjCProperty(const DIObjCProperty &N) {
Assert(N.getTag() == dwarf::DW_TAG_APPLE_property, "invalid tag", &N);
if (auto *T = N.getRawType())
- Assert(isa<DIType>(T), "invalid type ref", &N, T);
+ Assert(isTypeRef(N, T), "invalid type ref", &N, T);
if (auto *F = N.getRawFile())
Assert(isa<DIFile>(F), "invalid file", &N, F);
}
@@ -1251,6 +1243,7 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
I->getKindAsEnum() == Attribute::StackProtect ||
I->getKindAsEnum() == Attribute::StackProtectReq ||
I->getKindAsEnum() == Attribute::StackProtectStrong ||
+ I->getKindAsEnum() == Attribute::SafeStack ||
I->getKindAsEnum() == Attribute::NoRedZone ||
I->getKindAsEnum() == Attribute::NoImplicitFloat ||
I->getKindAsEnum() == Attribute::Naked ||
@@ -1757,6 +1750,8 @@ void Verifier::visitFunction(const Function &F) {
"invalid linkage type for function declaration", &F);
Assert(MDs.empty(), "function without a body cannot have metadata", &F,
MDs.empty() ? nullptr : MDs.front().second);
+ Assert(!F.hasPersonalityFn(),
+ "Function declaration shouldn't have a personality routine", &F);
} else {
// Verify that this function (which has a body) is not named "llvm.*". It
// is not legal to define intrinsics.
@@ -2795,22 +2790,16 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
&LPI);
}
+ Function *F = LPI.getParent()->getParent();
+ Assert(F->hasPersonalityFn(),
+ "LandingPadInst needs to be in a function with a personality.", &LPI);
+
// The landingpad instruction must be the first non-PHI instruction in the
// block.
Assert(LPI.getParent()->getLandingPadInst() == &LPI,
"LandingPadInst not the first non-PHI instruction in the block.",
&LPI);
- // The personality functions for all landingpad instructions within the same
- // function should match.
- if (PersonalityFn)
- Assert(LPI.getPersonalityFn() == PersonalityFn,
- "Personality function doesn't match others in function", &LPI);
- PersonalityFn = LPI.getPersonalityFn();
-
- // All operands must be constants.
- Assert(isa<Constant>(PersonalityFn), "Personality function is not constant!",
- &LPI);
for (unsigned i = 0, e = LPI.getNumClauses(); i < e; ++i) {
Constant *Clause = LPI.getClause(i);
if (LPI.isCatch(i)) {
@@ -3702,7 +3691,7 @@ struct VerifierLegacyPass : public FunctionPass {
AU.setPreservesAll();
}
};
-}
+} // namespace
char VerifierLegacyPass::ID = 0;
INITIALIZE_PASS(VerifierLegacyPass, "verify", "Module Verifier", false, false)
diff --git a/contrib/llvm/lib/IRReader/IRReader.cpp b/contrib/llvm/lib/IRReader/IRReader.cpp
index 7bc6f07..43fee65 100644
--- a/contrib/llvm/lib/IRReader/IRReader.cpp
+++ b/contrib/llvm/lib/IRReader/IRReader.cpp
@@ -34,14 +34,14 @@ getLazyIRModule(std::unique_ptr<MemoryBuffer> Buffer, SMDiagnostic &Err,
LLVMContext &Context) {
if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
(const unsigned char *)Buffer->getBufferEnd())) {
- ErrorOr<Module *> ModuleOrErr =
+ ErrorOr<std::unique_ptr<Module>> ModuleOrErr =
getLazyBitcodeModule(std::move(Buffer), Context);
if (std::error_code EC = ModuleOrErr.getError()) {
Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
EC.message());
return nullptr;
}
- return std::unique_ptr<Module>(ModuleOrErr.get());
+ return std::move(ModuleOrErr.get());
}
return parseAssembly(Buffer->getMemBufferRef(), Err, Context);
@@ -67,13 +67,14 @@ std::unique_ptr<Module> llvm::parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err,
TimePassesIsEnabled);
if (isBitcode((const unsigned char *)Buffer.getBufferStart(),
(const unsigned char *)Buffer.getBufferEnd())) {
- ErrorOr<Module *> ModuleOrErr = parseBitcodeFile(Buffer, Context);
+ ErrorOr<std::unique_ptr<Module>> ModuleOrErr =
+ parseBitcodeFile(Buffer, Context);
if (std::error_code EC = ModuleOrErr.getError()) {
Err = SMDiagnostic(Buffer.getBufferIdentifier(), SourceMgr::DK_Error,
EC.message());
return nullptr;
}
- return std::unique_ptr<Module>(ModuleOrErr.get());
+ return std::move(ModuleOrErr.get());
}
return parseAssembly(Buffer, Err, Context);
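Both IRReader call sites switch from ErrorOr<Module *> to ErrorOr<std::unique_ptr<Module>>, so ownership is moved out of the parse result instead of being re-wrapped in a fresh unique_ptr. A small sketch of that calling pattern with a stand-in Parsed type (only llvm/Support/ErrorOr.h is assumed):

#include "llvm/Support/ErrorOr.h"
#include <memory>
#include <system_error>

struct Parsed { int Value; };

llvm::ErrorOr<std::unique_ptr<Parsed>> parseThing(bool Ok) {
  if (!Ok)
    return std::make_error_code(std::errc::invalid_argument);
  return std::unique_ptr<Parsed>(new Parsed{42});
}

std::unique_ptr<Parsed> loadThing(bool Ok) {
  llvm::ErrorOr<std::unique_ptr<Parsed>> POrErr = parseThing(Ok);
  if (std::error_code EC = POrErr.getError())
    return nullptr;                // a real caller would report EC.message()
  return std::move(POrErr.get());  // move the owning pointer out, as above
}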
diff --git a/contrib/llvm/lib/LTO/LTOModule.cpp b/contrib/llvm/lib/LTO/LTOModule.cpp
index 5cdbca6..bbb3b6d 100644
--- a/contrib/llvm/lib/LTO/LTOModule.cpp
+++ b/contrib/llvm/lib/LTO/LTOModule.cpp
@@ -147,9 +147,10 @@ LTOModule *LTOModule::createInContext(const void *mem, size_t length,
return makeLTOModule(Buffer, options, errMsg, Context);
}
-static Module *parseBitcodeFileImpl(MemoryBufferRef Buffer,
- LLVMContext &Context, bool ShouldBeLazy,
- std::string &ErrMsg) {
+static std::unique_ptr<Module> parseBitcodeFileImpl(MemoryBufferRef Buffer,
+ LLVMContext &Context,
+ bool ShouldBeLazy,
+ std::string &ErrMsg) {
// Find the buffer.
ErrorOr<MemoryBufferRef> MBOrErr =
@@ -168,22 +169,22 @@ static Module *parseBitcodeFileImpl(MemoryBufferRef Buffer,
if (!ShouldBeLazy) {
// Parse the full file.
- ErrorOr<Module *> M =
+ ErrorOr<std::unique_ptr<Module>> M =
parseBitcodeFile(*MBOrErr, Context, DiagnosticHandler);
if (!M)
return nullptr;
- return *M;
+ return std::move(*M);
}
// Parse lazily.
std::unique_ptr<MemoryBuffer> LightweightBuf =
MemoryBuffer::getMemBuffer(*MBOrErr, false);
- ErrorOr<Module *> M = getLazyBitcodeModule(std::move(LightweightBuf), Context,
- DiagnosticHandler,
- true/*ShouldLazyLoadMetadata*/);
+ ErrorOr<std::unique_ptr<Module>> M =
+ getLazyBitcodeModule(std::move(LightweightBuf), Context,
+ DiagnosticHandler, true /*ShouldLazyLoadMetadata*/);
if (!M)
return nullptr;
- return *M;
+ return std::move(*M);
}
LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer,
@@ -197,9 +198,9 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer,
// If we own a context, we know this is being used only for symbol
// extraction, not linking. Be lazy in that case.
- std::unique_ptr<Module> M(parseBitcodeFileImpl(
+ std::unique_ptr<Module> M = parseBitcodeFileImpl(
Buffer, *Context,
- /* ShouldBeLazy */ static_cast<bool>(OwnedContext), errMsg));
+ /* ShouldBeLazy */ static_cast<bool>(OwnedContext), errMsg);
if (!M)
return nullptr;
@@ -468,6 +469,9 @@ void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def,
else
attr |= LTO_SYMBOL_SCOPE_DEFAULT;
+ if (def->hasComdat())
+ attr |= LTO_SYMBOL_COMDAT;
+
auto Iter = _defines.insert(Name).first;
// fill information structure
diff --git a/contrib/llvm/lib/LibDriver/LibDriver.cpp b/contrib/llvm/lib/LibDriver/LibDriver.cpp
new file mode 100644
index 0000000..c9857b04
--- /dev/null
+++ b/contrib/llvm/lib/LibDriver/LibDriver.cpp
@@ -0,0 +1,157 @@
+//===- LibDriver.cpp - lib.exe-compatible driver --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines an interface to a lib.exe-compatible driver that also understands
+// bitcode files. Used by llvm-lib and lld-link2 /lib.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LibDriver/LibDriver.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Object/ArchiveWriter.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+enum {
+ OPT_INVALID = 0,
+#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11) OPT_##ID,
+#include "Options.inc"
+#undef OPTION
+};
+
+#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
+#include "Options.inc"
+#undef PREFIX
+
+static const llvm::opt::OptTable::Info infoTable[] = {
+#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \
+ { \
+ X1, X2, X9, X10, OPT_##ID, llvm::opt::Option::KIND##Class, X8, X7, \
+ OPT_##GROUP, OPT_##ALIAS, X6 \
+ },
+#include "Options.inc"
+#undef OPTION
+};
+
+class LibOptTable : public llvm::opt::OptTable {
+public:
+ LibOptTable() : OptTable(infoTable, llvm::array_lengthof(infoTable), true) {}
+};
+
+} // namespace
+
+static std::string getOutputPath(llvm::opt::InputArgList *Args) {
+ if (auto *Arg = Args->getLastArg(OPT_out))
+ return Arg->getValue();
+ for (auto *Arg : Args->filtered(OPT_INPUT)) {
+ if (!StringRef(Arg->getValue()).endswith_lower(".obj"))
+ continue;
+ SmallString<128> Val = StringRef(Arg->getValue());
+ llvm::sys::path::replace_extension(Val, ".lib");
+ return Val.str();
+ }
+ llvm_unreachable("internal error");
+}
+
+static std::vector<StringRef> getSearchPaths(llvm::opt::InputArgList *Args,
+ StringSaver &Saver) {
+ std::vector<StringRef> Ret;
+ // Add current directory as first item of the search path.
+ Ret.push_back("");
+
+ // Add /libpath flags.
+ for (auto *Arg : Args->filtered(OPT_libpath))
+ Ret.push_back(Arg->getValue());
+
+ // Add $LIB.
+ Optional<std::string> EnvOpt = sys::Process::GetEnv("LIB");
+ if (!EnvOpt.hasValue())
+ return Ret;
+ StringRef Env = Saver.save(*EnvOpt);
+ while (!Env.empty()) {
+ StringRef Path;
+ std::tie(Path, Env) = Env.split(';');
+ Ret.push_back(Path);
+ }
+ return Ret;
+}
+
+static Optional<std::string> findInputFile(StringRef File,
+ ArrayRef<StringRef> Paths) {
+ for (auto Dir : Paths) {
+ SmallString<128> Path = Dir;
+ sys::path::append(Path, File);
+ if (sys::fs::exists(Path))
+ return Path.str().str();
+ }
+ return Optional<std::string>();
+}
+
+int llvm::libDriverMain(int Argc, const char **Argv) {
+ SmallVector<const char *, 20> NewArgv(Argv, Argv + Argc);
+ BumpPtrAllocator Alloc;
+ BumpPtrStringSaver Saver(Alloc);
+ cl::ExpandResponseFiles(Saver, cl::TokenizeWindowsCommandLine, NewArgv);
+ Argv = &NewArgv[0];
+ Argc = static_cast<int>(NewArgv.size());
+
+ LibOptTable Table;
+ unsigned MissingIndex;
+ unsigned MissingCount;
+ std::unique_ptr<llvm::opt::InputArgList> Args(
+ Table.ParseArgs(&Argv[1], &Argv[Argc], MissingIndex, MissingCount));
+ if (MissingCount) {
+ llvm::errs() << "missing arg value for \""
+ << Args->getArgString(MissingIndex)
+ << "\", expected " << MissingCount
+ << (MissingCount == 1 ? " argument.\n" : " arguments.\n");
+ return 1;
+ }
+ for (auto *Arg : Args->filtered(OPT_UNKNOWN))
+ llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n";
+
+ if (Args->filtered_begin(OPT_INPUT) == Args->filtered_end()) {
+ llvm::errs() << "no input files.\n";
+ return 1;
+ }
+
+ std::vector<StringRef> SearchPaths = getSearchPaths(Args.get(), Saver);
+
+ std::vector<llvm::NewArchiveIterator> Members;
+ for (auto *Arg : Args->filtered(OPT_INPUT)) {
+ Optional<std::string> Path = findInputFile(Arg->getValue(), SearchPaths);
+ if (!Path.hasValue()) {
+ llvm::errs() << Arg->getValue() << ": no such file or directory\n";
+ return 1;
+ }
+ Members.emplace_back(Saver.save(*Path),
+ llvm::sys::path::filename(Arg->getValue()));
+ }
+
+ std::pair<StringRef, std::error_code> Result = llvm::writeArchive(
+ getOutputPath(Args.get()), Members, /*WriteSymtab=*/true);
+ if (Result.second) {
+ if (Result.first.empty())
+ Result.first = Argv[0];
+ llvm::errs() << Result.first << ": " << Result.second.message() << "\n";
+ return 1;
+ }
+
+ return 0;
+}
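getSearchPaths above builds the library search order from the current directory, any /libpath: flags, and the semicolon-separated $LIB variable. A standalone sketch of just the $LIB splitting step, using StringRef::split the same way the driver does (the function name is illustrative):

#include "llvm/ADT/StringRef.h"
#include <tuple>
#include <vector>

std::vector<llvm::StringRef> splitPathList(llvm::StringRef Env) {
  std::vector<llvm::StringRef> Paths;
  while (!Env.empty()) {
    llvm::StringRef Path;
    std::tie(Path, Env) = Env.split(';');
    Paths.push_back(Path);   // refs alias the input buffer, so keep it alive
  }
  return Paths;
}

That aliasing is why the driver first copies the environment value through a StringSaver before splitting it.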
diff --git a/contrib/llvm/lib/LibDriver/Options.td b/contrib/llvm/lib/LibDriver/Options.td
new file mode 100644
index 0000000..0aa1aff
--- /dev/null
+++ b/contrib/llvm/lib/LibDriver/Options.td
@@ -0,0 +1,23 @@
+include "llvm/Option/OptParser.td"
+
+// lib.exe accepts options starting with either a dash or a slash.
+
+// Flag that takes no arguments.
+class F<string name> : Flag<["/", "-", "-?"], name>;
+
+// Flag that takes one argument after ":".
+class P<string name, string help> :
+ Joined<["/", "-", "-?"], name#":">, HelpText<help>;
+
+def libpath: P<"libpath", "Object file search path">;
+def out : P<"out", "Path to file to write output">;
+
+//==============================================================================
+// The flags below do nothing. They are defined only for lib.exe compatibility.
+//==============================================================================
+
+class QF<string name> : Joined<["/", "-", "-?"], name#":">;
+
+def ignore : QF<"ignore">;
+def machine: QF<"machine">;
+def nologo : F<"nologo">;
diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp
index 1b7a331..f80f6bc 100644
--- a/contrib/llvm/lib/Linker/LinkModules.cpp
+++ b/contrib/llvm/lib/Linker/LinkModules.cpp
@@ -99,7 +99,7 @@ private:
bool areTypesIsomorphic(Type *DstTy, Type *SrcTy);
};
-}
+} // namespace
void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) {
assert(SpeculativeTypes.empty());
@@ -507,7 +507,7 @@ private:
void linkNamedMDNodes();
void stripReplacedSubprograms();
};
-}
+} // namespace
/// The LLVM SymbolTable class autorenames globals that conflict in the symbol
/// table. This is good for all clients except for us. Go through the trouble
@@ -1194,6 +1194,11 @@ bool ModuleLinker::linkFunctionBody(Function &Dst, Function &Src) {
Dst.setPrologueData(MapValue(Src.getPrologueData(), ValueMap, RF_None,
&TypeMap, &ValMaterializer));
+ // Link in the personality function.
+ if (Src.hasPersonalityFn())
+ Dst.setPersonalityFn(MapValue(Src.getPersonalityFn(), ValueMap, RF_None,
+ &TypeMap, &ValMaterializer));
+
// Go through and convert function arguments over, remembering the mapping.
Function::arg_iterator DI = Dst.arg_begin();
for (Argument &Arg : Src.args()) {
@@ -1254,15 +1259,15 @@ bool ModuleLinker::linkGlobalValueBody(GlobalValue &Src) {
/// Insert all of the named MDNodes in Src into the Dest module.
void ModuleLinker::linkNamedMDNodes() {
const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
- for (Module::const_named_metadata_iterator I = SrcM->named_metadata_begin(),
- E = SrcM->named_metadata_end(); I != E; ++I) {
+ for (const NamedMDNode &NMD : SrcM->named_metadata()) {
// Don't link module flags here. Do them separately.
- if (&*I == SrcModFlags) continue;
- NamedMDNode *DestNMD = DstM->getOrInsertNamedMetadata(I->getName());
+ if (&NMD == SrcModFlags)
+ continue;
+ NamedMDNode *DestNMD = DstM->getOrInsertNamedMetadata(NMD.getName());
// Add Src elements into Dest node.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- DestNMD->addOperand(MapMetadata(I->getOperand(i), ValueMap, RF_None,
- &TypeMap, &ValMaterializer));
+ for (const MDNode *op : NMD.operands())
+ DestNMD->addOperand(
+ MapMetadata(op, ValueMap, RF_None, &TypeMap, &ValMaterializer));
}
}
@@ -1542,9 +1547,8 @@ bool ModuleLinker::run() {
// Insert all of the globals in src into the DstM module... without linking
// initializers (which could refer to functions not yet mapped over).
- for (Module::global_iterator I = SrcM->global_begin(),
- E = SrcM->global_end(); I != E; ++I)
- if (linkGlobalValueProto(I))
+ for (GlobalVariable &GV : SrcM->globals())
+ if (linkGlobalValueProto(&GV))
return true;
// Link the functions together between the two modules, without doing function
@@ -1552,18 +1556,17 @@ bool ModuleLinker::run() {
// function... We do this so that when we begin processing function bodies,
// all of the global values that may be referenced are available in our
// ValueMap.
- for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I)
- if (linkGlobalValueProto(I))
+  for (Function &F : *SrcM)
+ if (linkGlobalValueProto(&F))
return true;
// If there were any aliases, link them now.
- for (Module::alias_iterator I = SrcM->alias_begin(),
- E = SrcM->alias_end(); I != E; ++I)
- if (linkGlobalValueProto(I))
+ for (GlobalAlias &GA : SrcM->aliases())
+ if (linkGlobalValueProto(&GA))
return true;
- for (unsigned i = 0, e = AppendingVars.size(); i != e; ++i)
- linkAppendingVarInit(AppendingVars[i]);
+ for (const AppendingVarInfo &AppendingVar : AppendingVars)
+ linkAppendingVarInit(AppendingVar);
for (const auto &Entry : DstM->getComdatSymbolTable()) {
const Comdat &C = Entry.getValue();
@@ -1802,7 +1805,9 @@ LLVMBool LLVMLinkModules(LLVMModuleRef Dest, LLVMModuleRef Src,
LLVMBool Result = Linker::LinkModules(
D, unwrap(Src), [&](const DiagnosticInfo &DI) { DI.print(DP); });
- if (OutMessages && Result)
+ if (OutMessages && Result) {
+ Stream.flush();
*OutMessages = strdup(Message.c_str());
+ }
return Result;
}
diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
index 0765937..c9df8fc 100644
--- a/contrib/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
@@ -144,6 +144,7 @@ class ELFObjectWriter : public MCObjectWriter {
Renames.clear();
Relocations.clear();
StrTabBuilder.clear();
+ SymtabShndxSectionIndex = 0;
SectionTable.clear();
MCObjectWriter::reset();
}
@@ -231,7 +232,7 @@ class ELFObjectWriter : public MCObjectWriter {
uint32_t GroupSymbolIndex, uint64_t Offset, uint64_t Size,
const MCSectionELF &Section);
};
-}
+} // namespace
void ELFObjectWriter::align(unsigned Alignment) {
uint64_t Padding = OffsetToAlignment(OS.tell(), Alignment);
diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
index 0f405ad..9a65a31 100644
--- a/contrib/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
@@ -1308,7 +1308,10 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &S
GetCommentOS() << "\n";
}
- InstPrinter->printInst(&Inst, OS, "", STI);
+    if (getTargetStreamer())
+ getTargetStreamer()->prettyPrintAsm(*InstPrinter, OS, Inst, STI);
+ else
+ InstPrinter->printInst(&Inst, OS, "", STI);
EmitEOL();
}
diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp
index 55f5009..34211aa 100644
--- a/contrib/llvm/lib/MC/MCAssembler.cpp
+++ b/contrib/llvm/lib/MC/MCAssembler.cpp
@@ -54,8 +54,8 @@ STATISTIC(FragmentLayouts, "Number of fragment layouts");
STATISTIC(ObjectBytes, "Number of emitted object file bytes");
STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps");
STATISTIC(RelaxedInstructions, "Number of relaxed instructions");
-}
-}
+} // namespace stats
+} // namespace
// FIXME FIXME FIXME: There are number of places in this file where we convert
// what is a 64-bit assembler value used for computation into a value in the
@@ -262,26 +262,64 @@ uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler,
/* *** */
-MCFragment::MCFragment() : Kind(FragmentType(~0)) {
+void ilist_node_traits<MCFragment>::deleteNode(MCFragment *V) {
+ V->destroy();
}
-MCFragment::~MCFragment() {
+MCFragment::MCFragment() : Kind(FragmentType(~0)), HasInstructions(false),
+ AlignToBundleEnd(false), BundlePadding(0) {
}
-MCFragment::MCFragment(FragmentType Kind, MCSection *Parent)
- : Kind(Kind), Parent(Parent), Atom(nullptr), Offset(~UINT64_C(0)) {
+MCFragment::~MCFragment() { }
+
+MCFragment::MCFragment(FragmentType Kind, bool HasInstructions,
+ uint8_t BundlePadding, MCSection *Parent)
+ : Kind(Kind), HasInstructions(HasInstructions), AlignToBundleEnd(false),
+ BundlePadding(BundlePadding), Parent(Parent), Atom(nullptr),
+ Offset(~UINT64_C(0)) {
if (Parent)
Parent->getFragmentList().push_back(this);
}
-/* *** */
-
-MCEncodedFragment::~MCEncodedFragment() {
-}
-
-/* *** */
+void MCFragment::destroy() {
+  // First check if we are the sentinel.
+ if (Kind == FragmentType(~0)) {
+ delete this;
+ return;
+ }
-MCEncodedFragmentWithFixups::~MCEncodedFragmentWithFixups() {
+ switch (Kind) {
+ case FT_Align:
+ delete cast<MCAlignFragment>(this);
+ return;
+ case FT_Data:
+ delete cast<MCDataFragment>(this);
+ return;
+ case FT_CompactEncodedInst:
+ delete cast<MCCompactEncodedInstFragment>(this);
+ return;
+ case FT_Fill:
+ delete cast<MCFillFragment>(this);
+ return;
+ case FT_Relaxable:
+ delete cast<MCRelaxableFragment>(this);
+ return;
+ case FT_Org:
+ delete cast<MCOrgFragment>(this);
+ return;
+ case FT_Dwarf:
+ delete cast<MCDwarfLineAddrFragment>(this);
+ return;
+ case FT_DwarfFrame:
+ delete cast<MCDwarfCallFrameFragment>(this);
+ return;
+ case FT_LEB:
+ delete cast<MCLEBFragment>(this);
+ return;
+ case FT_SafeSEH:
+ delete cast<MCSafeSEHFragment>(this);
+ return;
+ }
}
/* *** */
@@ -345,16 +383,6 @@ bool MCAssembler::isThumbFunc(const MCSymbol *Symbol) const {
return true;
}
-void MCAssembler::addLocalUsedInReloc(const MCSymbol &Sym) {
- assert(Sym.isTemporary());
- LocalsUsedInReloc.insert(&Sym);
-}
-
-bool MCAssembler::isLocalUsedInReloc(const MCSymbol &Sym) const {
- assert(Sym.isTemporary());
- return LocalsUsedInReloc.count(&Sym);
-}
-
bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
// Non-temporary labels should always be visible to the linker.
if (!Symbol.isTemporary())
@@ -364,7 +392,7 @@ bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
if (!Symbol.isInSection())
return false;
- if (isLocalUsedInReloc(Symbol))
+ if (Symbol.isUsedInReloc())
return true;
return false;
@@ -464,9 +492,11 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
const MCFragment &F) const {
switch (F.getKind()) {
case MCFragment::FT_Data:
+ return cast<MCDataFragment>(F).getContents().size();
case MCFragment::FT_Relaxable:
+ return cast<MCRelaxableFragment>(F).getContents().size();
case MCFragment::FT_CompactEncodedInst:
- return cast<MCEncodedFragment>(F).getContents().size();
+ return cast<MCCompactEncodedInstFragment>(F).getContents().size();
case MCFragment::FT_Fill:
return cast<MCFillFragment>(F).getSize();
@@ -572,13 +602,6 @@ void MCAsmLayout::layoutFragment(MCFragment *F) {
}
}
-/// \brief Write the contents of a fragment to the given object writer. Expects
-/// a MCEncodedFragment.
-static void writeFragmentContents(const MCFragment &F, MCObjectWriter *OW) {
- const MCEncodedFragment &EF = cast<MCEncodedFragment>(F);
- OW->writeBytes(EF.getContents());
-}
-
void MCAssembler::registerSymbol(const MCSymbol &Symbol, bool *Created) {
bool New = !Symbol.isRegistered();
if (Created)
@@ -681,17 +704,17 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
case MCFragment::FT_Data:
++stats::EmittedDataFragments;
- writeFragmentContents(F, OW);
+ OW->writeBytes(cast<MCDataFragment>(F).getContents());
break;
case MCFragment::FT_Relaxable:
++stats::EmittedRelaxableFragments;
- writeFragmentContents(F, OW);
+ OW->writeBytes(cast<MCRelaxableFragment>(F).getContents());
break;
case MCFragment::FT_CompactEncodedInst:
++stats::EmittedCompactEncodedInstFragments;
- writeFragmentContents(F, OW);
+ OW->writeBytes(cast<MCCompactEncodedInstFragment>(F).getContents());
break;
case MCFragment::FT_Fill: {
@@ -880,18 +903,29 @@ void MCAssembler::Finish() {
for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
for (MCSection::iterator it2 = it->begin(), ie2 = it->end(); it2 != ie2;
++it2) {
- MCEncodedFragmentWithFixups *F =
- dyn_cast<MCEncodedFragmentWithFixups>(it2);
- if (F) {
- for (MCEncodedFragmentWithFixups::fixup_iterator it3 = F->fixup_begin(),
- ie3 = F->fixup_end(); it3 != ie3; ++it3) {
- MCFixup &Fixup = *it3;
- uint64_t FixedValue;
- bool IsPCRel;
- std::tie(FixedValue, IsPCRel) = handleFixup(Layout, *F, Fixup);
- getBackend().applyFixup(Fixup, F->getContents().data(),
- F->getContents().size(), FixedValue, IsPCRel);
- }
+ MCEncodedFragment *F = dyn_cast<MCEncodedFragment>(it2);
+ // Data and relaxable fragments both have fixups. So only process
+ // those here.
+ // FIXME: Is there a better way to do this? MCEncodedFragmentWithFixups
+ // being templated makes this tricky.
+ if (!F || isa<MCCompactEncodedInstFragment>(F))
+ continue;
+ ArrayRef<MCFixup> Fixups;
+ MutableArrayRef<char> Contents;
+ if (auto *FragWithFixups = dyn_cast<MCDataFragment>(F)) {
+ Fixups = FragWithFixups->getFixups();
+ Contents = FragWithFixups->getContents();
+ } else if (auto *FragWithFixups = dyn_cast<MCRelaxableFragment>(F)) {
+ Fixups = FragWithFixups->getFixups();
+ Contents = FragWithFixups->getContents();
+ } else
+        llvm_unreachable("Unknown fragment with fixups!");
+ for (const MCFixup &Fixup : Fixups) {
+ uint64_t FixedValue;
+ bool IsPCRel;
+ std::tie(FixedValue, IsPCRel) = handleFixup(Layout, *F, Fixup);
+ getBackend().applyFixup(Fixup, Contents.data(),
+ Contents.size(), FixedValue, IsPCRel);
}
}
}
@@ -1228,17 +1262,3 @@ void MCAssembler::dump() {
OS << "]>\n";
}
#endif
-
-// anchors for MC*Fragment vtables
-void MCEncodedFragment::anchor() { }
-void MCEncodedFragmentWithFixups::anchor() { }
-void MCDataFragment::anchor() { }
-void MCCompactEncodedInstFragment::anchor() { }
-void MCRelaxableFragment::anchor() { }
-void MCAlignFragment::anchor() { }
-void MCFillFragment::anchor() { }
-void MCOrgFragment::anchor() { }
-void MCLEBFragment::anchor() { }
-void MCSafeSEHFragment::anchor() { }
-void MCDwarfLineAddrFragment::anchor() { }
-void MCDwarfCallFrameFragment::anchor() { }
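The fragment hunks above drop the virtual anchor()/destructor machinery and instead delete fragments through a Kind-tagged destroy(), with ilist_node_traits invoking it when a section's fragment list is torn down. A minimal sketch of that dispatch pattern with invented fragment types (not LLVM's):

#include <cstdint>

struct Fragment {
  enum Kind : uint8_t { FT_Data, FT_Fill };
  Kind K;
  explicit Fragment(Kind K) : K(K) {}
  void destroy();                  // defined once the derived types exist
};

struct DataFragment : Fragment { DataFragment() : Fragment(FT_Data) {} };
struct FillFragment : Fragment { FillFragment() : Fragment(FT_Fill) {} };

void Fragment::destroy() {
  switch (K) {
  case FT_Data: delete static_cast<DataFragment *>(this); return;
  case FT_Fill: delete static_cast<FillFragment *>(this); return;
  }
}

int main() {
  Fragment *F = new DataFragment();
  F->destroy();                    // derived deleter chosen without a vtable
}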
diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp
index 1e52eed..c601c56 100644
--- a/contrib/llvm/lib/MC/MCContext.cpp
+++ b/contrib/llvm/lib/MC/MCContext.cpp
@@ -135,7 +135,7 @@ MCSymbolELF *MCContext::getOrCreateSectionSymbol(const MCSectionELF &Section) {
}
auto NameIter = UsedNames.insert(std::make_pair(Name, true)).first;
- Sym = new (*this) MCSymbolELF(&*NameIter, /*isTemporary*/ false);
+ Sym = new (&*NameIter, *this) MCSymbolELF(&*NameIter, /*isTemporary*/ false);
if (!OldSym)
OldSym = Sym;
@@ -164,25 +164,26 @@ MCSymbol *MCContext::createSymbolImpl(const StringMapEntry<bool> *Name,
if (MOFI) {
switch (MOFI->getObjectFileType()) {
case MCObjectFileInfo::IsCOFF:
- return new (*this) MCSymbolCOFF(Name, IsTemporary);
+ return new (Name, *this) MCSymbolCOFF(Name, IsTemporary);
case MCObjectFileInfo::IsELF:
- return new (*this) MCSymbolELF(Name, IsTemporary);
+ return new (Name, *this) MCSymbolELF(Name, IsTemporary);
case MCObjectFileInfo::IsMachO:
- return new (*this) MCSymbolMachO(Name, IsTemporary);
+ return new (Name, *this) MCSymbolMachO(Name, IsTemporary);
}
}
- return new (*this) MCSymbol(MCSymbol::SymbolKindUnset, Name, IsTemporary);
+ return new (Name, *this) MCSymbol(MCSymbol::SymbolKindUnset, Name,
+ IsTemporary);
}
MCSymbol *MCContext::createSymbol(StringRef Name, bool AlwaysAddSuffix,
- bool IsTemporary) {
- if (IsTemporary && !UseNamesOnTempLabels)
+ bool CanBeUnnamed) {
+ if (CanBeUnnamed && !UseNamesOnTempLabels)
return createSymbolImpl(nullptr, true);
  // Determine whether this is a user-written assembler temporary or normal
// label, if used.
- IsTemporary = false;
- if (AllowTemporaryLabels)
+ bool IsTemporary = CanBeUnnamed;
+ if (AllowTemporaryLabels && !IsTemporary)
IsTemporary = Name.startswith(MAI->getPrivateGlobalPrefix());
SmallString<128> NewName = Name;
@@ -205,10 +206,11 @@ MCSymbol *MCContext::createSymbol(StringRef Name, bool AlwaysAddSuffix,
llvm_unreachable("Infinite loop");
}
-MCSymbol *MCContext::createTempSymbol(const Twine &Name, bool AlwaysAddSuffix) {
+MCSymbol *MCContext::createTempSymbol(const Twine &Name, bool AlwaysAddSuffix,
+ bool CanBeUnnamed) {
SmallString<128> NameSV;
raw_svector_ostream(NameSV) << MAI->getPrivateGlobalPrefix() << Name;
- return createSymbol(NameSV, AlwaysAddSuffix, true);
+ return createSymbol(NameSV, AlwaysAddSuffix, CanBeUnnamed);
}
MCSymbol *MCContext::createLinkerPrivateTempSymbol() {
@@ -217,8 +219,8 @@ MCSymbol *MCContext::createLinkerPrivateTempSymbol() {
return createSymbol(NameSV, true, false);
}
-MCSymbol *MCContext::createTempSymbol() {
- return createTempSymbol("tmp", true);
+MCSymbol *MCContext::createTempSymbol(bool CanBeUnnamed) {
+ return createTempSymbol("tmp", true, CanBeUnnamed);
}
unsigned MCContext::NextInstance(unsigned LocalLabelVal) {
@@ -239,7 +241,7 @@ MCSymbol *MCContext::getOrCreateDirectionalLocalSymbol(unsigned LocalLabelVal,
unsigned Instance) {
MCSymbol *&Sym = LocalSymbols[std::make_pair(LocalLabelVal, Instance)];
if (!Sym)
- Sym = createTempSymbol();
+ Sym = createTempSymbol(false);
return Sym;
}
diff --git a/contrib/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/contrib/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
index 68948d3..b9aebfc 100644
--- a/contrib/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
+++ b/contrib/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
@@ -193,4 +193,4 @@ MCSymbolizer *createMCSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo,
return new MCExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo,
SymbolLookUp, DisInfo);
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp b/contrib/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp
index ff0c27f..43005e7 100644
--- a/contrib/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp
+++ b/contrib/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp
@@ -34,6 +34,7 @@ MCRelocationInfo::createExprForCAPIVariantKind(const MCExpr *SubExpr,
return SubExpr;
}
-MCRelocationInfo *llvm::createMCRelocationInfo(StringRef TT, MCContext &Ctx) {
+MCRelocationInfo *llvm::createMCRelocationInfo(const Triple &TT,
+ MCContext &Ctx) {
return new MCRelocationInfo(Ctx);
}
diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp
index 90f96e2..4ae2bcf 100644
--- a/contrib/llvm/lib/MC/MCDwarf.cpp
+++ b/contrib/llvm/lib/MC/MCDwarf.cpp
@@ -1461,7 +1461,7 @@ namespace {
bool IsSignalFrame;
bool IsSimple;
};
-}
+} // namespace
namespace llvm {
template <>
@@ -1488,7 +1488,7 @@ namespace llvm {
LHS.IsSimple == RHS.IsSimple;
}
};
-}
+} // namespace llvm
void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
bool IsEH) {
@@ -1590,18 +1590,17 @@ void MCDwarfFrameEmitter::EncodeAdvanceLoc(MCContext &Context,
OS << uint8_t(dwarf::DW_CFA_advance_loc1);
OS << uint8_t(AddrDelta);
} else if (isUInt<16>(AddrDelta)) {
- // FIXME: check what is the correct behavior on a big endian machine.
OS << uint8_t(dwarf::DW_CFA_advance_loc2);
- OS << uint8_t( AddrDelta & 0xff);
- OS << uint8_t((AddrDelta >> 8) & 0xff);
+ if (Context.getAsmInfo()->isLittleEndian())
+ support::endian::Writer<support::little>(OS).write<uint16_t>(AddrDelta);
+ else
+ support::endian::Writer<support::big>(OS).write<uint16_t>(AddrDelta);
} else {
- // FIXME: check what is the correct behavior on a big endian machine.
assert(isUInt<32>(AddrDelta));
OS << uint8_t(dwarf::DW_CFA_advance_loc4);
- OS << uint8_t( AddrDelta & 0xff);
- OS << uint8_t((AddrDelta >> 8) & 0xff);
- OS << uint8_t((AddrDelta >> 16) & 0xff);
- OS << uint8_t((AddrDelta >> 24) & 0xff);
-
+ if (Context.getAsmInfo()->isLittleEndian())
+ support::endian::Writer<support::little>(OS).write<uint32_t>(AddrDelta);
+ else
+ support::endian::Writer<support::big>(OS).write<uint32_t>(AddrDelta);
}
}
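The DW_CFA_advance_loc2/loc4 hunks replace hand-rolled little-endian byte stores with the endianness-aware stream writer, so big-endian targets now come out right. A compilable sketch of the same writer usage (assumes the EndianStream.h of this vintage, where the byte order is a template parameter):

#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

void writeAddrDelta16(llvm::raw_ostream &OS, uint16_t Delta,
                      bool IsLittleEndian) {
  using namespace llvm::support;
  if (IsLittleEndian)
    endian::Writer<little>(OS).write<uint16_t>(Delta);
  else
    endian::Writer<big>(OS).write<uint16_t>(Delta);
}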
diff --git a/contrib/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm/lib/MC/MCELFStreamer.cpp
index e0f4a2a..fe9ac21 100644
--- a/contrib/llvm/lib/MC/MCELFStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCELFStreamer.cpp
@@ -45,7 +45,7 @@ MCELFStreamer::~MCELFStreamer() {
}
void MCELFStreamer::mergeFragment(MCDataFragment *DF,
- MCEncodedFragmentWithFixups *EF) {
+ MCDataFragment *EF) {
MCAssembler &Assembler = getAssembler();
if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) {
diff --git a/contrib/llvm/lib/MC/MCNullStreamer.cpp b/contrib/llvm/lib/MC/MCNullStreamer.cpp
index eb2d912..e0f610b 100644
--- a/contrib/llvm/lib/MC/MCNullStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCNullStreamer.cpp
@@ -36,7 +36,7 @@ namespace {
void EmitGPRel32Value(const MCExpr *Value) override {}
};
-}
+} // namespace
MCStreamer *llvm::createNullStreamer(MCContext &Context) {
return new MCNullStreamer(Context);
diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
index 83a08e2..aa3d965 100644
--- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -238,6 +238,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
StackMapSection = Ctx->getMachOSection("__LLVM_STACKMAPS", "__llvm_stackmaps",
0, SectionKind::getMetadata());
+ FaultMapSection = Ctx->getMachOSection("__LLVM_FAULTMAPS", "__llvm_faultmaps",
+ 0, SectionKind::getMetadata());
+
TLSExtraDataSection = TLSTLVSection;
}
@@ -518,6 +521,9 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
StackMapSection =
Ctx->getELFSection(".llvm_stackmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+
+ FaultMapSection =
+ Ctx->getELFSection(".llvm_faultmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
}
void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
@@ -729,7 +735,8 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
SectionKind::getDataRel());
}
-void MCObjectFileInfo::InitMCObjectFileInfo(StringRef T, Reloc::Model relocm,
+void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple,
+ Reloc::Model relocm,
CodeModel::Model cm,
MCContext &ctx) {
RelocM = relocm;
@@ -753,7 +760,7 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef T, Reloc::Model relocm,
DwarfAccelNamespaceSection = nullptr; // Used only by selected targets.
DwarfAccelTypesSection = nullptr; // Used only by selected targets.
- TT = Triple(T);
+ TT = TheTriple;
Triple::ArchType Arch = TT.getArch();
// FIXME: Checking for Arch here to filter out bogus triples such as
@@ -777,6 +784,12 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef T, Reloc::Model relocm,
}
}
+void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ MCContext &ctx) {
+ InitMCObjectFileInfo(Triple(TT), RM, CM, ctx);
+}
+
MCSection *MCObjectFileInfo::getDwarfTypesSection(uint64_t Hash) const {
return Ctx->getELFSection(".debug_types", ELF::SHT_PROGBITS, ELF::SHF_GROUP,
0, utostr(Hash));
diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
index 6de02bc..a73c171 100644
--- a/contrib/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
@@ -54,21 +54,18 @@ void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) {
}
}
-bool MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi,
+void MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi,
const MCSymbol *Lo,
unsigned Size) {
- // Must both be assigned to the same (valid) fragment.
- if (!Hi->getFragment() || Hi->getFragment() != Lo->getFragment())
- return false;
-
- // Must be a data fragment.
- if (!isa<MCDataFragment>(Hi->getFragment()))
- return false;
+  // If not assigned to the same (valid) fragment, fall back.
+ if (!Hi->getFragment() || Hi->getFragment() != Lo->getFragment()) {
+ MCStreamer::emitAbsoluteSymbolDiff(Hi, Lo, Size);
+ return;
+ }
assert(Hi->getOffset() >= Lo->getOffset() &&
"Expected Hi to be greater than Lo");
EmitIntValue(Hi->getOffset() - Lo->getOffset(), Size);
- return true;
}
void MCObjectStreamer::reset() {
diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
index 20366dc..9c1062f 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -484,7 +484,7 @@ private:
void initializeDirectiveKindMap();
};
-}
+} // namespace
namespace llvm {
@@ -1306,8 +1306,10 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
MCSymbol *Sym;
if (LocalLabelVal == -1) {
if (ParsingInlineAsm && SI) {
- StringRef RewrittenLabel = SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
- assert(RewrittenLabel.size() && "We should have an internal name here.");
+ StringRef RewrittenLabel =
+ SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
+ assert(RewrittenLabel.size() &&
+ "We should have an internal name here.");
Info.AsmRewrites->push_back(AsmRewrite(AOK_Label, IDLoc,
IDVal.size(), RewrittenLabel));
IDVal = RewrittenLabel;
@@ -1942,7 +1944,7 @@ public:
private:
AsmLexer &Lexer;
};
-}
+} // namespace
bool AsmParser::parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg) {
diff --git a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
index f09bce0..1480f5b 100644
--- a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -145,7 +145,7 @@ public:
COFFAsmParser() {}
};
-} // end annonomous namespace.
+} // namespace
static SectionKind computeSectionKind(unsigned Flags) {
if (Flags & COFF::IMAGE_SCN_MEM_EXECUTE)
diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index e3585bd..e131b23 100644
--- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -154,7 +154,7 @@ private:
unsigned parseSunStyleSectionFlags();
};
-}
+} // namespace
/// ParseDirectiveSymbolAttribute
/// ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ]
diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp
index 011969a..7fbbbd9 100644
--- a/contrib/llvm/lib/MC/MCStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCStreamer.cpp
@@ -15,6 +15,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
@@ -601,6 +602,11 @@ void MCStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
TS->emitAssignment(Symbol, Value);
}
+void MCTargetStreamer::prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS,
+ const MCInst &Inst, const MCSubtargetInfo &STI) {
+ InstPrinter.printInst(&Inst, OS, "", STI);
+}
+
void MCStreamer::visitUsedSymbol(const MCSymbol &Sym) {
}
@@ -638,6 +644,25 @@ void MCStreamer::EmitInstruction(const MCInst &Inst,
visitUsedExpr(*Inst.getOperand(i).getExpr());
}
+void MCStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo,
+ unsigned Size) {
+ // Get the Hi-Lo expression.
+ const MCExpr *Diff =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(Hi, Context),
+ MCSymbolRefExpr::create(Lo, Context), Context);
+
+ const MCAsmInfo *MAI = Context.getAsmInfo();
+ if (!MAI->doesSetDirectiveSuppressesReloc()) {
+ EmitValue(Diff, Size);
+ return;
+ }
+
+ // Otherwise, emit with .set (aka assignment).
+ MCSymbol *SetLabel = Context.createTempSymbol("set", true);
+ EmitAssignment(SetLabel, Diff);
+ EmitSymbolValue(SetLabel, Size);
+}
+
void MCStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {}
void MCStreamer::EmitThumbFunc(MCSymbol *Func) {}
void MCStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
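The new generic MCStreamer::emitAbsoluteSymbolDiff builds a Hi-Lo subtraction and either emits it directly or funnels it through a temporary .set symbol, depending on doesSetDirectiveSuppressesReloc(). The expression construction on its own, as a sketch against the real MC headers:

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"

const llvm::MCExpr *makeHiLoDiff(const llvm::MCSymbol *Hi,
                                 const llvm::MCSymbol *Lo,
                                 llvm::MCContext &Ctx) {
  using namespace llvm;
  return MCBinaryExpr::createSub(MCSymbolRefExpr::create(Hi, Ctx),
                                 MCSymbolRefExpr::create(Lo, Ctx), Ctx);
}

MCObjectStreamer's override (earlier in this diff) keeps the fast path of emitting the raw offset difference when both symbols already sit in the same data fragment, and falls back to this expression otherwise.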
diff --git a/contrib/llvm/lib/MC/MCSubtargetInfo.cpp b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
index 7954a02..ece775c 100644
--- a/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
+++ b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
@@ -34,17 +34,12 @@ MCSubtargetInfo::InitCPUSchedModel(StringRef CPU) {
CPUSchedModel = MCSchedModel::GetDefaultSchedModel();
}
-void
-MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef C, StringRef FS,
- ArrayRef<SubtargetFeatureKV> PF,
- ArrayRef<SubtargetFeatureKV> PD,
- const SubtargetInfoKV *ProcSched,
- const MCWriteProcResEntry *WPR,
- const MCWriteLatencyEntry *WL,
- const MCReadAdvanceEntry *RA,
- const InstrStage *IS,
- const unsigned *OC,
- const unsigned *FP) {
+void MCSubtargetInfo::InitMCSubtargetInfo(
+ const Triple &TT, StringRef C, StringRef FS,
+ ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD,
+ const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR,
+ const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA,
+ const InstrStage *IS, const unsigned *OC, const unsigned *FP) {
TargetTriple = TT;
CPU = C;
ProcFeatures = PF;
diff --git a/contrib/llvm/lib/MC/MCSymbol.cpp b/contrib/llvm/lib/MC/MCSymbol.cpp
index 8d07b76..4484221 100644
--- a/contrib/llvm/lib/MC/MCSymbol.cpp
+++ b/contrib/llvm/lib/MC/MCSymbol.cpp
@@ -9,6 +9,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -18,6 +19,24 @@ using namespace llvm;
// Sentinel value for the absolute pseudo section.
MCSection *MCSymbol::AbsolutePseudoSection = reinterpret_cast<MCSection *>(1);
+void *MCSymbol::operator new(size_t s, const StringMapEntry<bool> *Name,
+ MCContext &Ctx) {
+ // We may need more space for a Name to account for alignment. So allocate
+ // space for the storage type and not the name pointer.
+ size_t Size = s + (Name ? sizeof(NameEntryStorageTy) : 0);
+
+ // For safety, ensure that the alignment of a pointer is enough for an
+ // MCSymbol. This also ensures we don't need padding between the name and
+ // symbol.
+ static_assert((unsigned)AlignOf<MCSymbol>::Alignment <=
+ AlignOf<NameEntryStorageTy>::Alignment,
+ "Bad alignment of MCSymbol");
+ void *Storage = Ctx.allocate(Size, alignOf<NameEntryStorageTy>());
+ NameEntryStorageTy *Start = static_cast<NameEntryStorageTy*>(Storage);
+ NameEntryStorageTy *End = Start + (Name ? 1 : 0);
+ return End;
+}
+
void MCSymbol::setVariableValue(const MCExpr *Value) {
assert(!IsUsed && "Cannot set a variable that has already been used.");
assert(Value && "Invalid variable value!");
diff --git a/contrib/llvm/lib/MC/MCSymbolELF.cpp b/contrib/llvm/lib/MC/MCSymbolELF.cpp
index c362065..6ec70ed 100644
--- a/contrib/llvm/lib/MC/MCSymbolELF.cpp
+++ b/contrib/llvm/lib/MC/MCSymbolELF.cpp
@@ -36,12 +36,9 @@ enum {
ELF_WeakrefUsedInReloc_Shift = 11,
// One bit.
- ELF_UsedInReloc_Shift = 12,
-
- // One bit.
- ELF_BindingSet_Shift = 13
+ ELF_BindingSet_Shift = 12
};
-}
+} // namespace
void MCSymbolELF::setBinding(unsigned Binding) const {
setIsBindingSet();
@@ -175,15 +172,6 @@ unsigned MCSymbolELF::getOther() const {
return Other << 5;
}
-void MCSymbolELF::setUsedInReloc() const {
- uint32_t OtherFlags = getFlags() & ~(0x1 << ELF_UsedInReloc_Shift);
- setFlags(OtherFlags | (1 << ELF_UsedInReloc_Shift));
-}
-
-bool MCSymbolELF::isUsedInReloc() const {
- return getFlags() & (0x1 << ELF_UsedInReloc_Shift);
-}
-
void MCSymbolELF::setIsWeakrefUsedInReloc() const {
uint32_t OtherFlags = getFlags() & ~(0x1 << ELF_WeakrefUsedInReloc_Shift);
setFlags(OtherFlags | (1 << ELF_WeakrefUsedInReloc_Shift));
@@ -210,4 +198,4 @@ void MCSymbolELF::setIsBindingSet() const {
bool MCSymbolELF::isBindingSet() const {
return getFlags() & (0x1 << ELF_BindingSet_Shift);
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/MC/MCWin64EH.cpp b/contrib/llvm/lib/MC/MCWin64EH.cpp
index 1b73b7a..d8280c7 100644
--- a/contrib/llvm/lib/MC/MCWin64EH.cpp
+++ b/contrib/llvm/lib/MC/MCWin64EH.cpp
@@ -247,6 +247,6 @@ void UnwindEmitter::EmitUnwindInfo(MCStreamer &Streamer,
llvm::EmitUnwindInfo(Streamer, info);
}
-}
+} // namespace Win64EH
} // End of namespace llvm
diff --git a/contrib/llvm/lib/MC/MCWinEH.cpp b/contrib/llvm/lib/MC/MCWinEH.cpp
index d5d9ead..9cf2edf 100644
--- a/contrib/llvm/lib/MC/MCWinEH.cpp
+++ b/contrib/llvm/lib/MC/MCWinEH.cpp
@@ -74,6 +74,6 @@ MCSection *UnwindEmitter::getXDataSection(const MCSymbol *Function,
return getUnwindInfoSection(".xdata", XData, Function, Context);
}
-}
-}
+} // namespace WinEH
+} // namespace llvm
diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 423c7dc..5bc1404 100644
--- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -191,7 +191,7 @@ public:
void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
};
-}
+} // namespace
static inline void write_uint32_le(void *Data, uint32_t Value) {
support::endian::write<uint32_t, support::little, support::unaligned>(Data,
@@ -526,13 +526,12 @@ bool WinCOFFObjectWriter::ExportSymbol(const MCSymbol &Symbol,
if (!Symbol.isTemporary())
return true;
- // Absolute temporary labels are never visible.
- if (!Symbol.isInSection())
+ // Temporary variable symbols are invisible.
+ if (Symbol.isVariable())
return false;
- // For now, all non-variable symbols are exported,
- // the linker will sort the rest out for us.
- return !Symbol.isVariable();
+ // Absolute temporary labels are never visible.
+ return !Symbol.isAbsolute();
}
bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) {
diff --git a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
index 41fc8e4..4ecdc3b 100644
--- a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
@@ -164,7 +164,8 @@ void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {
Triple::x86)
return;
- if (cast<MCSymbolCOFF>(Symbol)->isSafeSEH())
+ const MCSymbolCOFF *CSymbol = cast<MCSymbolCOFF>(Symbol);
+ if (CSymbol->isSafeSEH())
return;
MCSection *SXData = getContext().getObjectFileInfo()->getSXDataSection();
@@ -175,7 +176,12 @@ void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {
new MCSafeSEHFragment(Symbol, SXData);
getAssembler().registerSymbol(*Symbol);
- cast<MCSymbolCOFF>(Symbol)->setIsSafeSEH();
+ CSymbol->setIsSafeSEH();
+
+ // The Microsoft linker requires that the symbol type of a handler be
+ // function. Go ahead and oblige it here.
+ CSymbol->setType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
}
void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
@@ -285,5 +291,5 @@ LLVM_ATTRIBUTE_NORETURN
void MCWinCOFFStreamer::FatalError(const Twine &Msg) const {
getContext().reportFatalError(SMLoc(), Msg);
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Object/ArchiveWriter.cpp b/contrib/llvm/lib/Object/ArchiveWriter.cpp
index 90a736f..00a56d1 100644
--- a/contrib/llvm/lib/Object/ArchiveWriter.cpp
+++ b/contrib/llvm/lib/Object/ArchiveWriter.cpp
@@ -18,6 +18,8 @@
#include "llvm/Object/Archive.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
@@ -70,7 +72,7 @@ NewArchiveIterator::getFD(sys::fs::file_status &NewStatus) const {
// Linux cannot open directories with open(2), although
// cygwin and *bsd can.
if (NewStatus.type() == sys::fs::file_type::directory_file)
- return make_error_code(std::errc::is_a_directory);
+ return make_error_code(errc::is_a_directory);
return NewFD;
}
@@ -82,9 +84,7 @@ static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size,
OS << Data;
unsigned SizeSoFar = OS.tell() - OldPos;
if (Size > SizeSoFar) {
- unsigned Remaining = Size - SizeSoFar;
- for (unsigned I = 0; I < Remaining; ++I)
- OS << ' ';
+ OS.indent(Size - SizeSoFar);
} else if (Size < SizeSoFar) {
assert(MayTruncate && "Data doesn't fit in Size");
// Some of the data this is used for (like UID) can be larger than the
@@ -93,12 +93,8 @@ static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size,
}
}
-static void print32BE(raw_fd_ostream &Out, unsigned Val) {
- // FIXME: Should use Endian.h here.
- for (int I = 3; I >= 0; --I) {
- char V = (Val >> (8 * I)) & 0xff;
- Out << V;
- }
+static void print32BE(raw_ostream &Out, uint32_t Val) {
+ support::endian::Writer<support::big>(Out).write(Val);
}
static void printRestOfMemberHeader(raw_fd_ostream &Out,
diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp
index 1055b98..e2f559e 100644
--- a/contrib/llvm/lib/Object/COFFObjectFile.cpp
+++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp
@@ -991,19 +991,6 @@ symbol_iterator COFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
return symbol_iterator(SymbolRef(Ref, this));
}
-section_iterator COFFObjectFile::getRelocationSection(DataRefImpl Rel) const {
- symbol_iterator Sym = getRelocationSymbol(Rel);
- if (Sym == symbol_end())
- return section_end();
- COFFSymbolRef Symb = getCOFFSymbol(*Sym);
- if (!Symb.isSection())
- return section_end();
- section_iterator Res(section_end());
- if (getSymbolSection(Sym->getRawDataRefImpl(),Res))
- return section_end();
- return Res;
-}
-
std::error_code COFFObjectFile::getRelocationType(DataRefImpl Rel,
uint64_t &Res) const {
const coff_relocation* R = toRel(Rel);
diff --git a/contrib/llvm/lib/Object/COFFYAML.cpp b/contrib/llvm/lib/Object/COFFYAML.cpp
index 9a24b53..dda4b7f 100644
--- a/contrib/llvm/lib/Object/COFFYAML.cpp
+++ b/contrib/llvm/lib/Object/COFFYAML.cpp
@@ -335,7 +335,7 @@ struct NDLLCharacteristics {
COFF::DLLCharacteristics Characteristics;
};
-}
+} // namespace
void MappingTraits<COFFYAML::Relocation>::mapping(IO &IO,
COFFYAML::Relocation &Rel) {
@@ -497,5 +497,5 @@ void MappingTraits<COFFYAML::Object>::mapping(IO &IO, COFFYAML::Object &Obj) {
IO.mapRequired("symbols", Obj.Symbols);
}
-}
-}
+} // namespace yaml
+} // namespace llvm
diff --git a/contrib/llvm/lib/Object/ELFYAML.cpp b/contrib/llvm/lib/Object/ELFYAML.cpp
index 78087d6..50730a9 100644
--- a/contrib/llvm/lib/Object/ELFYAML.cpp
+++ b/contrib/llvm/lib/Object/ELFYAML.cpp
@@ -44,7 +44,7 @@ ScalarEnumerationTraits<ELFYAML::ELF_EM>::enumeration(IO &IO,
ECase(EM_386)
ECase(EM_68K)
ECase(EM_88K)
- ECase(EM_486)
+ ECase(EM_IAMCU)
ECase(EM_860)
ECase(EM_MIPS)
ECase(EM_S370)
@@ -590,7 +590,7 @@ struct NormalizedOther {
ELFYAML::ELF_STV Visibility;
ELFYAML::ELF_STO Other;
};
-}
+} // namespace
void MappingTraits<ELFYAML::Symbol>::mapping(IO &IO, ELFYAML::Symbol &Symbol) {
IO.mapOptional("Name", Symbol.Name, StringRef());
@@ -723,7 +723,7 @@ struct NormalizedMips64RelType {
ELFYAML::ELF_REL Type3;
ELFYAML::ELF_RSS SpecSym;
};
-}
+} // namespace
void MappingTraits<ELFYAML::Relocation>::mapping(IO &IO,
ELFYAML::Relocation &Rel) {
diff --git a/contrib/llvm/lib/Object/IRObjectFile.cpp b/contrib/llvm/lib/Object/IRObjectFile.cpp
index e89cb8e..e90e08d 100644
--- a/contrib/llvm/lib/Object/IRObjectFile.cpp
+++ b/contrib/llvm/lib/Object/IRObjectFile.cpp
@@ -45,22 +45,22 @@ IRObjectFile::IRObjectFile(MemoryBufferRef Object, std::unique_ptr<Module> Mod)
if (InlineAsm.empty())
return;
- StringRef Triple = M->getTargetTriple();
+ Triple TT(M->getTargetTriple());
std::string Err;
- const Target *T = TargetRegistry::lookupTarget(Triple, Err);
+ const Target *T = TargetRegistry::lookupTarget(TT.str(), Err);
if (!T)
return;
- std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(Triple));
+ std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(TT.str()));
if (!MRI)
return;
- std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, Triple));
+ std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT.str()));
if (!MAI)
return;
std::unique_ptr<MCSubtargetInfo> STI(
- T->createMCSubtargetInfo(Triple, "", ""));
+ T->createMCSubtargetInfo(TT.str(), "", ""));
if (!STI)
return;
@@ -70,7 +70,7 @@ IRObjectFile::IRObjectFile(MemoryBufferRef Object, std::unique_ptr<Module> Mod)
MCObjectFileInfo MOFI;
MCContext MCCtx(MAI.get(), MRI.get(), &MOFI);
- MOFI.InitMCObjectFileInfo(Triple, Reloc::Default, CodeModel::Default, MCCtx);
+ MOFI.InitMCObjectFileInfo(TT, Reloc::Default, CodeModel::Default, MCCtx);
std::unique_ptr<RecordStreamer> Streamer(new RecordStreamer(MCCtx));
T->createNullTargetStreamer(*Streamer);
@@ -198,6 +198,9 @@ std::error_code IRObjectFile::printSymbolName(raw_ostream &OS,
return std::error_code();
}
+ if (GV->hasDLLImportStorageClass())
+ OS << "__imp_";
+
if (Mang)
Mang->getNameWithPrefix(OS, GV, false);
else
@@ -301,12 +304,12 @@ llvm::object::IRObjectFile::create(MemoryBufferRef Object,
std::unique_ptr<MemoryBuffer> Buff(
MemoryBuffer::getMemBuffer(BCOrErr.get(), false));
- ErrorOr<Module *> MOrErr =
+ ErrorOr<std::unique_ptr<Module>> MOrErr =
getLazyBitcodeModule(std::move(Buff), Context, nullptr,
/*ShouldLazyLoadMetadata*/ true);
if (std::error_code EC = MOrErr.getError())
return EC;
- std::unique_ptr<Module> M(MOrErr.get());
+ std::unique_ptr<Module> &M = MOrErr.get();
return llvm::make_unique<IRObjectFile>(Object, std::move(M));
}
diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp
index d02ca48a..f76dd0d 100644
--- a/contrib/llvm/lib/Object/MachOObjectFile.cpp
+++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp
@@ -1232,6 +1232,7 @@ bool MachOObjectFile::isValidArch(StringRef ArchFlag) {
.Case("armv5e", true)
.Case("armv6", true)
.Case("armv6m", true)
+ .Case("armv7", true)
.Case("armv7em", true)
.Case("armv7k", true)
.Case("armv7m", true)
@@ -2011,9 +2012,11 @@ MachOObjectFile::getAnyRelocationSection(
const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE) || getPlainRelocationExternal(RE))
return *section_end();
- unsigned SecNum = getPlainRelocationSymbolNum(RE) - 1;
+ unsigned SecNum = getPlainRelocationSymbolNum(RE);
+ if (SecNum == MachO::R_ABS || SecNum > Sections.size())
+ return *section_end();
DataRefImpl DRI;
- DRI.d.a = SecNum;
+ DRI.d.a = SecNum - 1;
return SectionRef(DRI, this);
}
diff --git a/contrib/llvm/lib/Object/RecordStreamer.h b/contrib/llvm/lib/Object/RecordStreamer.h
index d861061..d694a9f 100644
--- a/contrib/llvm/lib/Object/RecordStreamer.h
+++ b/contrib/llvm/lib/Object/RecordStreamer.h
@@ -38,5 +38,5 @@ public:
void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Option/OptTable.cpp b/contrib/llvm/lib/Option/OptTable.cpp
index 96ba183..c37f193 100644
--- a/contrib/llvm/lib/Option/OptTable.cpp
+++ b/contrib/llvm/lib/Option/OptTable.cpp
@@ -79,8 +79,8 @@ static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
static inline bool operator<(const OptTable::Info &I, const char *Name) {
return StrCmpOptionNameIgnoreCase(I.Name, Name) < 0;
}
-}
-}
+} // namespace opt
+} // namespace llvm
OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {}
diff --git a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp
index bbac5c2..b6c2489 100644
--- a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp
+++ b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp
@@ -19,6 +19,7 @@
#include "llvm/ProfileData/CoverageMappingReader.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Path.h"
@@ -154,11 +155,11 @@ ErrorOr<int64_t> CounterMappingContext::evaluate(const Counter &C) const {
return 0;
case Counter::CounterValueReference:
if (C.getCounterID() >= CounterValues.size())
- return std::make_error_code(std::errc::argument_out_of_domain);
+ return make_error_code(errc::argument_out_of_domain);
return CounterValues[C.getCounterID()];
case Counter::Expression: {
if (C.getExpressionID() >= Expressions.size())
- return std::make_error_code(std::errc::argument_out_of_domain);
+ return make_error_code(errc::argument_out_of_domain);
const auto &E = Expressions[C.getExpressionID()];
ErrorOr<int64_t> LHS = evaluate(E.LHS);
if (!LHS)
@@ -349,7 +350,7 @@ public:
return Segments;
}
};
-}
+} // namespace
std::vector<StringRef> CoverageMapping::getUniqueSourceFiles() const {
std::vector<StringRef> Filenames;
@@ -520,7 +521,7 @@ class CoverageMappingErrorCategoryType : public std::error_category {
llvm_unreachable("A value of coveragemap_error has no message.");
}
};
-}
+} // namespace
static ManagedStatic<CoverageMappingErrorCategoryType> ErrorCategory;
diff --git a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp
index ec531c3..32de0ba 100644
--- a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp
+++ b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp
@@ -315,7 +315,7 @@ struct SectionData {
return std::error_code();
}
};
-}
+} // namespace
template <typename T, support::endianness Endian>
std::error_code readCoverageMappingData(
diff --git a/contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp b/contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp
index d90d2f5..128003c 100644
--- a/contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp
+++ b/contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp
@@ -74,7 +74,7 @@ public:
return C;
}
};
-}
+} // namespace
/// \brief Encode the counter.
///
diff --git a/contrib/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm/lib/ProfileData/InstrProf.cpp
index 92822a7..805d6d1 100644
--- a/contrib/llvm/lib/ProfileData/InstrProf.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProf.cpp
@@ -54,7 +54,7 @@ class InstrProfErrorCategoryType : public std::error_category {
llvm_unreachable("A value of instrprof_error has no message.");
}
};
-}
+} // namespace
static ManagedStatic<InstrProfErrorCategoryType> ErrorCategory;
diff --git a/contrib/llvm/lib/ProfileData/InstrProfIndexed.h b/contrib/llvm/lib/ProfileData/InstrProfIndexed.h
index ebca7b2..afd8cfb 100644
--- a/contrib/llvm/lib/ProfileData/InstrProfIndexed.h
+++ b/contrib/llvm/lib/ProfileData/InstrProfIndexed.h
@@ -49,7 +49,7 @@ static inline uint64_t ComputeHash(HashT Type, StringRef K) {
const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
const uint64_t Version = 2;
const HashT HashType = HashT::MD5;
-}
+} // namespace IndexedInstrProf
} // end namespace llvm
diff --git a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
index 2188543..efac292 100644
--- a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -69,7 +69,7 @@ public:
}
}
};
-}
+} // namespace
std::error_code
InstrProfWriter::addFunctionCounts(StringRef FunctionName,
diff --git a/contrib/llvm/lib/ProfileData/SampleProf.cpp b/contrib/llvm/lib/ProfileData/SampleProf.cpp
index 920c48a..e2894c6 100644
--- a/contrib/llvm/lib/ProfileData/SampleProf.cpp
+++ b/contrib/llvm/lib/ProfileData/SampleProf.cpp
@@ -42,7 +42,7 @@ class SampleProfErrorCategoryType : public std::error_category {
llvm_unreachable("A value of sampleprof_error has no message.");
}
};
-}
+} // namespace
static ManagedStatic<SampleProfErrorCategoryType> ErrorCategory;
diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp
index 4b0a0e5..48830e8 100644
--- a/contrib/llvm/lib/Support/APFloat.cpp
+++ b/contrib/llvm/lib/Support/APFloat.cpp
@@ -90,7 +90,7 @@ namespace llvm {
const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
/ (351 * integerPartWidth));
-}
+} // namespace llvm
/* A bunch of private, handy routines. */
@@ -3539,7 +3539,7 @@ namespace {
exp += FirstSignificant;
buffer.erase(&buffer[0], &buffer[FirstSignificant]);
}
-}
+} // namespace
void APFloat::toString(SmallVectorImpl<char> &Str,
unsigned FormatPrecision,
diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp
index 23f89bb..aa026d4 100644
--- a/contrib/llvm/lib/Support/APInt.cpp
+++ b/contrib/llvm/lib/Support/APInt.cpp
@@ -2331,7 +2331,7 @@ namespace {
{
return findFirstSet(value, ZB_Max);
}
-}
+} // namespace
/* Sets the least significant part of a bignum to the input value, and
zeroes out higher parts. */
diff --git a/contrib/llvm/lib/Support/ARMBuildAttrs.cpp b/contrib/llvm/lib/Support/ARMBuildAttrs.cpp
index 960a0f1..9c8bb15 100644
--- a/contrib/llvm/lib/Support/ARMBuildAttrs.cpp
+++ b/contrib/llvm/lib/Support/ARMBuildAttrs.cpp
@@ -66,7 +66,7 @@ const struct {
{ ARMBuildAttrs::ABI_align_needed, "Tag_ABI_align8_needed" },
{ ARMBuildAttrs::ABI_align_preserved, "Tag_ABI_align8_preserved" },
};
-}
+} // namespace
namespace llvm {
namespace ARMBuildAttrs {
@@ -90,6 +90,6 @@ int AttrTypeFromString(StringRef Tag) {
return ARMAttributeTags[TI].Attr;
return -1;
}
-}
-}
+} // namespace ARMBuildAttrs
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/ARMWinEH.cpp b/contrib/llvm/lib/Support/ARMWinEH.cpp
index 03c150f..8d21ca5 100644
--- a/contrib/llvm/lib/Support/ARMWinEH.cpp
+++ b/contrib/llvm/lib/Support/ARMWinEH.cpp
@@ -32,7 +32,7 @@ std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF) {
return std::make_pair(GPRMask, VFPMask);
}
-}
-}
-}
+} // namespace WinEH
+} // namespace ARM
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/Allocator.cpp b/contrib/llvm/lib/Support/Allocator.cpp
index f48edac..021037a 100644
--- a/contrib/llvm/lib/Support/Allocator.cpp
+++ b/contrib/llvm/lib/Support/Allocator.cpp
@@ -37,4 +37,4 @@ void PrintRecyclerStats(size_t Size,
<< "Number of elements free for recycling: " << FreeListSize << '\n';
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp
index 3cabc54..3638f0d 100644
--- a/contrib/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm/lib/Support/CommandLine.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
#include <map>
@@ -60,8 +61,8 @@ TEMPLATE_INSTANTIATION(class opt<int>);
TEMPLATE_INSTANTIATION(class opt<std::string>);
TEMPLATE_INSTANTIATION(class opt<char>);
TEMPLATE_INSTANTIATION(class opt<bool>);
-}
-} // end namespace llvm::cl
+} // namespace cl
+} // namespace llvm
// Pin the vtables to this file.
void GenericOptionValue::anchor() {}
@@ -78,7 +79,6 @@ void parser<double>::anchor() {}
void parser<float>::anchor() {}
void parser<std::string>::anchor() {}
void parser<char>::anchor() {}
-void StringSaver::anchor() {}
//===----------------------------------------------------------------------===//
@@ -564,7 +564,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
// End the token if this is whitespace.
if (isWhitespace(Src[I])) {
if (!Token.empty())
- NewArgv.push_back(Saver.SaveString(Token.c_str()));
+ NewArgv.push_back(Saver.save(Token.c_str()));
Token.clear();
continue;
}
@@ -575,7 +575,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
// Append the last token after hitting EOF with no whitespace.
if (!Token.empty())
- NewArgv.push_back(Saver.SaveString(Token.c_str()));
+ NewArgv.push_back(Saver.save(Token.c_str()));
// Mark the end of response files
if (MarkEOLs)
NewArgv.push_back(nullptr);
@@ -656,7 +656,7 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
if (State == UNQUOTED) {
// Whitespace means the end of the token.
if (isWhitespace(Src[I])) {
- NewArgv.push_back(Saver.SaveString(Token.c_str()));
+ NewArgv.push_back(Saver.save(Token.c_str()));
Token.clear();
State = INIT;
// Mark the end of lines in response files
@@ -691,7 +691,7 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
}
// Append the last token after hitting EOF with no whitespace.
if (!Token.empty())
- NewArgv.push_back(Saver.SaveString(Token.c_str()));
+ NewArgv.push_back(Saver.save(Token.c_str()));
// Mark the end of response files
if (MarkEOLs)
NewArgv.push_back(nullptr);
@@ -779,26 +779,6 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
return AllExpanded;
}
-namespace {
-class StrDupSaver : public StringSaver {
- std::vector<char *> Dups;
-
-public:
- ~StrDupSaver() override {
- for (std::vector<char *>::iterator I = Dups.begin(), E = Dups.end(); I != E;
- ++I) {
- char *Dup = *I;
- free(Dup);
- }
- }
- const char *SaveString(const char *Str) override {
- char *Dup = strdup(Str);
- Dups.push_back(Dup);
- return Dup;
- }
-};
-}
-
/// ParseEnvironmentOptions - An alternative entry point to the
/// CommandLine library, which allows you to read the program's name
/// from the caller (as PROGNAME) and its command-line arguments from
@@ -818,8 +798,9 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
// Get program's "name", which we wouldn't know without the caller
// telling us.
SmallVector<const char *, 20> newArgv;
- StrDupSaver Saver;
- newArgv.push_back(Saver.SaveString(progName));
+ BumpPtrAllocator A;
+ BumpPtrStringSaver Saver(A);
+ newArgv.push_back(Saver.save(progName));
// Parse the value of the environment variable into a "command line"
// and hand it off to ParseCommandLineOptions().
@@ -840,7 +821,8 @@ void CommandLineParser::ParseCommandLineOptions(int argc,
// Expand response files.
SmallVector<const char *, 20> newArgv(argv, argv + argc);
- StrDupSaver Saver;
+ BumpPtrAllocator A;
+ BumpPtrStringSaver Saver(A);
ExpandResponseFiles(Saver, TokenizeGNUCommandLine, newArgv);
argv = &newArgv[0];
argc = static_cast<int>(newArgv.size());
diff --git a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
index aba0f1d..929f5da 100644
--- a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -60,7 +60,7 @@ public:
}
};
-}
+} // namespace
static ManagedStatic<sys::Mutex> gCrashRecoveryContextMutex;
static bool gCrashRecoveryEnabled = false;
diff --git a/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp b/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp
index f1a334b..0f44780 100644
--- a/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp
@@ -175,7 +175,7 @@ public:
: DDAI(DDAI), Required(Required) {}
};
-}
+} // namespace
DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(
DAGDeltaAlgorithm &DDA, const changeset_ty &Changes,
diff --git a/contrib/llvm/lib/Support/DataStream.cpp b/contrib/llvm/lib/Support/DataStream.cpp
index c243155..ad05494 100644
--- a/contrib/llvm/lib/Support/DataStream.cpp
+++ b/contrib/llvm/lib/Support/DataStream.cpp
@@ -16,6 +16,7 @@
#include "llvm/Support/DataStream.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Program.h"
#include <string>
@@ -71,18 +72,15 @@ public:
}
};
-}
+} // namespace
-namespace llvm {
-DataStreamer *getDataFileStreamer(const std::string &Filename,
- std::string *StrError) {
- DataFileStreamer *s = new DataFileStreamer();
+std::unique_ptr<DataStreamer>
+llvm::getDataFileStreamer(const std::string &Filename, std::string *StrError) {
+ std::unique_ptr<DataFileStreamer> s = make_unique<DataFileStreamer>();
if (std::error_code e = s->OpenFile(Filename)) {
*StrError = std::string("Could not open ") + Filename + ": " +
e.message() + "\n";
return nullptr;
}
- return s;
-}
-
+ return std::move(s);
}
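
The DataStream.cpp hunk changes getDataFileStreamer to hand back a std::unique_ptr, so ownership of the streamer is explicit and failure is still reported through the out-parameter plus a null result. A rough standalone analogue (FileStreamer and getFileStreamer are made-up names for illustration):

    #include <fstream>
    #include <memory>
    #include <string>

    // Illustrative stand-in for a DataStreamer that reads from a file.
    struct FileStreamer {
      std::ifstream In;
    };

    // Factory in the style of the hunk above: returning std::unique_ptr makes
    // ownership transfer explicit; failure sets *StrError and returns null.
    std::unique_ptr<FileStreamer> getFileStreamer(const std::string &Filename,
                                                  std::string *StrError) {
      auto S = std::make_unique<FileStreamer>();
      S->In.open(Filename, std::ios::binary);
      if (!S->In) {
        *StrError = "Could not open " + Filename + "\n";
        return nullptr;
      }
      // The original needs std::move because it converts a
      // unique_ptr<DataFileStreamer> into a unique_ptr<DataStreamer>;
      // here the types match, so a plain return is enough.
      return S;
    }

    int main() {
      std::string Err;
      auto S = getFileStreamer("/nonexistent/path", &Err);
      return S ? 0 : 1; // expect 1, with Err describing the failure
    }
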
diff --git a/contrib/llvm/lib/Support/Debug.cpp b/contrib/llvm/lib/Support/Debug.cpp
index 47751fc..2052662 100644
--- a/contrib/llvm/lib/Support/Debug.cpp
+++ b/contrib/llvm/lib/Support/Debug.cpp
@@ -99,7 +99,7 @@ struct DebugOnlyOpt {
}
};
-}
+} // namespace
static DebugOnlyOpt DebugOnlyOptLoc;
diff --git a/contrib/llvm/lib/Support/FileOutputBuffer.cpp b/contrib/llvm/lib/Support/FileOutputBuffer.cpp
index 307ff09..6f064c9 100644
--- a/contrib/llvm/lib/Support/FileOutputBuffer.cpp
+++ b/contrib/llvm/lib/Support/FileOutputBuffer.cpp
@@ -109,4 +109,4 @@ std::error_code FileOutputBuffer::commit() {
// Rename file to final name.
return sys::fs::rename(Twine(TempPath), Twine(FinalPath));
}
-} // namespace
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/Locale.cpp b/contrib/llvm/lib/Support/Locale.cpp
index 35ddf7f..d5cb72b 100644
--- a/contrib/llvm/lib/Support/Locale.cpp
+++ b/contrib/llvm/lib/Support/Locale.cpp
@@ -15,7 +15,7 @@ int columnWidth(StringRef Text) {
bool isPrint(int UCS) {
#if LLVM_ON_WIN32
- // Restrict characters that we'll try to print to the the lower part of ASCII
+ // Restrict characters that we'll try to print to the lower part of ASCII
// except for the control characters (0x20 - 0x7E). In general one can not
// reliably output code points U+0080 and higher using narrow character C/C++
// output functions in Windows, because the meaning of the upper 128 codes is
diff --git a/contrib/llvm/lib/Support/MD5.cpp b/contrib/llvm/lib/Support/MD5.cpp
index ceab580..6ed81fb 100644
--- a/contrib/llvm/lib/Support/MD5.cpp
+++ b/contrib/llvm/lib/Support/MD5.cpp
@@ -283,4 +283,4 @@ void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) {
Res << format("%.2x", Result[i]);
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/MathExtras.cpp b/contrib/llvm/lib/Support/MathExtras.cpp
index ba09245..9265a43 100644
--- a/contrib/llvm/lib/Support/MathExtras.cpp
+++ b/contrib/llvm/lib/Support/MathExtras.cpp
@@ -29,4 +29,4 @@ namespace llvm {
const float huge_valf = HUGE_VALF;
#endif
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/MemoryBuffer.cpp b/contrib/llvm/lib/Support/MemoryBuffer.cpp
index 98862e9..1d69b96 100644
--- a/contrib/llvm/lib/Support/MemoryBuffer.cpp
+++ b/contrib/llvm/lib/Support/MemoryBuffer.cpp
@@ -94,7 +94,7 @@ public:
return MemoryBuffer_Malloc;
}
};
-}
+} // namespace
static ErrorOr<std::unique_ptr<MemoryBuffer>>
getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize,
@@ -220,7 +220,7 @@ public:
return MemoryBuffer_MMap;
}
};
-}
+} // namespace
static ErrorOr<std::unique_ptr<MemoryBuffer>>
getMemoryBufferForStream(int FD, const Twine &BufferName) {
diff --git a/contrib/llvm/lib/Support/Mutex.cpp b/contrib/llvm/lib/Support/Mutex.cpp
index c8d3844..42867c9 100644
--- a/contrib/llvm/lib/Support/Mutex.cpp
+++ b/contrib/llvm/lib/Support/Mutex.cpp
@@ -110,7 +110,7 @@ MutexImpl::tryacquire()
return errorcode == 0;
}
-}
+} // namespace llvm
#elif defined(LLVM_ON_UNIX)
#include "Unix/Mutex.inc"
diff --git a/contrib/llvm/lib/Support/RWMutex.cpp b/contrib/llvm/lib/Support/RWMutex.cpp
index 3b6309c..21ba5a4 100644
--- a/contrib/llvm/lib/Support/RWMutex.cpp
+++ b/contrib/llvm/lib/Support/RWMutex.cpp
@@ -113,7 +113,7 @@ RWMutexImpl::writer_release()
return errorcode == 0;
}
-}
+} // namespace llvm
#elif defined(LLVM_ON_UNIX)
#include "Unix/RWMutex.inc"
diff --git a/contrib/llvm/lib/Support/SourceMgr.cpp b/contrib/llvm/lib/Support/SourceMgr.cpp
index d5e3157..6d44a4d 100644
--- a/contrib/llvm/lib/Support/SourceMgr.cpp
+++ b/contrib/llvm/lib/Support/SourceMgr.cpp
@@ -332,8 +332,8 @@ static bool isNonASCII(char c) {
return c & 0x80;
}
-void SMDiagnostic::print(const char *ProgName, raw_ostream &S,
- bool ShowColors) const {
+void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
+ bool ShowKindLabel) const {
// Display colors only if OS supports colors.
ShowColors &= S.has_colors();
@@ -357,27 +357,29 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S,
S << ": ";
}
- switch (Kind) {
- case SourceMgr::DK_Error:
- if (ShowColors)
- S.changeColor(raw_ostream::RED, true);
- S << "error: ";
- break;
- case SourceMgr::DK_Warning:
- if (ShowColors)
- S.changeColor(raw_ostream::MAGENTA, true);
- S << "warning: ";
- break;
- case SourceMgr::DK_Note:
- if (ShowColors)
- S.changeColor(raw_ostream::BLACK, true);
- S << "note: ";
- break;
- }
+ if (ShowKindLabel) {
+ switch (Kind) {
+ case SourceMgr::DK_Error:
+ if (ShowColors)
+ S.changeColor(raw_ostream::RED, true);
+ S << "error: ";
+ break;
+ case SourceMgr::DK_Warning:
+ if (ShowColors)
+ S.changeColor(raw_ostream::MAGENTA, true);
+ S << "warning: ";
+ break;
+ case SourceMgr::DK_Note:
+ if (ShowColors)
+ S.changeColor(raw_ostream::BLACK, true);
+ S << "note: ";
+ break;
+ }
- if (ShowColors) {
- S.resetColor();
- S.changeColor(raw_ostream::SAVEDCOLOR, true);
+ if (ShowColors) {
+ S.resetColor();
+ S.changeColor(raw_ostream::SAVEDCOLOR, true);
+ }
}
S << Message << '\n';
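
The SMDiagnostic::print hunk adds a ShowKindLabel flag so callers can suppress the "error:" / "warning:" / "note:" prefix when the surrounding output already supplies one. A simplified sketch of the same switch, not the real SourceMgr API:

    #include <iostream>
    #include <string>

    enum class DiagKind { Error, Warning, Note };

    // Simplified analogue of SMDiagnostic::print: ShowKindLabel lets a caller
    // reuse the same routine for output that already carries its own label.
    void printDiag(std::ostream &OS, DiagKind Kind, const std::string &Msg,
                   bool ShowKindLabel = true) {
      if (ShowKindLabel) {
        switch (Kind) {
        case DiagKind::Error:   OS << "error: ";   break;
        case DiagKind::Warning: OS << "warning: "; break;
        case DiagKind::Note:    OS << "note: ";    break;
        }
      }
      OS << Msg << '\n';
    }

    int main() {
      printDiag(std::cout, DiagKind::Error, "something went wrong");         // labelled
      printDiag(std::cout, DiagKind::Error, "already prefixed text", false); // raw
      return 0;
    }
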
diff --git a/contrib/llvm/lib/Support/Statistic.cpp b/contrib/llvm/lib/Support/Statistic.cpp
index 56c3b0f..90f5fdb 100644
--- a/contrib/llvm/lib/Support/Statistic.cpp
+++ b/contrib/llvm/lib/Support/Statistic.cpp
@@ -60,7 +60,7 @@ public:
Stats.push_back(S);
}
};
-}
+} // namespace
static ManagedStatic<StatisticInfo> StatInfo;
static ManagedStatic<sys::SmartMutex<true> > StatLock;
diff --git a/contrib/llvm/lib/Support/StreamingMemoryObject.cpp b/contrib/llvm/lib/Support/StreamingMemoryObject.cpp
index 6c5652a..891aa66 100644
--- a/contrib/llvm/lib/Support/StreamingMemoryObject.cpp
+++ b/contrib/llvm/lib/Support/StreamingMemoryObject.cpp
@@ -123,9 +123,10 @@ MemoryObject *getNonStreamedMemoryObject(const unsigned char *Start,
return new RawMemoryObject(Start, End);
}
-StreamingMemoryObject::StreamingMemoryObject(DataStreamer *streamer) :
- Bytes(kChunkSize), Streamer(streamer), BytesRead(0), BytesSkipped(0),
- ObjectSize(0), EOFReached(false) {
- BytesRead = streamer->GetBytes(&Bytes[0], kChunkSize);
-}
+StreamingMemoryObject::StreamingMemoryObject(
+ std::unique_ptr<DataStreamer> Streamer)
+ : Bytes(kChunkSize), Streamer(std::move(Streamer)), BytesRead(0),
+ BytesSkipped(0), ObjectSize(0), EOFReached(false) {
+ BytesRead = this->Streamer->GetBytes(&Bytes[0], kChunkSize);
}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/StringSaver.cpp b/contrib/llvm/lib/Support/StringSaver.cpp
new file mode 100644
index 0000000..d6b84e5
--- /dev/null
+++ b/contrib/llvm/lib/Support/StringSaver.cpp
@@ -0,0 +1,19 @@
+//===-- StringSaver.cpp ---------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/StringSaver.h"
+
+using namespace llvm;
+
+const char *StringSaver::saveImpl(StringRef S) {
+ char *P = Alloc.Allocate<char>(S.size() + 1);
+ memcpy(P, S.data(), S.size());
+ P[S.size()] = '\0';
+ return P;
+}
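
The new StringSaver.cpp above copies each string into a BumpPtrAllocator and returns a stable NUL-terminated pointer, which is what the CommandLine.cpp hunks rely on after dropping StrDupSaver. A toy version of the idea using only standard containers (Arena here is a stand-in for LLVM's BumpPtrAllocator, with an arbitrary chunk size):

    #include <algorithm>
    #include <cstddef>
    #include <cstring>
    #include <string_view>
    #include <vector>

    // Toy monotonic (bump-pointer) arena; nothing is freed until the arena
    // itself goes away, so every pointer it hands out stays valid.
    class Arena {
      std::vector<std::vector<char>> Chunks;
      size_t Used = 0;
      static constexpr size_t ChunkSize = 4096;

    public:
      char *allocate(size_t N) {
        if (Chunks.empty() || Used + N > Chunks.back().size()) {
          Chunks.emplace_back(std::max(N, ChunkSize));
          Used = 0;
        }
        char *P = Chunks.back().data() + Used;
        Used += N;
        return P;
      }
    };

    // Same shape as saveImpl above: copy the bytes, terminate them, and hand
    // back a pointer with the arena's lifetime.
    class StringSaver {
      Arena &Alloc;

    public:
      explicit StringSaver(Arena &A) : Alloc(A) {}
      const char *save(std::string_view S) {
        char *P = Alloc.allocate(S.size() + 1);
        std::memcpy(P, S.data(), S.size());
        P[S.size()] = '\0';
        return P;
      }
    };

    int main() {
      Arena A;
      StringSaver Saver(A);
      const char *Prog = Saver.save("progname"); // stable copy, as in CommandLine.cpp
      return Prog[0] == 'p' ? 0 : 1;
    }
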
diff --git a/contrib/llvm/lib/Support/TargetParser.cpp b/contrib/llvm/lib/Support/TargetParser.cpp
index 757483b..760cdc1 100644
--- a/contrib/llvm/lib/Support/TargetParser.cpp
+++ b/contrib/llvm/lib/Support/TargetParser.cpp
@@ -276,7 +276,8 @@ bool ARMTargetParser::getFPUFeatures(unsigned FPUKind,
// FPU version subtarget features are inclusive of lower-numbered ones, so
// enable the one corresponding to this version and disable all that are
- // higher.
+ // higher. We also have to make sure to disable fp16 when vfp4 is disabled,
+ // as +vfp4 implies +fp16 but -vfp4 does not imply -fp16.
switch (FPUNames[FPUKind].FPUVersion) {
case 5:
Features.push_back("+fp-armv8");
@@ -287,18 +288,21 @@ bool ARMTargetParser::getFPUFeatures(unsigned FPUKind,
break;
case 3:
Features.push_back("+vfp3");
+ Features.push_back("-fp16");
Features.push_back("-vfp4");
Features.push_back("-fp-armv8");
break;
case 2:
Features.push_back("+vfp2");
Features.push_back("-vfp3");
+ Features.push_back("-fp16");
Features.push_back("-vfp4");
Features.push_back("-fp-armv8");
break;
case 0:
Features.push_back("-vfp2");
Features.push_back("-vfp3");
+ Features.push_back("-fp16");
Features.push_back("-vfp4");
Features.push_back("-fp-armv8");
break;
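
Per the comment added in the TargetParser hunk, "+vfp4" implies "+fp16" but "-vfp4" does not imply "-fp16", which is why each lower FPU version now pushes "-fp16" explicitly. A toy model of that asymmetry, purely illustrative (the implication table below is an assumption for the example, not the real subtarget feature graph):

    #include <iostream>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      // Assumed implication for the sketch: enabling vfp4 also enables fp16.
      std::map<std::string, std::vector<std::string>> Implies = {
          {"vfp4", {"fp16"}}};
      std::set<std::string> Enabled;

      auto apply = [&](const std::string &Flag) {
        std::string Name = Flag.substr(1);
        if (Flag[0] == '+') {
          Enabled.insert(Name);
          for (const auto &Dep : Implies[Name]) // "+x" pulls in what x implies
            Enabled.insert(Dep);
        } else {
          Enabled.erase(Name); // "-x" clears only x itself
        }
      };

      apply("+vfp4");
      apply("-vfp4");
      // fp16 survives, which is why the hunk pushes "-fp16" alongside "-vfp4".
      std::cout << "fp16 still enabled: " << Enabled.count("fp16") << '\n';
      return 0;
    }
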
diff --git a/contrib/llvm/lib/Support/TimeValue.cpp b/contrib/llvm/lib/Support/TimeValue.cpp
index 136b93e..caa5b5a 100644
--- a/contrib/llvm/lib/Support/TimeValue.cpp
+++ b/contrib/llvm/lib/Support/TimeValue.cpp
@@ -45,7 +45,7 @@ TimeValue::normalize( void ) {
}
}
-}
+} // namespace llvm
/// Include the platform-specific portion of TimeValue class
#ifdef LLVM_ON_UNIX
diff --git a/contrib/llvm/lib/Support/Timer.cpp b/contrib/llvm/lib/Support/Timer.cpp
index d7b6515..0ad253b 100644
--- a/contrib/llvm/lib/Support/Timer.cpp
+++ b/contrib/llvm/lib/Support/Timer.cpp
@@ -50,7 +50,7 @@ namespace {
InfoOutputFilename("info-output-file", cl::value_desc("filename"),
cl::desc("File to append -stats and -timer output to"),
cl::Hidden, cl::location(getLibSupportInfoOutputFilename()));
-}
+} // namespace
// CreateInfoOutputFile - Return a file stream to print our output on.
raw_ostream *llvm::CreateInfoOutputFile() {
@@ -218,7 +218,7 @@ public:
}
};
-}
+} // namespace
static ManagedStatic<Name2TimerMap> NamedTimers;
static ManagedStatic<Name2PairMap> NamedGroupedTimers;
diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp
index ad99386..072d4a0 100644
--- a/contrib/llvm/lib/Support/Triple.cpp
+++ b/contrib/llvm/lib/Support/Triple.cpp
@@ -59,6 +59,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case spir: return "spir";
case spir64: return "spir64";
case kalimba: return "kalimba";
+ case shave: return "shave";
}
llvm_unreachable("Invalid ArchType!");
@@ -120,6 +121,7 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case spir:
case spir64: return "spir";
case kalimba: return "kalimba";
+ case shave: return "shave";
}
}
@@ -252,6 +254,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("spir", spir)
.Case("spir64", spir64)
.Case("kalimba", kalimba)
+ .Case("shave", shave)
.Default(UnknownArch);
}
@@ -356,6 +359,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("spir", Triple::spir)
.Case("spir64", Triple::spir64)
.StartsWith("kalimba", Triple::kalimba)
+ .Case("shave", Triple::shave)
.Default(Triple::UnknownArch);
}
@@ -1004,6 +1008,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::hsail:
case llvm::Triple::spir:
case llvm::Triple::kalimba:
+ case llvm::Triple::shave:
return 32;
case llvm::Triple::aarch64:
@@ -1075,6 +1080,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::thumbeb:
case Triple::x86:
case Triple::xcore:
+ case Triple::shave:
// Already 32-bit.
break;
@@ -1107,6 +1113,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::thumbeb:
case Triple::xcore:
case Triple::sparcel:
+ case Triple::shave:
T.setArch(UnknownArch);
break;
diff --git a/contrib/llvm/lib/Support/Unix/Process.inc b/contrib/llvm/lib/Support/Unix/Process.inc
index df13bd2..b15cedd 100644
--- a/contrib/llvm/lib/Support/Unix/Process.inc
+++ b/contrib/llvm/lib/Support/Unix/Process.inc
@@ -205,7 +205,7 @@ private:
int &FD;
bool KeepOpen;
};
-}
+} // namespace
std::error_code Process::FixupStandardFileDescriptors() {
int NullFD = -1;
diff --git a/contrib/llvm/lib/Support/Unix/Program.inc b/contrib/llvm/lib/Support/Unix/Program.inc
index 5816fb8..dc633ab 100644
--- a/contrib/llvm/lib/Support/Unix/Program.inc
+++ b/contrib/llvm/lib/Support/Unix/Program.inc
@@ -20,6 +20,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
@@ -92,7 +93,7 @@ ErrorOr<std::string> sys::findProgramByName(StringRef Name,
if (sys::fs::can_execute(FilePath.c_str()))
return std::string(FilePath.str()); // Found the executable!
}
- return std::errc::no_such_file_or_directory;
+ return errc::no_such_file_or_directory;
}
static bool RedirectIO(const StringRef *Path, int FD, std::string* ErrMsg) {
@@ -175,7 +176,7 @@ static void SetMemoryLimits (unsigned size)
#endif
}
-}
+} // namespace llvm
static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
const char **envp, const StringRef **redirects,
@@ -447,7 +448,7 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
OS << Contents;
if (OS.has_error())
- return std::make_error_code(std::errc::io_error);
+ return make_error_code(errc::io_error);
return EC;
}
@@ -472,4 +473,4 @@ bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
}
return true;
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/Unix/ThreadLocal.inc b/contrib/llvm/lib/Support/Unix/ThreadLocal.inc
index 31c3f38..a04dd3e 100644
--- a/contrib/llvm/lib/Support/Unix/ThreadLocal.inc
+++ b/contrib/llvm/lib/Support/Unix/ThreadLocal.inc
@@ -56,7 +56,7 @@ void ThreadLocalImpl::removeInstance() {
setInstance(nullptr);
}
-}
+} // namespace llvm
#else
namespace llvm {
using namespace sys;
diff --git a/contrib/llvm/lib/Support/Unix/TimeValue.inc b/contrib/llvm/lib/Support/Unix/TimeValue.inc
index 042e0da..2c4f04c 100644
--- a/contrib/llvm/lib/Support/Unix/TimeValue.inc
+++ b/contrib/llvm/lib/Support/Unix/TimeValue.inc
@@ -51,4 +51,4 @@ TimeValue TimeValue::now() {
NANOSECONDS_PER_MICROSECOND ) );
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/Unix/Watchdog.inc b/contrib/llvm/lib/Support/Unix/Watchdog.inc
index 5d89c0e..9e335aa 100644
--- a/contrib/llvm/lib/Support/Unix/Watchdog.inc
+++ b/contrib/llvm/lib/Support/Unix/Watchdog.inc
@@ -28,5 +28,5 @@ namespace llvm {
alarm(0);
#endif
}
- }
-}
+ } // namespace sys
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/Windows/Memory.inc b/contrib/llvm/lib/Support/Windows/Memory.inc
index ae8371a..4b2ff2e 100644
--- a/contrib/llvm/lib/Support/Windows/Memory.inc
+++ b/contrib/llvm/lib/Support/Windows/Memory.inc
@@ -78,7 +78,15 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes,
// While we'd be happy to allocate single pages, the Windows allocation
// granularity may be larger than a single page (in practice, it is 64K)
// so mapping less than that will create an unreachable fragment of memory.
- static const size_t Granularity = getAllocationGranularity();
+ // Avoid using one-time initialization of static locals here, since they
+ // aren't thread safe with MSVC.
+ static volatile size_t GranularityCached;
+ size_t Granularity = GranularityCached;
+ if (Granularity == 0) {
+ Granularity = getAllocationGranularity();
+ GranularityCached = Granularity;
+ }
+
const size_t NumBlocks = (NumBytes+Granularity-1)/Granularity;
uintptr_t Start = NearBlock ? reinterpret_cast<uintptr_t>(NearBlock->base()) +
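
The Windows Memory.inc hunk replaces a function-local static with a manually cached value because, as its comment says, one-time initialization of static locals is not thread safe with the MSVC in use. A compact sketch of that caching pattern, accepting the same benign race the hunk accepts (computeGranularity is a placeholder):

    #include <cstddef>

    // Placeholder for the expensive, idempotent OS query being cached.
    static std::size_t computeGranularity() { return 64 * 1024; }

    // Pattern from the hunk above: a zero-initialized cache instead of a
    // guarded static local. Racing threads may both call computeGranularity(),
    // but they store the same value, so the race is harmless in practice.
    std::size_t allocationGranularity() {
      static volatile std::size_t Cached; // zero-initialized, no runtime guard
      std::size_t G = Cached;
      if (G == 0) {
        G = computeGranularity();
        Cached = G;
      }
      return G;
    }

    int main() { return allocationGranularity() == 64 * 1024 ? 0 : 1; }
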
diff --git a/contrib/llvm/lib/Support/Windows/Program.inc b/contrib/llvm/lib/Support/Windows/Program.inc
index 75685de..c29d872 100644
--- a/contrib/llvm/lib/Support/Windows/Program.inc
+++ b/contrib/llvm/lib/Support/Windows/Program.inc
@@ -14,6 +14,7 @@
#include "WindowsSupport.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/WindowsError.h"
#include "llvm/Support/raw_ostream.h"
@@ -514,7 +515,7 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
}
if (OS.has_error())
- return std::make_error_code(std::errc::io_error);
+ return make_error_code(errc::io_error);
return EC;
}
diff --git a/contrib/llvm/lib/Support/YAMLParser.cpp b/contrib/llvm/lib/Support/YAMLParser.cpp
index d55da5e..5ca28a0 100644
--- a/contrib/llvm/lib/Support/YAMLParser.cpp
+++ b/contrib/llvm/lib/Support/YAMLParser.cpp
@@ -144,8 +144,8 @@ struct Token : ilist_node<Token> {
Token() : Kind(TK_Error) {}
};
-}
-}
+} // namespace yaml
+} // namespace llvm
namespace llvm {
template<>
@@ -178,7 +178,7 @@ struct ilist_node_traits<Token> {
BumpPtrAllocator Alloc;
};
-}
+} // namespace llvm
typedef ilist<Token> TokenQueueT;
@@ -203,7 +203,7 @@ struct SimpleKey {
return Tok == Other.Tok;
}
};
-}
+} // namespace
/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit
/// subsequence and the subsequence's length in code units (uint8_t).
diff --git a/contrib/llvm/lib/TableGen/TGLexer.h b/contrib/llvm/lib/TableGen/TGLexer.h
index cbc30be..d97d1ca 100644
--- a/contrib/llvm/lib/TableGen/TGLexer.h
+++ b/contrib/llvm/lib/TableGen/TGLexer.h
@@ -60,7 +60,7 @@ namespace tgtok {
// String valued tokens.
Id, StrVal, VarName, CodeFragment
};
-}
+} // namespace tgtok
/// TGLexer - TableGen Lexer class.
class TGLexer {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index bffd9e6..6c5a083 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -148,7 +148,7 @@ private:
Color getColor(unsigned Register);
Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L);
};
-}
+} // namespace
char AArch64A57FPLoadBalancing::ID = 0;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index da22d8d..ada995b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -121,7 +121,7 @@ private:
//===----------------------------------------------------------------------===//
void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
- Triple TT(TM.getTargetTriple());
+ const Triple &TT = TM.getTargetTriple();
if (TT.isOSBinFormatMachO()) {
// Funny Darwin hack: This flag tells the linker that no global symbols
// contain code that falls through to other global symbols (e.g. the obvious
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp
index d973234..176403c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -102,7 +102,7 @@ public:
}
};
char AArch64BranchRelaxation::ID = 0;
-}
+} // namespace
/// verify - check BBOffsets, BBSizes, alignment of islands
void AArch64BranchRelaxation::verify() {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h
index 1e2d1c3..efc328a 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h
@@ -136,6 +136,6 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
}
-}
+} // namespace
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index 06ff9af..11eefc4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -135,7 +135,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
char LDTLSCleanup::ID = 0;
FunctionPass *llvm::createAArch64CleanupLocalDynamicTLSPass() {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index c2470f7..acb3525 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -43,7 +43,7 @@ private:
unsigned BitSize);
};
char AArch64ExpandPseudo::ID = 0;
-}
+} // namespace
/// \brief Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 9977e2b..d1523e8 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -1678,6 +1678,9 @@ unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
bool WantZExt, MachineMemOperand *MMO) {
+ if(!TLI.allowsMisalignedMemoryAccesses(VT))
+ return 0;
+
// Simplify this down to something we can handle.
if (!simplifyAddress(Addr, VT))
return 0;
@@ -1962,6 +1965,9 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
MachineMemOperand *MMO) {
+ if(!TLI.allowsMisalignedMemoryAccesses(VT))
+ return false;
+
// Simplify this down to something we can handle.
if (!simplifyAddress(Addr, VT))
return false;
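
The two FastISel hunks above bail out of emitLoad/emitStore when the target disallows misaligned accesses for the type, returning 0/false so selection can fall back to a more general path. A minimal illustration of that early-out guard (TargetInfo and its alignment rule are assumptions made for this sketch only):

    // 0 / false means "could not fast-select"; the caller then falls back.
    struct TargetInfo {
      bool allowsMisalignedMemoryAccesses(unsigned SizeInBits) const {
        return SizeInBits <= 32; // pretend wider accesses must be aligned
      }
    };

    unsigned emitLoadSketch(const TargetInfo &TLI, unsigned SizeInBits) {
      if (!TLI.allowsMisalignedMemoryAccesses(SizeInBits))
        return 0; // give up; a register number would be returned on success
      // ... simplify the address and emit the load here ...
      return 1;
    }

    int main() {
      TargetInfo TLI;
      return emitLoadSketch(TLI, 64) == 0 ? 0 : 1; // the 64-bit case bails out
    }
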
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index b496fcc..11227ee 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -63,6 +63,6 @@ public:
RegScavenger *RS) const override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1616ff1..0165ef9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -76,9 +76,6 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::init(false));
-/// Value type used for condition codes.
-static const MVT MVT_CC = MVT::i32;
-
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@@ -810,9 +807,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
- case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
- case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
- case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
case AArch64ISD::FMIN: return "AArch64ISD::FMIN";
case AArch64ISD::FMAX: return "AArch64ISD::FMAX";
@@ -1171,133 +1165,10 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
LHS = LHS.getOperand(0);
}
- return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
+ return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS)
.getValue(1);
}
-static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
- ISD::CondCode CC, SDValue CCOp,
- SDValue Condition, unsigned NZCV,
- SDLoc DL, SelectionDAG &DAG) {
- unsigned Opcode = 0;
- if (LHS.getValueType().isFloatingPoint())
- Opcode = AArch64ISD::FCCMP;
- else if (RHS.getOpcode() == ISD::SUB) {
- SDValue SubOp0 = RHS.getOperand(0);
- if (const ConstantSDNode *SubOp0C = dyn_cast<ConstantSDNode>(SubOp0))
- if (SubOp0C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
- // See emitComparison() on why we can only do this for SETEQ and SETNE.
- Opcode = AArch64ISD::CCMN;
- RHS = RHS.getOperand(1);
- }
- }
- if (Opcode == 0)
- Opcode = AArch64ISD::CCMP;
-
- SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
- return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
-}
-
-/// Returns true if @p Val is a tree of AND/OR/SETCC operations.
-static bool isConjunctionDisjunctionTree(const SDValue Val, unsigned Depth) {
- if (!Val.hasOneUse())
- return false;
- if (Val->getOpcode() == ISD::SETCC)
- return true;
- // Protect against stack overflow.
- if (Depth > 1000)
- return false;
- if (Val->getOpcode() == ISD::AND || Val->getOpcode() == ISD::OR) {
- SDValue O0 = Val->getOperand(0);
- SDValue O1 = Val->getOperand(1);
- return isConjunctionDisjunctionTree(O0, Depth+1) &&
- isConjunctionDisjunctionTree(O1, Depth+1);
- }
- return false;
-}
-
-/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
-/// of CCMP/CFCMP ops. For example (SETCC_0 & SETCC_1) with condition cond0 and
-/// cond1 can be transformed into "CMP; CCMP" with CCMP executing on cond_0
-/// and setting flags to inversed(cond_1) otherwise.
-/// This recursive function produces DAG nodes that produce condition flags
-/// suitable to determine the truth value of @p Val (which is AND/OR/SETCC)
-/// by testing the result for the condition set to @p OutCC. If @p Negate is
-/// set the opposite truth value is produced. If @p CCOp and @p Condition are
-/// given then conditional comparison are created so that false is reported
-/// when they are false.
-static SDValue emitConjunctionDisjunctionTree(
- SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate,
- SDValue CCOp = SDValue(), AArch64CC::CondCode Condition = AArch64CC::AL) {
- assert(isConjunctionDisjunctionTree(Val, 0));
- // We're at a tree leaf, produce a c?f?cmp.
- unsigned Opcode = Val->getOpcode();
- if (Opcode == ISD::SETCC) {
- SDValue LHS = Val->getOperand(0);
- SDValue RHS = Val->getOperand(1);
- ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
- bool isInteger = LHS.getValueType().isInteger();
- if (Negate)
- CC = getSetCCInverse(CC, isInteger);
- SDLoc DL(Val);
- // Determine OutCC and handle FP special case.
- if (isInteger) {
- OutCC = changeIntCCToAArch64CC(CC);
- } else {
- assert(LHS.getValueType().isFloatingPoint());
- AArch64CC::CondCode ExtraCC;
- changeFPCCToAArch64CC(CC, OutCC, ExtraCC);
- // Surpisingly some floating point conditions can't be tested with a
- // single condition code. Construct an additional comparison in this case.
- // See comment below on how we deal with OR conditions.
- if (ExtraCC != AArch64CC::AL) {
- SDValue ExtraCmp;
- if (!CCOp.getNode())
- ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
- else {
- SDValue ConditionOp = DAG.getConstant(Condition, DL, MVT_CC);
- // Note that we want the inverse of ExtraCC, so NZCV is not inversed.
- unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC);
- ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp,
- NZCV, DL, DAG);
- }
- CCOp = ExtraCmp;
- Condition = AArch64CC::getInvertedCondCode(ExtraCC);
- OutCC = AArch64CC::getInvertedCondCode(OutCC);
- }
- }
-
- // Produce a normal comparison if we are first in the chain
- if (!CCOp.getNode())
- return emitComparison(LHS, RHS, CC, DL, DAG);
- // Otherwise produce a ccmp.
- SDValue ConditionOp = DAG.getConstant(Condition, DL, MVT_CC);
- AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
- unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
- return emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL,
- DAG);
- }
-
- // Construct comparison sequence for the left hand side.
- SDValue LHS = Val->getOperand(0);
- SDValue RHS = Val->getOperand(1);
-
- // We can only implement AND-like behaviour here, but negation is free. So we
- // use (not (and (not x) (not y))) to implement (or x y).
- bool isOr = Val->getOpcode() == ISD::OR;
- assert((isOr || Val->getOpcode() == ISD::AND) && "Should have AND or OR.");
- Negate ^= isOr;
-
- AArch64CC::CondCode RHSCC;
- SDValue CmpR =
- emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, isOr, CCOp, Condition);
- SDValue CmpL =
- emitConjunctionDisjunctionTree(DAG, LHS, OutCC, isOr, CmpR, RHSCC);
- if (Negate)
- OutCC = AArch64CC::getInvertedCondCode(OutCC);
- return CmpL;
-}
-
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) {
SDValue Cmp;
@@ -1356,55 +1227,47 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
}
+ // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
+ // For the i8 operand, the largest immediate is 255, so this can be easily
+ // encoded in the compare instruction. For the i16 operand, however, the
+ // largest immediate cannot be encoded in the compare.
+ // Therefore, use a sign extending load and cmn to avoid materializing the -1
+ // constant. For example,
+ // movz w1, #65535
+ // ldrh w0, [x0, #0]
+ // cmp w0, w1
+ // >
+ // ldrsh w0, [x0, #0]
+ // cmn w0, #1
+  // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
+  // if and only if (sext LHS) == (sext RHS). The checks are in place to ensure
+  // both the LHS and RHS are truly zero extended and to make sure the
+ // transformation is profitable.
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
- const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
-
- // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
- // For the i8 operand, the largest immediate is 255, so this can be easily
- // encoded in the compare instruction. For the i16 operand, however, the
- // largest immediate cannot be encoded in the compare.
- // Therefore, use a sign extending load and cmn to avoid materializing the
- // -1 constant. For example,
- // movz w1, #65535
- // ldrh w0, [x0, #0]
- // cmp w0, w1
- // >
- // ldrsh w0, [x0, #0]
- // cmn w0, #1
- // Fundamental, we're relying on the property that (zext LHS) == (zext RHS)
- // if and only if (sext LHS) == (sext RHS). The checks are in place to
- // ensure both the LHS and RHS are truely zero extended and to make sure the
- // transformation is profitable.
- if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
- cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
- cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
- LHS.getNode()->hasNUsesOfValue(1, 0)) {
- int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
- if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
- SDValue SExt =
- DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
- DAG.getValueType(MVT::i16));
- Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
- RHS.getValueType()),
- CC, dl, DAG);
- AArch64CC = changeIntCCToAArch64CC(CC);
- goto CreateCCNode;
+ if ((cast<ConstantSDNode>(RHS)->getZExtValue() >> 16 == 0) &&
+ isa<LoadSDNode>(LHS)) {
+ if (cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
+ cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
+ LHS.getNode()->hasNUsesOfValue(1, 0)) {
+ int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
+ if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
+ SDValue SExt =
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
+ DAG.getValueType(MVT::i16));
+ Cmp = emitComparison(SExt,
+ DAG.getConstant(ValueofRHS, dl,
+ RHS.getValueType()),
+ CC, dl, DAG);
+ AArch64CC = changeIntCCToAArch64CC(CC);
+ AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
+ return Cmp;
+ }
}
}
-
- if ((RHSC->isNullValue() || RHSC->isOne()) &&
- isConjunctionDisjunctionTree(LHS, 0)) {
- bool Negate = (CC == ISD::SETNE) ^ RHSC->isNullValue();
- Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC, Negate);
- goto CreateCCNode;
- }
}
-
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
-
-CreateCCNode:
- AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
+ AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
return Cmp;
}
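
The restored comment block above leans on the fact that, for same-width operands, equality of zero-extended values coincides with equality of sign-extended values, so an i16 compare against 0xFFFF can become a sign-extending load plus cmn #1. A short brute-force check of that property over all 16-bit values:

    #include <cstdint>
    #include <iostream>

    int main() {
      uint16_t RHS = 0xFFFF;                       // the awkward immediate
      int32_t SextRHS = static_cast<int16_t>(RHS); // -1, encodable via cmn #1
      bool OK = true;
      for (uint32_t L = 0; L <= 0xFFFF; ++L) {
        uint16_t LHS = static_cast<uint16_t>(L);
        bool ZextEq = static_cast<uint32_t>(LHS) == static_cast<uint32_t>(RHS);
        bool SextEq = static_cast<int32_t>(static_cast<int16_t>(LHS)) == SextRHS;
        OK &= (ZextEq == SextEq);
      }
      std::cout << (OK ? "equivalent\n" : "mismatch\n"); // prints "equivalent"
      return OK ? 0 : 1;
    }
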
@@ -1561,7 +1424,7 @@ static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
- // The the values aren't constants, this isn't the pattern we're looking for.
+ // The values aren't constants, this isn't the pattern we're looking for.
if (!CFVal || !CTVal)
return Op;
@@ -2559,7 +2422,7 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// cannot rely on the linker replacing the tail call with a return.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
- const Triple TT(getTargetMachine().getTargetTriple());
+ const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
(!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
return false;
@@ -3557,7 +3420,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
EltVT = MVT::i64;
VecVT = MVT::v2i64;
- // We want to materialize a mask with the the high bit set, but the AdvSIMD
+ // We want to materialize a mask with the high bit set, but the AdvSIMD
// immediate moves cannot materialize that in a single instruction for
// 64-bit elements. Instead, materialize zero and then negate it.
EltMask = 0;
@@ -7580,21 +7443,26 @@ static SDValue tryCombineFixedPointConvert(SDNode *N,
//
// This routine does the actual conversion of such DUPs, once outer routines
// have determined that everything else is in order.
+// It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
+// similarly here.
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
- // We can handle most types of duplicate, but the lane ones have an extra
- // operand saying *which* lane, so we need to know.
- bool IsDUPLANE;
switch (N.getOpcode()) {
case AArch64ISD::DUP:
- IsDUPLANE = false;
- break;
case AArch64ISD::DUPLANE8:
case AArch64ISD::DUPLANE16:
case AArch64ISD::DUPLANE32:
case AArch64ISD::DUPLANE64:
- IsDUPLANE = true;
+ case AArch64ISD::MOVI:
+ case AArch64ISD::MOVIshift:
+ case AArch64ISD::MOVIedit:
+ case AArch64ISD::MOVImsl:
+ case AArch64ISD::MVNIshift:
+ case AArch64ISD::MVNImsl:
break;
default:
+ // FMOV could be supported, but isn't very useful, as it would only occur
+    // if you passed a bitcast'd floating point immediate to an eligible long
+ // integer op (addl, smull, ...).
return SDValue();
}
@@ -7604,17 +7472,11 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
MVT ElementTy = NarrowTy.getVectorElementType();
unsigned NumElems = NarrowTy.getVectorNumElements();
- MVT NewDUPVT = MVT::getVectorVT(ElementTy, NumElems * 2);
+ MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
SDLoc dl(N);
- SDValue NewDUP;
- if (IsDUPLANE)
- NewDUP = DAG.getNode(N.getOpcode(), dl, NewDUPVT, N.getOperand(0),
- N.getOperand(1));
- else
- NewDUP = DAG.getNode(AArch64ISD::DUP, dl, NewDUPVT, N.getOperand(0));
-
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy, NewDUP,
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy,
+ DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()),
DAG.getConstant(NumElems, dl, MVT::i64));
}
@@ -8913,6 +8775,14 @@ static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
}
+/// Get rid of unnecessary NVCASTs (that don't change the type).
+static SDValue performNVCASTCombine(SDNode *N) {
+ if (N->getValueType(0) == N->getOperand(0).getValueType())
+ return N->getOperand(0);
+
+ return SDValue();
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -8955,6 +8825,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performCONDCombine(N, DCI, DAG, 2, 3);
case AArch64ISD::DUP:
return performPostLD1Combine(N, DCI, false);
+ case AArch64ISD::NVCAST:
+ return performNVCASTCombine(N);
case ISD::INSERT_VECTOR_ELT:
return performPostLD1Combine(N, DCI, true);
case ISD::INTRINSIC_VOID:
@@ -9260,8 +9132,3 @@ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
return Ty->isArrayTy();
}
-
-bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
- EVT) const {
- return false;
-}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index db192c7..da42376 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -58,11 +58,6 @@ enum NodeType : unsigned {
SBCS,
ANDS,
- // Conditional compares. Operands: left,right,falsecc,cc,flags
- CCMP,
- CCMN,
- FCCMP,
-
// Floating point comparison
FCMP,
@@ -513,8 +508,6 @@ private:
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
CallingConv::ID CallConv,
bool isVarArg) const override;
-
- bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
};
namespace AArch64 {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 1fe9c7f..2c52f34 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -525,13 +525,6 @@ def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
let ParserMatchClass = Imm0_31Operand;
}
-// True if the 32-bit immediate is in the range [0,31]
-def imm32_0_31 : Operand<i32>, ImmLeaf<i32, [{
- return ((uint64_t)Imm) < 32;
-}]> {
- let ParserMatchClass = Imm0_31Operand;
-}
-
// imm0_15 predicate - True if the immediate is in the range [0,15]
def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
return ((uint64_t)Imm) < 16;
@@ -549,9 +542,7 @@ def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
// imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 16;
-}]> {
- let ParserMatchClass = Imm0_15Operand;
-}
+}]>;
// An arithmetic shifter operand:
// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr
@@ -2077,12 +2068,9 @@ multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic,
//---
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseCondComparisonImm<bit op, RegisterClass regtype, ImmLeaf immtype,
- string mnemonic, SDNode OpNode>
- : I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond),
- mnemonic, "\t$Rn, $imm, $nzcv, $cond", "",
- [(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv),
- (i32 imm:$cond), NZCV))]>,
+class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm>
+ : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond),
+ asm, "\t$Rn, $imm, $nzcv, $cond", "", []>,
Sched<[WriteI, ReadI]> {
let Uses = [NZCV];
let Defs = [NZCV];
@@ -2102,13 +2090,19 @@ class BaseCondComparisonImm<bit op, RegisterClass regtype, ImmLeaf immtype,
let Inst{3-0} = nzcv;
}
+multiclass CondSetFlagsImm<bit op, string asm> {
+ def Wi : BaseCondSetFlagsImm<op, GPR32, asm> {
+ let Inst{31} = 0;
+ }
+ def Xi : BaseCondSetFlagsImm<op, GPR64, asm> {
+ let Inst{31} = 1;
+ }
+}
+
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseCondComparisonReg<bit op, RegisterClass regtype, string mnemonic,
- SDNode OpNode>
- : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond),
- mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "",
- [(set NZCV, (OpNode regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv),
- (i32 imm:$cond), NZCV))]>,
+class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm>
+ : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
+ asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
Sched<[WriteI, ReadI, ReadI]> {
let Uses = [NZCV];
let Defs = [NZCV];
@@ -2128,19 +2122,11 @@ class BaseCondComparisonReg<bit op, RegisterClass regtype, string mnemonic,
let Inst{3-0} = nzcv;
}
-multiclass CondComparison<bit op, string mnemonic, SDNode OpNode> {
- // immediate operand variants
- def Wi : BaseCondComparisonImm<op, GPR32, imm32_0_31, mnemonic, OpNode> {
+multiclass CondSetFlagsReg<bit op, string asm> {
+ def Wr : BaseCondSetFlagsReg<op, GPR32, asm> {
let Inst{31} = 0;
}
- def Xi : BaseCondComparisonImm<op, GPR64, imm0_31, mnemonic, OpNode> {
- let Inst{31} = 1;
- }
- // register operand variants
- def Wr : BaseCondComparisonReg<op, GPR32, mnemonic, OpNode> {
- let Inst{31} = 0;
- }
- def Xr : BaseCondComparisonReg<op, GPR64, mnemonic, OpNode> {
+ def Xr : BaseCondSetFlagsReg<op, GPR64, asm> {
let Inst{31} = 1;
}
}
@@ -3948,14 +3934,11 @@ multiclass FPComparison<bit signalAllNans, string asm,
//---
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseFPCondComparison<bit signalAllNans, RegisterClass regtype,
- string mnemonic, list<dag> pat>
- : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond),
- mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>,
+class BaseFPCondComparison<bit signalAllNans,
+ RegisterClass regtype, string asm>
+ : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
+ asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
Sched<[WriteFCmp]> {
- let Uses = [NZCV];
- let Defs = [NZCV];
-
bits<5> Rn;
bits<5> Rm;
bits<4> nzcv;
@@ -3971,18 +3954,16 @@ class BaseFPCondComparison<bit signalAllNans, RegisterClass regtype,
let Inst{3-0} = nzcv;
}
-multiclass FPCondComparison<bit signalAllNans, string mnemonic,
- SDPatternOperator OpNode = null_frag> {
- def Srr : BaseFPCondComparison<signalAllNans, FPR32, mnemonic,
- [(set NZCV, (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm), (i32 imm:$nzcv),
- (i32 imm:$cond), NZCV))]> {
+multiclass FPCondComparison<bit signalAllNans, string asm> {
+ let Defs = [NZCV], Uses = [NZCV] in {
+ def Srr : BaseFPCondComparison<signalAllNans, FPR32, asm> {
let Inst{22} = 0;
}
- def Drr : BaseFPCondComparison<signalAllNans, FPR64, mnemonic,
- [(set NZCV, (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm), (i32 imm:$nzcv),
- (i32 imm:$cond), NZCV))]> {
+
+ def Drr : BaseFPCondComparison<signalAllNans, FPR64, asm> {
let Inst{22} = 1;
}
+ } // Defs = [NZCV], Uses = [NZCV]
}
//---
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 6941a6b..8d8864c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -255,7 +255,7 @@ unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
void AArch64InstrInfo::instantiateCondBranch(
MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
+ ArrayRef<MachineOperand> Cond) const {
if (Cond[0].getImm() != -1) {
// Regular Bcc
BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
@@ -272,7 +272,7 @@ void AArch64InstrInfo::instantiateCondBranch(
unsigned AArch64InstrInfo::InsertBranch(
MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
+ ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
@@ -369,7 +369,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
}
bool AArch64InstrInfo::canInsertSelect(
- const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond,
+ const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
int &FalseCycles) const {
// Check register classes.
@@ -412,7 +412,7 @@ bool AArch64InstrInfo::canInsertSelect(
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DstReg,
- const SmallVectorImpl<MachineOperand> &Cond,
+ ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
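
The AArch64InstrInfo hunks switch several branch/select hooks from SmallVectorImpl<MachineOperand>& to ArrayRef<MachineOperand>, i.e. a non-owning read-only view of the condition operands. A generic sketch of the same signature style, using std::span (C++20) in place of ArrayRef:

    #include <array>
    #include <numeric>
    #include <span>
    #include <vector>

    // Taking a read-only view lets callers pass any contiguous sequence
    // without committing to one container type, just as ArrayRef does.
    long sumConditions(std::span<const int> Cond) {
      return std::accumulate(Cond.begin(), Cond.end(), 0L);
    }

    int main() {
      std::vector<int> V{1, 2, 3};
      std::array<int, 2> A{4, 5};
      int C[] = {6};
      long Total = sumConditions(V) + sumConditions(A) + sumConditions(C);
      return Total == 21 ? 0 : 1;
    }
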
@@ -629,8 +629,8 @@ AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
// base registers are identical, and the offset of a lower memory access +
// the width doesn't overlap the offset of a higher memory access,
// then the memory accesses are different.
- if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
- getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
+ if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
+ getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
if (BaseRegA == BaseRegB) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
@@ -1310,9 +1310,9 @@ void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const {
}
bool
-AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
- unsigned &Offset,
- const TargetRegisterInfo *TRI) const {
+AArch64InstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
+ unsigned &Offset,
+ const TargetRegisterInfo *TRI) const {
switch (LdSt->getOpcode()) {
default:
return false;
@@ -1336,7 +1336,7 @@ AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
};
}
-bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
+bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
const TargetRegisterInfo *TRI) const {
// Handle only loads/stores with base register followed by immediate offset.
@@ -1434,7 +1434,7 @@ bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
/// Detect opportunities for ldp/stp formation.
///
-/// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
+/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
MachineInstr *SecondLdSt,
unsigned NumLoads) const {
@@ -1443,7 +1443,7 @@ bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
return false;
if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode())
return false;
- // getLdStBaseRegImmOfs guarantees that oper 2 isImm.
+ // getMemOpBaseRegImmOfs guarantees that oper 2 isImm.
unsigned Ofs1 = FirstLdSt->getOperand(2).getImm();
// Allow 6 bits of positive range.
if (Ofs1 > 64)
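The comment above notes that getMemOpBaseRegImmOfs guarantees operand 2 is an immediate and that only a small positive offset range is eligible for pairing. A hedged sketch of the clustering condition (an assumed helper, not code from this patch): two same-opcode accesses are ldp/stp candidates when their scaled immediate offsets are consecutive.

    static bool offsetsAllowPairing(unsigned Ofs1, unsigned Ofs2) {
      // Offsets are in scaled units; keep to a small positive range so the
      // resulting pair still fits the ldp/stp immediate encoding.
      if (Ofs1 > 64)
        return false;
      // Consecutive accesses, e.g. ldr x0,[x2,#8] followed by ldr x1,[x2,#16].
      return Ofs1 + 1 == Ofs2;
    }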
@@ -2459,15 +2459,15 @@ static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
return true;
}
-/// hasPattern - return true when there is potentially a faster code sequence
+/// Return true when there is potentially a faster code sequence
/// for an instruction chain ending in \p Root. All potential patterns are
/// listed
/// in the \p Pattern vector. Pattern should be sorted in priority order since
/// the pattern evaluator stops checking as soon as it finds a faster sequence.
-bool AArch64InstrInfo::hasPattern(
+bool AArch64InstrInfo::getMachineCombinerPatterns(
MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const {
+ SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
unsigned Opc = Root.getOpcode();
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
@@ -2495,76 +2495,76 @@ bool AArch64InstrInfo::hasPattern(
"ADDWrr does not have register operands");
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
AArch64::WZR)) {
- Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP1);
+ Patterns.push_back(MachineCombinerPattern::MC_MULADDW_OP1);
Found = true;
}
if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
AArch64::WZR)) {
- Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP2);
+ Patterns.push_back(MachineCombinerPattern::MC_MULADDW_OP2);
Found = true;
}
break;
case AArch64::ADDXrr:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
AArch64::XZR)) {
- Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP1);
+ Patterns.push_back(MachineCombinerPattern::MC_MULADDX_OP1);
Found = true;
}
if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
AArch64::XZR)) {
- Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP2);
+ Patterns.push_back(MachineCombinerPattern::MC_MULADDX_OP2);
Found = true;
}
break;
case AArch64::SUBWrr:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
AArch64::WZR)) {
- Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP1);
+ Patterns.push_back(MachineCombinerPattern::MC_MULSUBW_OP1);
Found = true;
}
if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
AArch64::WZR)) {
- Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP2);
+ Patterns.push_back(MachineCombinerPattern::MC_MULSUBW_OP2);
Found = true;
}
break;
case AArch64::SUBXrr:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
AArch64::XZR)) {
- Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP1);
+ Patterns.push_back(MachineCombinerPattern::MC_MULSUBX_OP1);
Found = true;
}
if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
AArch64::XZR)) {
- Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP2);
+ Patterns.push_back(MachineCombinerPattern::MC_MULSUBX_OP2);
Found = true;
}
break;
case AArch64::ADDWri:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
AArch64::WZR)) {
- Pattern.push_back(MachineCombinerPattern::MC_MULADDWI_OP1);
+ Patterns.push_back(MachineCombinerPattern::MC_MULADDWI_OP1);
Found = true;
}
break;
case AArch64::ADDXri:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
AArch64::XZR)) {
- Pattern.push_back(MachineCombinerPattern::MC_MULADDXI_OP1);
+ Patterns.push_back(MachineCombinerPattern::MC_MULADDXI_OP1);
Found = true;
}
break;
case AArch64::SUBWri:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
AArch64::WZR)) {
- Pattern.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1);
+ Patterns.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1);
Found = true;
}
break;
case AArch64::SUBXri:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
AArch64::XZR)) {
- Pattern.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1);
+ Patterns.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1);
Found = true;
}
break;
@@ -2667,7 +2667,7 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
return MUL;
}
-/// genAlternativeCodeSequence - when hasPattern() finds a pattern
+/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence
void AArch64InstrInfo::genAlternativeCodeSequence(
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index d296768..68c2a28 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -90,13 +90,13 @@ public:
/// Hint that pairing the given load or store is unprofitable.
void suppressLdStPair(MachineInstr *MI) const;
- bool getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
- unsigned &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
+ unsigned &Offset,
+ const TargetRegisterInfo *TRI) const override;
- bool getLdStBaseRegImmOfsWidth(MachineInstr *LdSt, unsigned &BaseReg,
- int &Offset, int &Width,
- const TargetRegisterInfo *TRI) const;
+ bool getMemOpBaseRegImmOfsWidth(MachineInstr *LdSt, unsigned &BaseReg,
+ int &Offset, int &Width,
+ const TargetRegisterInfo *TRI) const;
bool enableClusterLoads() const override { return true; }
@@ -140,17 +140,14 @@ public:
bool AllowModify = false) const override;
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
bool
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
- bool canInsertSelect(const MachineBasicBlock &,
- const SmallVectorImpl<MachineOperand> &Cond, unsigned,
- unsigned, int &, int &, int &) const override;
+ bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
+ unsigned, unsigned, int &, int &, int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- DebugLoc DL, unsigned DstReg,
- const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL, unsigned DstReg, ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg) const override;
void getNoopForMachoTarget(MCInst &NopInst) const override;
@@ -166,19 +163,17 @@ public:
unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const override;
bool optimizeCondBranch(MachineInstr *MI) const override;
- /// hasPattern - return true when there is potentially a faster code sequence
+ /// Return true when there is potentially a faster code sequence
/// for an instruction chain ending in <Root>. All potential patterns are
- /// listed
- /// in the <Pattern> array.
- bool hasPattern(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern)
+ /// listed in the <Patterns> array.
+ bool getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns)
const override;
- /// genAlternativeCodeSequence - when hasPattern() finds a pattern
- /// this function generates the instructions that could replace the
- /// original code sequence
+ /// When getMachineCombinerPatterns() finds patterns, this function generates
+ /// the instructions that could replace the original code sequence
void genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
+ MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
@@ -189,7 +184,7 @@ public:
private:
void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
MachineBasicBlock *TBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ ArrayRef<MachineOperand> Cond) const;
};
/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
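For context, the combiner hooks declared above enumerate multiply-accumulate chains; the snippet below (illustration only, not part of the patch) shows the kind of source that typically ends up as a single madd rather than separate mul and add instructions when compiled for AArch64 at -O2.

    // Typically lowered to: madd x0, x0, x1, x2
    long long multiplyAdd(long long B, long long C, long long E) {
      return B * C + E;
    }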
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 2f1b893..653f802 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -66,20 +66,6 @@ def SDT_AArch64CSel : SDTypeProfile<1, 4,
SDTCisSameAs<0, 2>,
SDTCisInt<3>,
SDTCisVT<4, i32>]>;
-def SDT_AArch64CCMP : SDTypeProfile<1, 5,
- [SDTCisVT<0, i32>,
- SDTCisInt<1>,
- SDTCisSameAs<1, 2>,
- SDTCisInt<3>,
- SDTCisInt<4>,
- SDTCisVT<5, i32>]>;
-def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
- [SDTCisVT<0, i32>,
- SDTCisFP<1>,
- SDTCisSameAs<1, 2>,
- SDTCisInt<3>,
- SDTCisInt<4>,
- SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp : SDTypeProfile<0, 2,
[SDTCisFP<0>,
SDTCisSameAs<0, 1>]>;
@@ -174,10 +160,6 @@ def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;
-def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
-def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
-def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;
-
def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
@@ -1036,10 +1018,13 @@ def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
//===----------------------------------------------------------------------===//
-// Conditional comparison instructions.
+// Conditionally set flags instructions.
//===----------------------------------------------------------------------===//
-defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
-defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
+defm CCMN : CondSetFlagsImm<0, "ccmn">;
+defm CCMP : CondSetFlagsImm<1, "ccmp">;
+
+defm CCMN : CondSetFlagsReg<0, "ccmn">;
+defm CCMP : CondSetFlagsReg<1, "ccmp">;
//===----------------------------------------------------------------------===//
// Conditional select instructions.
@@ -2569,7 +2554,7 @@ defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>;
//===----------------------------------------------------------------------===//
defm FCCMPE : FPCondComparison<1, "fccmpe">;
-defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>;
+defm FCCMP : FPCondComparison<0, "fccmp">;
//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 186e71a..82f77a7 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -623,7 +623,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// and first alias with the second, we can combine the second into the
// first.
if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
- !UsedRegs[MI->getOperand(0).getReg()] &&
+ !(MI->mayLoad() && UsedRegs[MI->getOperand(0).getReg()]) &&
!mayAlias(MI, MemInsns, TII)) {
MergeForward = false;
return MBBI;
@@ -634,7 +634,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// first and the second alias with the first, we can combine the first
// into the second.
if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
- !UsedRegs[FirstMI->getOperand(0).getReg()] &&
+ !(FirstMI->mayLoad() &&
+ UsedRegs[FirstMI->getOperand(0).getReg()]) &&
!mayAlias(FirstMI, MemInsns, TII)) {
MergeForward = true;
return MBBI;
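The two hunks above relax the merge check so that a use of the register only blocks merging when the instruction is a load; a store merely reads its source register, so moving it past a later use is safe. A condensed sketch of the resulting predicate, with invented boolean names standing in for the register-tracking bitvectors:

    static bool canMergeAcross(bool Reg0Modified, bool Reg0UsedLater,
                               bool IsLoad, bool AliasesOtherMemOps) {
      // Merging is blocked if the register is redefined in between, if a
      // load's result is consumed before the merge point, or if the memory
      // operation aliases another access between the two candidates.
      return !Reg0Modified && !(IsLoad && Reg0UsedLater) && !AliasesOtherMemOps;
    }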
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.h b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.h
index 1e29b80..908f66f 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.h
@@ -47,6 +47,6 @@ public:
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 536a8d0..2a0f0a4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -158,6 +158,6 @@ private:
MILOHContainer LOHContainerSet;
SetOfInstructions LOHRelated;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index 5394875..bab8463 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -154,7 +154,7 @@ bool haveSameParity(unsigned reg1, unsigned reg2) {
return isOdd(reg1) == isOdd(reg2);
}
-}
+} // namespace
bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
unsigned Ra) {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.h b/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.h
index 4f656f9..c83aea4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.h
@@ -33,6 +33,6 @@ private:
// Add constraints between existing chains
void addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra);
};
-}
+} // namespace llvm
#endif // LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 11932d2..a993b60 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -28,6 +28,6 @@ public:
unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index 85b44a2..e8165a8 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -57,7 +57,7 @@ private:
}
};
char AArch64StorePairSuppress::ID = 0;
-} // anonymous
+} // namespace
FunctionPass *llvm::createAArch64StorePairSuppressPass() {
return new AArch64StorePairSuppress();
@@ -142,7 +142,7 @@ bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
continue;
unsigned BaseReg;
unsigned Offset;
- if (TII->getLdStBaseRegImmOfs(&MI, BaseReg, Offset, TRI)) {
+ if (TII->getMemOpBaseRegImmOfs(&MI, BaseReg, Offset, TRI)) {
if (PrevBaseReg == BaseReg) {
// If this block can take STPs, skip ahead to the next block.
if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent()))
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 0b97af8..554826b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -42,14 +42,12 @@ AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) {
return *this;
}
-AArch64Subtarget::AArch64Subtarget(const std::string &TT,
- const std::string &CPU,
+AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
- HasV8_1aOps(false),
- HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
- HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
+ HasV8_1aOps(false), HasFPARMv8(false), HasNEON(false), HasCrypto(false),
+ HasCRC(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(FS)),
TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 5454b20..c9b54cc 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -29,6 +29,7 @@
namespace llvm {
class GlobalValue;
class StringRef;
+class Triple;
class AArch64Subtarget : public AArch64GenSubtargetInfo {
protected:
@@ -71,7 +72,7 @@ private:
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
- AArch64Subtarget(const std::string &TT, const std::string &CPU,
+ AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM,
bool LittleEndian);
@@ -90,7 +91,7 @@ public:
}
const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
- bool enablePostMachineScheduler() const override {
+ bool enablePostRAScheduler() const override {
return isCortexA53() || isCortexA57();
}
@@ -150,6 +151,6 @@ public:
std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index f23dd33..5496a50 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -110,9 +110,8 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
}
// Helper function to build a DataLayout string
-static std::string computeDataLayout(StringRef TT, bool LittleEndian) {
- Triple Triple(TT);
- if (Triple.isOSBinFormatMachO())
+static std::string computeDataLayout(const Triple &TT, bool LittleEndian) {
+ if (TT.isOSBinFormatMachO())
return "e-m:o-i64:64-i128:128-n32:64-S128";
if (LittleEndian)
return "e-m:e-i64:64-i128:128-n32:64-S128";
@@ -121,7 +120,7 @@ static std::string computeDataLayout(StringRef TT, bool LittleEndian) {
/// TargetMachine ctor - Create an AArch64 architecture model.
///
-AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
+AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
@@ -131,7 +130,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
// initialized before TLInfo is constructed.
: LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS,
Options, RM, CM, OL),
- TLOF(createTLOF(Triple(getTargetTriple()))),
+ TLOF(createTLOF(getTargetTriple())),
isLittle(LittleEndian) {
initAsmInfo();
}
@@ -156,28 +155,27 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this, isLittle);
+ I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
+ isLittle);
}
return I.get();
}
void AArch64leTargetMachine::anchor() { }
-AArch64leTargetMachine::
-AArch64leTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+AArch64leTargetMachine::AArch64leTargetMachine(
+ const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
void AArch64beTargetMachine::anchor() { }
-AArch64beTargetMachine::
-AArch64beTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+AArch64beTargetMachine::AArch64beTargetMachine(
+ const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
namespace {
/// AArch64 Code Generator Pass Configuration Options.
@@ -269,7 +267,7 @@ bool AArch64PassConfig::addInstSelector() {
// For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many
// references to _TLS_MODULE_BASE_ as possible.
- if (Triple(TM->getTargetTriple()).isOSBinFormatELF() &&
+ if (TM->getTargetTriple().isOSBinFormatELF() &&
getOptLevel() != CodeGenOpt::None)
addPass(createAArch64CleanupLocalDynamicTLSPass());
@@ -324,6 +322,6 @@ void AArch64PassConfig::addPreEmitPass() {
// range of their destination.
addPass(createAArch64BranchRelaxation());
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
- Triple(TM->getTargetTriple()).isOSBinFormatMachO())
+ TM->getTargetTriple().isOSBinFormatMachO())
addPass(createAArch64CollectLOHPass());
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index ec34fad..8d49a29 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -27,7 +27,7 @@ protected:
mutable StringMap<std::unique_ptr<AArch64Subtarget>> SubtargetMap;
public:
- AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ AArch64TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool IsLittleEndian);
@@ -54,7 +54,7 @@ private:
class AArch64leTargetMachine : public AArch64TargetMachine {
virtual void anchor();
public:
- AArch64leTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ AArch64leTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
@@ -65,7 +65,7 @@ public:
class AArch64beTargetMachine : public AArch64TargetMachine {
virtual void anchor();
public:
- AArch64beTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ AArch64beTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index eb05ed9..82bc949 100644
--- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -52,7 +52,7 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) {
/// returns zero and isBranch is Success then a symbol look up for
/// Address + Value is done and if a symbol is found an MCExpr is created with
/// that, else an MCExpr with Address + Value is created. If GetOpInfo()
-/// returns zero and isBranch is Fail then the the Opcode of the MCInst is
+/// returns zero and isBranch is Fail then the Opcode of the MCInst is
/// tested and for ADRP an other instructions that help to load of pointers
/// a symbol look up is done to see it is returns a specific reference type
/// to add to the comment stream. This function returns Success if it adds
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 96fbe3a..7f56c2c 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -1358,7 +1358,7 @@ void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
StringRef Name =
AArch64PState::PStateMapper().toString(Val, STI.getFeatureBits(), Valid);
if (Valid)
- O << StringRef(Name.str()).upper();
+ O << Name.upper();
else
O << "#" << Val;
}
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 15dee97..19544ac 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -181,6 +181,6 @@ public:
static const char *getRegisterName(unsigned RegNo,
unsigned AltIdx = AArch64::NoRegAltName);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 6c15bf3..3e982ee 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -293,7 +293,7 @@ enum CompactUnwindEncodings {
UNWIND_AArch64_FRAME_D14_D15_PAIR = 0x00000800
};
-} // end CU namespace
+} // namespace CU
// FIXME: This should be in a separate file.
class DarwinAArch64AsmBackend : public AArch64AsmBackend {
@@ -517,14 +517,13 @@ void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
AArch64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel);
}
-}
+} // namespace
MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin())
+ const MCRegisterInfo &MRI,
+ const Triple &TheTriple,
+ StringRef CPU) {
+ if (TheTriple.isOSBinFormatMachO())
return new DarwinAArch64AsmBackend(T, MRI);
assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target");
@@ -533,10 +532,9 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
}
MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU) {
- Triple TheTriple(TT);
-
+ const MCRegisterInfo &MRI,
+ const Triple &TheTriple,
+ StringRef CPU) {
assert(TheTriple.isOSBinFormatELF() &&
"Big endian is only supported for ELF targets!");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 1f516d1..807679f 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -34,7 +34,7 @@ protected:
private:
};
-}
+} // namespace
AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
bool IsLittleEndian)
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 78837de..bbcbf51 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -208,9 +208,9 @@ MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
MCTargetStreamer *
createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
- Triple TT(STI.getTargetTriple());
+ const Triple &TT = STI.getTargetTriple();
if (TT.getObjectFormat() == Triple::ELF)
return new AArch64TargetELFStreamer(S);
return nullptr;
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index f89a852..099d1b0 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -41,7 +41,7 @@ static MCInstrInfo *createAArch64MCInstrInfo() {
}
static MCSubtargetInfo *
-createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
+createAArch64MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
if (CPU.empty())
@@ -60,7 +60,7 @@ static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) {
static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TheTriple) {
MCAsmInfo *MAI;
- if (TheTriple.isOSDarwin())
+ if (TheTriple.isOSBinFormatMachO())
MAI = new AArch64MCAsmInfoDarwin();
else {
assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF");
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 4705bdf..ca56f63 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -43,11 +43,11 @@ MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createAArch64leAsmBackend(const Target &T,
- const MCRegisterInfo &MRI, StringRef TT,
- StringRef CPU);
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU);
MCAsmBackend *createAArch64beAsmBackend(const Target &T,
- const MCRegisterInfo &MRI, StringRef TT,
- StringRef CPU);
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU);
MCObjectWriter *createAArch64ELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI,
@@ -65,7 +65,7 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
MCTargetStreamer *createAArch64ObjectTargetStreamer(MCStreamer &S,
const MCSubtargetInfo &STI);
-} // End llvm namespace
+} // namespace llvm
// Defines symbolic names for AArch64 registers. This defines a mapping from
// register name to register number.
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 67af810..b2f5bf3 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -38,7 +38,7 @@ public:
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;
};
-}
+} // namespace
bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym,
@@ -287,7 +287,7 @@ void AArch64MachObjectWriter::recordRelocation(
if (Symbol->isTemporary() && (Value || !CanUseLocalRelocation)) {
const MCSection &Sec = Symbol->getSection();
if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
- Asm.addLocalUsedInReloc(*Symbol);
+ Symbol->setUsedInReloc();
}
const MCSymbol *Base = Asm.getAtom(*Symbol);
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 7e42f8e..40071f6 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -346,7 +346,7 @@ namespace AArch64AT {
ATMapper();
};
-}
+} // namespace AArch64AT
namespace AArch64DB {
enum DBValues {
Invalid = -1,
@@ -369,7 +369,7 @@ namespace AArch64DB {
DBarrierMapper();
};
-}
+} // namespace AArch64DB
namespace AArch64DC {
enum DCValues {
@@ -390,7 +390,7 @@ namespace AArch64DC {
DCMapper();
};
-}
+} // namespace AArch64DC
namespace AArch64IC {
enum ICValues {
@@ -410,7 +410,7 @@ namespace AArch64IC {
static inline bool NeedsRegister(ICValues Val) {
return Val == IVAU;
}
-}
+} // namespace AArch64IC
namespace AArch64ISB {
enum ISBValues {
@@ -422,7 +422,7 @@ namespace AArch64ISB {
ISBMapper();
};
-}
+} // namespace AArch64ISB
namespace AArch64PRFM {
enum PRFMValues {
@@ -452,7 +452,7 @@ namespace AArch64PRFM {
PRFMMapper();
};
-}
+} // namespace AArch64PRFM
namespace AArch64PState {
enum PStateValues {
@@ -471,7 +471,7 @@ namespace AArch64PState {
PStateMapper();
};
-}
+} // namespace AArch64PState
namespace AArch64SE {
enum ShiftExtSpecifiers {
@@ -492,7 +492,7 @@ namespace AArch64SE {
SXTW,
SXTX
};
-}
+} // namespace AArch64SE
namespace AArch64Layout {
enum VectorLayout {
@@ -514,7 +514,7 @@ namespace AArch64Layout {
VL_S,
VL_D
};
-}
+} // namespace AArch64Layout
inline static const char *
AArch64VectorLayoutToString(AArch64Layout::VectorLayout Layout) {
@@ -1221,7 +1221,7 @@ namespace AArch64SysReg {
};
uint32_t ParseGenericRegister(StringRef Name, bool &Valid);
-}
+} // namespace AArch64SysReg
namespace AArch64TLBI {
enum TLBIValues {
@@ -1283,7 +1283,7 @@ namespace AArch64TLBI {
return true;
}
}
-}
+} // namespace AArch64TLBI
namespace AArch64II {
/// Target Operand Flag enum.
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
new file mode 100644
index 0000000..0a05d25
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -0,0 +1,148 @@
+//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_AMDGPU_H
+#define LLVM_LIB_TARGET_R600_AMDGPU_H
+
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AMDGPUInstrPrinter;
+class AMDGPUSubtarget;
+class AMDGPUTargetMachine;
+class FunctionPass;
+class MCAsmInfo;
+class raw_ostream;
+class Target;
+class TargetMachine;
+
+// R600 Passes
+FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
+FunctionPass *createR600TextureIntrinsicsReplacer();
+FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600EmitClauseMarkers();
+FunctionPass *createR600ClauseMergePass(TargetMachine &tm);
+FunctionPass *createR600Packetizer(TargetMachine &tm);
+FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
+FunctionPass *createAMDGPUCFGStructurizerPass();
+
+// SI Passes
+FunctionPass *createSITypeRewriter();
+FunctionPass *createSIAnnotateControlFlowPass();
+FunctionPass *createSIFoldOperandsPass();
+FunctionPass *createSILowerI1CopiesPass();
+FunctionPass *createSIShrinkInstructionsPass();
+FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm);
+FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
+FunctionPass *createSIFixControlFlowLiveIntervalsPass();
+FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
+FunctionPass *createSIFixSGPRLiveRangesPass();
+FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
+FunctionPass *createSIInsertWaits(TargetMachine &tm);
+FunctionPass *createSIPrepareScratchRegs();
+
+void initializeSIFoldOperandsPass(PassRegistry &);
+extern char &SIFoldOperandsID;
+
+void initializeSILowerI1CopiesPass(PassRegistry &);
+extern char &SILowerI1CopiesID;
+
+void initializeSILoadStoreOptimizerPass(PassRegistry &);
+extern char &SILoadStoreOptimizerID;
+
+// Passes common to R600 and SI
+FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST);
+Pass *createAMDGPUStructurizeCFGPass();
+FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
+ModulePass *createAMDGPUAlwaysInlinePass();
+
+void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
+extern char &SIFixControlFlowLiveIntervalsID;
+
+void initializeSIFixSGPRLiveRangesPass(PassRegistry&);
+extern char &SIFixSGPRLiveRangesID;
+
+
+extern Target TheAMDGPUTarget;
+extern Target TheGCNTarget;
+
+namespace AMDGPU {
+enum TargetIndex {
+ TI_CONSTDATA_START,
+ TI_SCRATCH_RSRC_DWORD0,
+ TI_SCRATCH_RSRC_DWORD1,
+ TI_SCRATCH_RSRC_DWORD2,
+ TI_SCRATCH_RSRC_DWORD3
+};
+}
+
+#define END_OF_TEXT_LABEL_NAME "EndOfTextLabel"
+
+} // End namespace llvm
+
+namespace ShaderType {
+ enum Type {
+ PIXEL = 0,
+ VERTEX = 1,
+ GEOMETRY = 2,
+ COMPUTE = 3
+ };
+}
+
+/// OpenCL uses address spaces to differentiate between
+/// various memory regions on the hardware. On the CPU
+/// all of the address spaces point to the same memory,
+/// however on the GPU, each address space points to
+/// a separate piece of memory that is unique from other
+/// memory locations.
+namespace AMDGPUAS {
+enum AddressSpaces : unsigned {
+ PRIVATE_ADDRESS = 0, ///< Address space for private memory.
+ GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
+ CONSTANT_ADDRESS = 2, ///< Address space for constant memory
+ LOCAL_ADDRESS = 3, ///< Address space for local memory.
+ FLAT_ADDRESS = 4, ///< Address space for flat memory.
+ REGION_ADDRESS = 5, ///< Address space for region memory.
+ PARAM_D_ADDRESS = 6, ///< Address space for direct addressable parameter memory (CONST0)
+ PARAM_I_ADDRESS = 7, ///< Address space for indirect addressable parameter memory (VTX1)
+
+ // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on this
+ // order to be able to dynamically index a constant buffer, for example:
+ //
+ // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
+
+ CONSTANT_BUFFER_0 = 8,
+ CONSTANT_BUFFER_1 = 9,
+ CONSTANT_BUFFER_2 = 10,
+ CONSTANT_BUFFER_3 = 11,
+ CONSTANT_BUFFER_4 = 12,
+ CONSTANT_BUFFER_5 = 13,
+ CONSTANT_BUFFER_6 = 14,
+ CONSTANT_BUFFER_7 = 15,
+ CONSTANT_BUFFER_8 = 16,
+ CONSTANT_BUFFER_9 = 17,
+ CONSTANT_BUFFER_10 = 18,
+ CONSTANT_BUFFER_11 = 19,
+ CONSTANT_BUFFER_12 = 20,
+ CONSTANT_BUFFER_13 = 21,
+ CONSTANT_BUFFER_14 = 22,
+ CONSTANT_BUFFER_15 = 23,
+ ADDRESS_NONE = 24, ///< Address space for unknown memory.
+ LAST_ADDRESS = ADDRESS_NONE,
+
+ // Some places use this if the address space can't be determined.
+ UNKNOWN_ADDRESS_SPACE = ~0u
+};
+
+} // namespace AMDGPUAS
+
+#endif
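Since the AMDGPUAS comment in the new header explains that each address space maps to a distinct memory region on the GPU, a consumer of this header would typically dispatch on a pointer's address space. The helper below is illustrative only and assumes the standard llvm::PointerType API rather than anything introduced by this patch:

    #include "AMDGPU.h"
    #include "llvm/IR/DerivedTypes.h"

    // True when a pointer lives in LDS (the OpenCL "local" address space).
    static bool isLocalPointer(const llvm::PointerType *PtrTy) {
      return PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
    }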
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
new file mode 100644
index 0000000..2e7e39a
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -0,0 +1,266 @@
+//===-- AMDGPU.td - AMDGPU Tablegen files ------------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features
+//===----------------------------------------------------------------------===//
+
+// Debugging Features
+
+def FeatureDumpCode : SubtargetFeature <"DumpCode",
+ "DumpCode",
+ "true",
+ "Dump MachineInstrs in the CodeEmitter">;
+
+def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
+ "DumpCode",
+ "true",
+ "Dump MachineInstrs in the CodeEmitter">;
+
+def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer",
+ "EnableIRStructurizer",
+ "false",
+ "Disable IR Structurizer">;
+
+def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
+ "EnablePromoteAlloca",
+ "true",
+ "Enable promote alloca pass">;
+
+// Target features
+
+def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
+ "EnableIfCvt",
+ "false",
+ "Disable the if conversion pass">;
+
+def FeatureFP64 : SubtargetFeature<"fp64",
+ "FP64",
+ "true",
+ "Enable double precision operations">;
+
+def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
+ "FP64Denormals",
+ "true",
+ "Enable double precision denormal handling",
+ [FeatureFP64]>;
+
+def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
+ "FastFMAF32",
+ "true",
+ "Assuming f32 fma is at least as fast as mul + add",
+ []>;
+
+// Some instructions do not support denormals despite this flag. Using
+// fp32 denormals also causes instructions to run at the double
+// precision rate for the device.
+def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
+ "FP32Denormals",
+ "true",
+ "Enable single precision denormal handling">;
+
+def Feature64BitPtr : SubtargetFeature<"64BitPtr",
+ "Is64bit",
+ "true",
+ "Specify if 64-bit addressing should be used">;
+
+def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
+ "R600ALUInst",
+ "false",
+ "Older version of ALU instructions encoding">;
+
+def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
+ "HasVertexCache",
+ "true",
+ "Specify use of dedicated vertex cache">;
+
+def FeatureCaymanISA : SubtargetFeature<"caymanISA",
+ "CaymanISA",
+ "true",
+ "Use Cayman ISA">;
+
+def FeatureCFALUBug : SubtargetFeature<"cfalubug",
+ "CFALUBug",
+ "true",
+ "GPU has CF_ALU bug">;
+
+// XXX - This should probably be removed once enabled by default
+def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
+ "EnableLoadStoreOpt",
+ "true",
+ "Enable SI load/store optimizer pass">;
+
+def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
+ "FlatAddressSpace",
+ "true",
+ "Support flat address space">;
+
+def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
+ "EnableVGPRSpilling",
+ "true",
+ "Enable spilling of VGPRs to scratch memory">;
+
+def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
+ "SGPRInitBug",
+ "true",
+ "VI SGPR initialization bug requiring a fixed SGPR allocation size">;
+
+class SubtargetFeatureFetchLimit <string Value> :
+ SubtargetFeature <"fetch"#Value,
+ "TexVTXClauseSize",
+ Value,
+ "Limit the maximum number of fetches in a clause to "#Value>;
+
+def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
+def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
+
+class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
+ "wavefrontsize"#Value,
+ "WavefrontSize",
+ !cast<string>(Value),
+ "The number of threads per wavefront">;
+
+def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
+def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
+def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
+
+class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
+ "ldsbankcount"#Value,
+ "LDSBankCount",
+ !cast<string>(Value),
+ "The number of LDS banks per compute unit.">;
+
+def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
+def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
+
+class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
+ "localmemorysize"#Value,
+ "LocalMemorySize",
+ !cast<string>(Value),
+ "The size of local memory in bytes">;
+
+def FeatureGCN : SubtargetFeature<"gcn",
+ "IsGCN",
+ "true",
+ "GCN or newer GPU">;
+
+def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
+ "GCN1Encoding",
+ "true",
+ "Encoding format for SI and CI">;
+
+def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
+ "GCN3Encoding",
+ "true",
+ "Encoding format for VI">;
+
+def FeatureCIInsts : SubtargetFeature<"ci-insts",
+ "CIInsts",
+ "true",
+ "Additional instructions for CI+">;
+
+// Dummy feature used to disable assembler instructions.
+def FeatureDisable : SubtargetFeature<"",
+ "FeatureDisable","true",
+ "Dummy feature to disable assembler"
+ " instructions">;
+
+class SubtargetFeatureGeneration <string Value,
+ list<SubtargetFeature> Implies> :
+ SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
+ Value#" GPU generation", Implies>;
+
+def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
+def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
+def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
+
+def FeatureR600 : SubtargetFeatureGeneration<"R600",
+ [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]>;
+
+def FeatureR700 : SubtargetFeatureGeneration<"R700",
+ [FeatureFetchLimit16, FeatureLocalMemorySize0]>;
+
+def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
+ [FeatureFetchLimit16, FeatureLocalMemorySize32768]>;
+
+def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
+ [FeatureFetchLimit16, FeatureWavefrontSize64,
+ FeatureLocalMemorySize32768]
+>;
+
+def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
+ [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768,
+ FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
+ FeatureLDSBankCount32]>;
+
+def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
+ [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
+ FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
+ FeatureGCN1Encoding, FeatureCIInsts]>;
+
+def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
+ [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
+ FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
+ FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]>;
+
+//===----------------------------------------------------------------------===//
+
+def AMDGPUInstrInfo : InstrInfo {
+ let guessInstructionProperties = 1;
+ let noNamedPositionallyEncodedOperands = 1;
+}
+
+def AMDGPUAsmParser : AsmParser {
+ // Some of the R600 registers have the same name, so this crashes.
+ // For example T0_XYZW and T0_XY both have the asm name T0.
+ let ShouldEmitMatchRegisterName = 0;
+}
+
+def AMDGPU : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AMDGPUInstrInfo;
+ let AssemblyParsers = [AMDGPUAsmParser];
+}
+
+// Dummy Instruction itineraries for pseudo instructions
+def ALU_NULL : FuncUnit;
+def NullALU : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Predicate helper class
+//===----------------------------------------------------------------------===//
+
+def TruePredicate : Predicate<"true">;
+def isSICI : Predicate<
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
+>, AssemblerPredicate<"FeatureGCN1Encoding">;
+
+class PredicateControl {
+ Predicate SubtargetPredicate;
+ Predicate SIAssemblerPredicate = isSICI;
+ list<Predicate> AssemblerPredicates = [];
+ Predicate AssemblerPredicate = TruePredicate;
+ list<Predicate> OtherPredicates = [];
+ list<Predicate> Predicates = !listconcat([SubtargetPredicate, AssemblerPredicate],
+ AssemblerPredicates,
+ OtherPredicates);
+}
+
+// Include AMDGPU TD files
+include "R600Schedule.td"
+include "SISchedule.td"
+include "Processors.td"
+include "AMDGPUInstrInfo.td"
+include "AMDGPUIntrinsics.td"
+include "AMDGPURegisterInfo.td"
+include "AMDGPUInstructions.td"
+include "AMDGPUCallingConv.td"
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
new file mode 100644
index 0000000..0b426bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -0,0 +1,67 @@
+//===-- AMDGPUAlwaysInlinePass.cpp - Mark functions always_inline --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass marks all internal functions as always_inline and creates
+/// duplicates of all other functions and marks the duplicates as always_inline.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUAlwaysInline : public ModulePass {
+
+ static char ID;
+
+public:
+ AMDGPUAlwaysInline() : ModulePass(ID) { }
+ bool runOnModule(Module &M) override;
+ const char *getPassName() const override { return "AMDGPU Always Inline Pass"; }
+};
+
+} // End anonymous namespace
+
+char AMDGPUAlwaysInline::ID = 0;
+
+bool AMDGPUAlwaysInline::runOnModule(Module &M) {
+
+ std::vector<Function*> FuncsToClone;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Function &F = *I;
+ if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
+ !F.hasFnAttribute(Attribute::NoInline))
+ FuncsToClone.push_back(&F);
+ }
+
+ for (Function *F : FuncsToClone) {
+ ValueToValueMapTy VMap;
+ Function *NewFunc = CloneFunction(F, VMap, false);
+ NewFunc->setLinkage(GlobalValue::InternalLinkage);
+ F->getParent()->getFunctionList().push_back(NewFunc);
+ F->replaceAllUsesWith(NewFunc);
+ }
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Function &F = *I;
+ if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) {
+ F.addFnAttr(Attribute::AlwaysInline);
+ }
+ }
+ return false;
+}
+
+ModulePass *llvm::createAMDGPUAlwaysInlinePass() {
+ return new AMDGPUAlwaysInline();
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
new file mode 100644
index 0000000..afc6bcb
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -0,0 +1,602 @@
+//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
+/// code. When passed an MCAsmStreamer it prints assembly and when passed
+/// an MCObjectStreamer it outputs binary code.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPUAsmPrinter.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "AMDGPU.h"
+#include "AMDKernelCodeT.h"
+#include "AMDGPUSubtarget.h"
+#include "R600Defines.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "SIDefines.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+using namespace llvm;
+
+// TODO: This should get the default rounding mode from the kernel. We just set
+// the default here, but this could change if the OpenCL rounding mode pragmas
+// are used.
+//
+// The denormal mode here should match what is reported by the OpenCL runtime
+// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
+// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
+//
+// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
+// precision, and leaves single precision to flush all and does not report
+// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
+// CL_FP_DENORM for both.
+//
+// FIXME: It seems some instructions do not support single precision denormals
+// regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
+// and sin_f32, cos_f32 on most parts).
+
+// We want to use these instructions, and using fp32 denormals also causes
+// instructions to run at the double precision rate for the device so it's
+// probably best to just report no single precision denormals.
+static uint32_t getFPMode(const MachineFunction &F) {
+ const AMDGPUSubtarget& ST = F.getSubtarget<AMDGPUSubtarget>();
+ // TODO: Is there any real use for the flush in only / flush out only modes?
+
+ uint32_t FP32Denormals =
+ ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+
+ uint32_t FP64Denormals =
+ ST.hasFP64Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+
+ return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
+ FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
+ FP_DENORM_MODE_SP(FP32Denormals) |
+ FP_DENORM_MODE_DP(FP64Denormals);
+}
+
+static AsmPrinter *
+createAMDGPUAsmPrinterPass(TargetMachine &tm,
+ std::unique_ptr<MCStreamer> &&Streamer) {
+ return new AMDGPUAsmPrinter(tm, std::move(Streamer));
+}
+
+extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
+ TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(TheGCNTarget, createAMDGPUAsmPrinterPass);
+}
+
+AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)) {}
+
+void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
+
+ // This label is used to mark the end of the .text section.
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+ OutStreamer->SwitchSection(TLOF.getTextSection());
+ MCSymbol *EndOfTextLabel =
+ OutContext.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+ OutStreamer->EmitLabel(EndOfTextLabel);
+}
+
+bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+
+ // The starting address of all shader programs must be 256 bytes aligned.
+ MF.setAlignment(8);
+
+ SetupMachineFunction(MF);
+
+ MCContext &Context = getObjFileLowering().getContext();
+ MCSectionELF *ConfigSection =
+ Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
+ OutStreamer->SwitchSection(ConfigSection);
+
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+ SIProgramInfo KernelInfo;
+ if (STM.isAmdHsaOS()) {
+ getSIProgramInfo(KernelInfo, MF);
+ EmitAmdKernelCodeT(MF, KernelInfo);
+ OutStreamer->EmitCodeAlignment(2 << (MF.getAlignment() - 1));
+ } else if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ getSIProgramInfo(KernelInfo, MF);
+ EmitProgramInfoSI(MF, KernelInfo);
+ } else {
+ EmitProgramInfoR600(MF);
+ }
+
+ DisasmLines.clear();
+ HexLines.clear();
+ DisasmLineMaxLen = 0;
+
+ EmitFunctionBody();
+
+ if (isVerbose()) {
+ MCSectionELF *CommentSection =
+ Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
+ OutStreamer->SwitchSection(CommentSection);
+
+ if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ OutStreamer->emitRawComment(" Kernel info:", false);
+ OutStreamer->emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
+ false);
+ OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
+ false);
+ OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
+ false);
+ OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
+ false);
+ OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
+ false);
+ OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
+ false);
+ } else {
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ OutStreamer->emitRawComment(
+ Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
+ }
+ }
+
+ if (STM.dumpCode()) {
+
+ OutStreamer->SwitchSection(
+ Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
+
+ for (size_t i = 0; i < DisasmLines.size(); ++i) {
+ std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
+ Comment += " ; " + HexLines[i] + "\n";
+
+ OutStreamer->EmitBytes(StringRef(DisasmLines[i]));
+ OutStreamer->EmitBytes(StringRef(Comment));
+ }
+ }
+
+ return false;
+}
+
+void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
+ unsigned MaxGPR = 0;
+ bool killPixel = false;
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+ const R600RegisterInfo *RI =
+ static_cast<const R600RegisterInfo *>(STM.getRegisterInfo());
+ const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ if (MI.getOpcode() == AMDGPU::KILLGT)
+ killPixel = true;
+ unsigned numOperands = MI.getNumOperands();
+ for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+ const MachineOperand &MO = MI.getOperand(op_idx);
+ if (!MO.isReg())
+ continue;
+ unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
+
+ // Registers with an encoding value > 127 aren't GPRs.
+ if (HWReg > 127)
+ continue;
+ MaxGPR = std::max(MaxGPR, HWReg);
+ }
+ }
+ }
+
+ unsigned RsrcReg;
+ if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
+ // Evergreen / Northern Islands
+ switch (MFI->getShaderType()) {
+ default: // Fall through
+ case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
+ case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
+ case ShaderType::PIXEL: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
+ case ShaderType::VERTEX: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
+ }
+ } else {
+ // R600 / R700
+ switch (MFI->getShaderType()) {
+ default: // Fall through
+ case ShaderType::GEOMETRY: // Fall through
+ case ShaderType::COMPUTE: // Fall through
+ case ShaderType::VERTEX: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
+ case ShaderType::PIXEL: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
+ }
+ }
+
+ OutStreamer->EmitIntValue(RsrcReg, 4);
+ OutStreamer->EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
+ S_STACK_SIZE(MFI->StackSize), 4);
+ OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
+ OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
+
+ if (MFI->getShaderType() == ShaderType::COMPUTE) {
+ OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
+ OutStreamer->EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
+ }
+}
+
+void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
+ const MachineFunction &MF) const {
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ uint64_t CodeSize = 0;
+ unsigned MaxSGPR = 0;
+ unsigned MaxVGPR = 0;
+ bool VCCUsed = false;
+ bool FlatUsed = false;
+ const SIRegisterInfo *RI =
+ static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
+
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ // TODO: CodeSize should account for multiple functions.
+ CodeSize += MI.getDesc().Size;
+
+ unsigned numOperands = MI.getNumOperands();
+ for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+ const MachineOperand &MO = MI.getOperand(op_idx);
+ unsigned width = 0;
+ bool isSGPR = false;
+
+ if (!MO.isReg()) {
+ continue;
+ }
+ unsigned reg = MO.getReg();
+ if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
+ reg == AMDGPU::VCC_HI) {
+ VCCUsed = true;
+ continue;
+ } else if (reg == AMDGPU::FLAT_SCR ||
+ reg == AMDGPU::FLAT_SCR_LO ||
+ reg == AMDGPU::FLAT_SCR_HI) {
+ FlatUsed = true;
+ continue;
+ }
+
+ switch (reg) {
+ default: break;
+ case AMDGPU::SCC:
+ case AMDGPU::EXEC:
+ case AMDGPU::M0:
+ continue;
+ }
+
+ if (AMDGPU::SReg_32RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 1;
+ } else if (AMDGPU::VGPR_32RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 1;
+ } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 2;
+ } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 2;
+ } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 3;
+ } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 4;
+ } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 4;
+ } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 8;
+ } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 8;
+ } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 16;
+ } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 16;
+ } else {
+ llvm_unreachable("Unknown register class");
+ }
+ unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
+ unsigned maxUsed = hwReg + width - 1;
+ if (isSGPR) {
+ MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
+ } else {
+ MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
+ }
+ }
+ }
+ }
+
+ if (VCCUsed)
+ MaxSGPR += 2;
+
+ if (FlatUsed)
+ MaxSGPR += 2;
+
+ // We found the maximum register index. Register indices start at 0, so add
+ // one to get the number of registers.
+ ProgInfo.NumVGPR = MaxVGPR + 1;
+ ProgInfo.NumSGPR = MaxSGPR + 1;
+
+ if (STM.hasSGPRInitBug()) {
+ if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) {
+ LLVMContext &Ctx = MF.getFunction()->getContext();
+ Ctx.emitError("too many SGPRs used with the SGPR init bug");
+ }
+
+ ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ }
+
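+ // The RSRC1 fields hold the allocation granule count minus one: VGPRs are
+ // allocated in groups of 4 and SGPRs in groups of 8, and for N >= 1,
+ // (N - 1) / granularity equals ceil(N / granularity) - 1.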
+ ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
+ ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
+ // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
+ // register.
+ ProgInfo.FloatMode = getFPMode(MF);
+
+ // XXX: Not quite sure what this does, but sc seems to unset this.
+ ProgInfo.IEEEMode = 0;
+
+ // Do not clamp NAN to 0.
+ ProgInfo.DX10Clamp = 0;
+
+ const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ ProgInfo.ScratchSize = FrameInfo->estimateStackSize(MF);
+
+ ProgInfo.FlatUsed = FlatUsed;
+ ProgInfo.VCCUsed = VCCUsed;
+ ProgInfo.CodeLen = CodeSize;
+
+ unsigned LDSAlignShift;
+ if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
+ // LDS is allocated in 64 dword blocks.
+ LDSAlignShift = 8;
+ } else {
+ // LDS is allocated in 128 dword blocks.
+ LDSAlignShift = 9;
+ }
+
+ unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
+ MFI->getMaximumWorkGroupSize(MF);
+
+ ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize;
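+ // Express the total LDS usage in hardware allocation blocks of
+ // (1 << LDSAlignShift) bytes, rounding up.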
+ ProgInfo.LDSBlocks =
+ RoundUpToAlignment(ProgInfo.LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
+
+ // Scratch is allocated in 256 dword blocks.
+ unsigned ScratchAlignShift = 10;
+ // We need to program the hardware with the amount of scratch memory that
+ // is used by the entire wave. ProgInfo.ScratchSize is the amount of
+ // scratch memory used per thread.
+ ProgInfo.ScratchBlocks =
+ RoundUpToAlignment(ProgInfo.ScratchSize * STM.getWavefrontSize(),
+ 1 << ScratchAlignShift) >> ScratchAlignShift;
+
+ ProgInfo.ComputePGMRSrc1 =
+ S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
+ S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
+ S_00B848_PRIORITY(ProgInfo.Priority) |
+ S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
+ S_00B848_PRIV(ProgInfo.Priv) |
+ S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
+ S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
+ S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
+
+ ProgInfo.ComputePGMRSrc2 =
+ S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
+ S_00B84C_USER_SGPR(MFI->NumUserSGPRs) |
+ S_00B84C_TGID_X_EN(1) |
+ S_00B84C_TGID_Y_EN(1) |
+ S_00B84C_TGID_Z_EN(1) |
+ S_00B84C_TG_SIZE_EN(1) |
+ S_00B84C_TIDIG_COMP_CNT(2) |
+ S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks);
+}
+
+static unsigned getRsrcReg(unsigned ShaderType) {
+ switch (ShaderType) {
+ default: // Fall through
+ case ShaderType::COMPUTE: return R_00B848_COMPUTE_PGM_RSRC1;
+ case ShaderType::GEOMETRY: return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
+ case ShaderType::PIXEL: return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
+ case ShaderType::VERTEX: return R_00B128_SPI_SHADER_PGM_RSRC1_VS;
+ }
+}
+
+void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
+ const SIProgramInfo &KernelInfo) {
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ unsigned RsrcReg = getRsrcReg(MFI->getShaderType());
+
+ if (MFI->getShaderType() == ShaderType::COMPUTE) {
+ OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
+
+ OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
+
+ OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
+ OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
+
+ OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
+ OutStreamer->EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);
+
+ // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
+ // 0" comment but I don't see a corresponding field in the register spec.
+ } else {
+ OutStreamer->EmitIntValue(RsrcReg, 4);
+ OutStreamer->EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
+ S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
+ if (STM.isVGPRSpillingEnabled(MFI)) {
+ OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
+ OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
+ }
+ }
+
+ if (MFI->getShaderType() == ShaderType::PIXEL) {
+ OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
+ OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
+ OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
+ OutStreamer->EmitIntValue(MFI->PSInputAddr, 4);
+ }
+}
+
+void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
+ const SIProgramInfo &KernelInfo) const {
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+ amd_kernel_code_t header;
+
+ memset(&header, 0, sizeof(header));
+
+ header.amd_code_version_major = AMD_CODE_VERSION_MAJOR;
+ header.amd_code_version_minor = AMD_CODE_VERSION_MINOR;
+
+ header.struct_byte_size = sizeof(amd_kernel_code_t);
+
+ header.target_chip = STM.getAmdKernelCodeChipID();
+
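+ // MF.getAlignment() is a log2 value (set in runOnMachineFunction), so this
+ // offset is 1 << 8 == 256 bytes.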
+ header.kernel_code_entry_byte_offset = (1ULL << MF.getAlignment());
+
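+ // Pack RSRC1 into the low 32 bits and RSRC2 into the high 32 bits of the
+ // 64-bit resource register field.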
+ header.compute_pgm_resource_registers =
+ KernelInfo.ComputePGMRSrc1 |
+ (KernelInfo.ComputePGMRSrc2 << 32);
+
+ // Code Properties:
+ header.code_properties = AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
+ AMD_CODE_PROPERTY_IS_PTR64;
+
+ if (KernelInfo.FlatUsed)
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+
+ if (KernelInfo.ScratchBlocks)
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
+
+ header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
+ header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
+
+ // MFI->ABIArgOffset is the number of bytes for the kernel arguments
+ // plus 36. 36 is the number of bytes reserved at the beginning of the
+ // input buffer to store work-group size information.
+ // FIXME: We should be adding the size of the implicit arguments
+ // to this value.
+ header.kernarg_segment_byte_size = MFI->ABIArgOffset;
+
+ header.wavefront_sgpr_count = KernelInfo.NumSGPR;
+ header.workitem_vgpr_count = KernelInfo.NumVGPR;
+
+ // FIXME: What values do I put for these alignments?
+ header.kernarg_segment_alignment = 0;
+ header.group_segment_alignment = 0;
+ header.private_segment_alignment = 0;
+
+ header.code_type = 1; // HSA_EXT_CODE_KERNEL
+
+ header.wavefront_size = STM.getWavefrontSize();
+
+ MCSectionELF *VersionSection =
+ OutContext.getELFSection(".hsa.version", ELF::SHT_PROGBITS, 0);
+ OutStreamer->SwitchSection(VersionSection);
+ OutStreamer->EmitBytes(Twine("HSA Code Unit:" +
+ Twine(header.hsail_version_major) + "." +
+ Twine(header.hsail_version_minor) + ":" +
+ "AMD:" +
+ Twine(header.amd_code_version_major) + "." +
+ Twine(header.amd_code_version_minor) + ":" +
+ "GFX8.1:0").str());
+
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+
+ if (isVerbose()) {
+ OutStreamer->emitRawComment("amd_code_version_major = " +
+ Twine(header.amd_code_version_major), false);
+ OutStreamer->emitRawComment("amd_code_version_minor = " +
+ Twine(header.amd_code_version_minor), false);
+ OutStreamer->emitRawComment("struct_byte_size = " +
+ Twine(header.struct_byte_size), false);
+ OutStreamer->emitRawComment("target_chip = " +
+ Twine(header.target_chip), false);
+ OutStreamer->emitRawComment(" compute_pgm_rsrc1: " +
+ Twine::utohexstr(KernelInfo.ComputePGMRSrc1),
+ false);
+ OutStreamer->emitRawComment(" compute_pgm_rsrc2: " +
+ Twine::utohexstr(KernelInfo.ComputePGMRSrc2),
+ false);
+ OutStreamer->emitRawComment("enable_sgpr_private_segment_buffer = " +
+ Twine((bool)(header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)), false);
+ OutStreamer->emitRawComment("enable_sgpr_kernarg_segment_ptr = " +
+ Twine((bool)(header.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR)), false);
+ OutStreamer->emitRawComment("private_element_size = 2 ", false);
+ OutStreamer->emitRawComment("is_ptr64 = " +
+ Twine((bool)(header.code_properties & AMD_CODE_PROPERTY_IS_PTR64)), false);
+ OutStreamer->emitRawComment("workitem_private_segment_byte_size = " +
+ Twine(header.workitem_private_segment_byte_size),
+ false);
+ OutStreamer->emitRawComment("workgroup_group_segment_byte_size = " +
+ Twine(header.workgroup_group_segment_byte_size),
+ false);
+ OutStreamer->emitRawComment("gds_segment_byte_size = " +
+ Twine(header.gds_segment_byte_size), false);
+ OutStreamer->emitRawComment("kernarg_segment_byte_size = " +
+ Twine(header.kernarg_segment_byte_size), false);
+ OutStreamer->emitRawComment("wavefront_sgpr_count = " +
+ Twine(header.wavefront_sgpr_count), false);
+ OutStreamer->emitRawComment("workitem_vgpr_count = " +
+ Twine(header.workitem_vgpr_count), false);
+ OutStreamer->emitRawComment("code_type = " + Twine(header.code_type), false);
+ OutStreamer->emitRawComment("wavefront_size = " +
+ Twine((int)header.wavefront_size), false);
+ OutStreamer->emitRawComment("optimization_level = " +
+ Twine(header.optimization_level), false);
+ OutStreamer->emitRawComment("hsail_profile = " +
+ Twine(header.hsail_profile), false);
+ OutStreamer->emitRawComment("hsail_machine_model = " +
+ Twine(header.hsail_machine_model), false);
+ OutStreamer->emitRawComment("hsail_version_major = " +
+ Twine(header.hsail_version_major), false);
+ OutStreamer->emitRawComment("hsail_version_minor = " +
+ Twine(header.hsail_version_minor), false);
+ }
+
+ OutStreamer->EmitBytes(StringRef((char*)&header, sizeof(header)));
+}
+
+bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ case 'r':
+ break;
+ }
+ }
+
+ AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O,
+ *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
new file mode 100644
index 0000000..9207251
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -0,0 +1,113 @@
+//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUASMPRINTER_H
+#define LLVM_LIB_TARGET_R600_AMDGPUASMPRINTER_H
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include <vector>
+
+namespace llvm {
+
+class AMDGPUAsmPrinter : public AsmPrinter {
+private:
+ struct SIProgramInfo {
+ SIProgramInfo() :
+ VGPRBlocks(0),
+ SGPRBlocks(0),
+ Priority(0),
+ FloatMode(0),
+ Priv(0),
+ DX10Clamp(0),
+ DebugMode(0),
+ IEEEMode(0),
+ ScratchSize(0),
+ ComputePGMRSrc1(0),
+ LDSBlocks(0),
+ ScratchBlocks(0),
+ ComputePGMRSrc2(0),
+ NumVGPR(0),
+ NumSGPR(0),
+ FlatUsed(false),
+ VCCUsed(false),
+ CodeLen(0) {}
+
+ // Fields set in PGM_RSRC1 pm4 packet.
+ uint32_t VGPRBlocks;
+ uint32_t SGPRBlocks;
+ uint32_t Priority;
+ uint32_t FloatMode;
+ uint32_t Priv;
+ uint32_t DX10Clamp;
+ uint32_t DebugMode;
+ uint32_t IEEEMode;
+ uint32_t ScratchSize;
+
+ uint64_t ComputePGMRSrc1;
+
+ // Fields set in PGM_RSRC2 pm4 packet.
+ uint32_t LDSBlocks;
+ uint32_t ScratchBlocks;
+
+ uint64_t ComputePGMRSrc2;
+
+ uint32_t NumVGPR;
+ uint32_t NumSGPR;
+ uint32_t LDSSize;
+ bool FlatUsed;
+
+ // Bonus information for debugging.
+ bool VCCUsed;
+ uint64_t CodeLen;
+ };
+
+ void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const;
+ void findNumUsedRegistersSI(const MachineFunction &MF,
+ unsigned &NumSGPR,
+ unsigned &NumVGPR) const;
+
+ /// \brief Emit register usage information so that the GPU driver
+ /// can correctly setup the GPU state.
+ void EmitProgramInfoR600(const MachineFunction &MF);
+ void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
+ void EmitAmdKernelCodeT(const MachineFunction &MF,
+ const SIProgramInfo &KernelInfo) const;
+
+public:
+ explicit AMDGPUAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer);
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "AMDGPU Assembly Printer";
+ }
+
+ /// Implemented in AMDGPUMCInstLower.cpp
+ void EmitInstruction(const MachineInstr *MI) override;
+
+ void EmitEndOfAsmFile(Module &M) override;
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) override;
+
+protected:
+ std::vector<std::string> DisasmLines, HexLines;
+ size_t DisasmLineMaxLen;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
new file mode 100644
index 0000000..6ffa7a0
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -0,0 +1,82 @@
+//===---- AMDCallingConv.td - Calling Conventions for Radeon GPUs ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the AMD Radeon GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+// Inversion of CCIfInReg
+class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {}
+
+// Calling convention for SI
+def CC_SI : CallingConv<[
+
+ CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
+ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
+ SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
+ SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21
+ ]>>>,
+
+ CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
+ [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
+ [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
+ >>>,
+
+ CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[
+ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
+ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
+ VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
+ VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
+ ]>>>,
+
+ CCIfByVal<CCIfType<[i64] , CCAssignToRegWithShadow<
+ [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
+ [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
+ >>>
+
+]>;
+
+// Calling convention for R600
+def CC_R600 : CallingConv<[
+ CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
+ T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
+ T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
+ T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
+ T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
+ T30_XYZW, T31_XYZW, T32_XYZW
+ ]>>>
+]>;
+
+// Calling convention for compute kernels
+def CC_AMDGPU_Kernel : CallingConv<[
+ CCCustom<"allocateStack">
+]>;
+
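+// Top-level dispatch: compute kernels on any generation use CC_AMDGPU_Kernel
+// (custom stack allocation), while graphics shaders use CC_SI on Southern
+// Islands and newer and CC_R600 on older generations.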
+def CC_AMDGPU : CallingConv<[
+ CCIf<"static_cast<const AMDGPUSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).getGeneration() >="
+ "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
+ "State.getMachineFunction().getInfo<SIMachineFunctionInfo>()"
+ "->getShaderType() == ShaderType::COMPUTE",
+ CCDelegateTo<CC_AMDGPU_Kernel>>,
+ CCIf<"static_cast<const AMDGPUSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).getGeneration() < "
+ "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
+ "State.getMachineFunction().getInfo<R600MachineFunctionInfo>()"
+ "->getShaderType() == ShaderType::COMPUTE",
+ CCDelegateTo<CC_AMDGPU_Kernel>>,
+ CCIf<"static_cast<const AMDGPUSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
+ "AMDGPUSubtarget::SOUTHERN_ISLANDS",
+ CCDelegateTo<CC_SI>>,
+ CCIf<"static_cast<const AMDGPUSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).getGeneration() < "
+ "AMDGPUSubtarget::SOUTHERN_ISLANDS",
+ CCDelegateTo<CC_R600>>
+]>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
new file mode 100644
index 0000000..8175786
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
@@ -0,0 +1,112 @@
+//===----------------------- AMDGPUFrameLowering.cpp ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface to describe a layout of a stack frame on an AMDIL target machine.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
+ int LAO, unsigned TransAl)
+ : TargetFrameLowering(D, StackAl, LAO, TransAl) { }
+
+AMDGPUFrameLowering::~AMDGPUFrameLowering() { }
+
+unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
+
+ // XXX: Hardcoding to 1 for now.
+ //
+ // I think the StackWidth should be stored as metadata associated with the
+ // MachineFunction. This metadata can either be added by a frontend, or
+ // calculated by a R600 specific LLVM IR pass.
+ //
+ // The StackWidth determines how stack objects are laid out in memory.
+ // For a vector stack variable, like: int4 stack[2], the data will be stored
+ // in the following ways depending on the StackWidth.
+ //
+ // StackWidth = 1:
+ //
+ // T0.X = stack[0].x
+ // T1.X = stack[0].y
+ // T2.X = stack[0].z
+ // T3.X = stack[0].w
+ // T4.X = stack[1].x
+ // T5.X = stack[1].y
+ // T6.X = stack[1].z
+ // T7.X = stack[1].w
+ //
+ // StackWidth = 2:
+ //
+ // T0.X = stack[0].x
+ // T0.Y = stack[0].y
+ // T1.X = stack[0].z
+ // T1.Y = stack[0].w
+ // T2.X = stack[1].x
+ // T2.Y = stack[1].y
+ // T3.X = stack[1].z
+ // T3.Y = stack[1].w
+ //
+ // StackWidth = 4:
+ // T0.X = stack[0].x
+ // T0.Y = stack[0].y
+ // T0.Z = stack[0].z
+ // T0.W = stack[0].w
+ // T1.X = stack[1].x
+ // T1.Y = stack[1].y
+ // T1.Z = stack[1].z
+ // T1.W = stack[1].w
+ return 1;
+}
+
+/// \returns The offset of frame object \p FI, in units of stack registers.
+int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Start the offset at 2 registers so we don't overwrite work-group information.
+ // XXX: We should only do this when the shader actually uses this
+ // information.
+ unsigned OffsetBytes = 2 * (getStackWidth(MF) * 4);
+ int UpperBound = FI == -1 ? MFI->getNumObjects() : FI;
+
+ for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
+ OffsetBytes = RoundUpToAlignment(OffsetBytes, MFI->getObjectAlignment(i));
+ OffsetBytes += MFI->getObjectSize(i);
+ // Each register holds 4 bytes, so we must always align the offset to at
+ // least 4 bytes, so that 2 frame objects won't share the same register.
+ OffsetBytes = RoundUpToAlignment(OffsetBytes, 4);
+ }
+
+ if (FI != -1)
+ OffsetBytes = RoundUpToAlignment(OffsetBytes, MFI->getObjectAlignment(FI));
+
+ return OffsetBytes / (getStackWidth(MF) * 4);
+}
+
+const TargetFrameLowering::SpillSlot *
+AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ NumEntries = 0;
+ return nullptr;
+}
+void AMDGPUFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {}
+void
+AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+}
+
+bool
+AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h
new file mode 100644
index 0000000..9f31be1
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h
@@ -0,0 +1,45 @@
+//===--------------------- AMDGPUFrameLowering.h ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface to describe a layout of a stack frame on an AMDIL target
+/// machine.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H
+#define LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+/// \brief Information about the stack frame layout on the AMDGPU targets.
+///
+/// It holds the direction of the stack growth, the known stack alignment on
+/// entry to each function, and the offset to the locals area.
+/// See TargetFrameInfo for more comments.
+class AMDGPUFrameLowering : public TargetFrameLowering {
+public:
+ AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
+ unsigned TransAl = 1);
+ virtual ~AMDGPUFrameLowering();
+
+ /// \returns The number of 32-bit sub-registers that are used when storing
+ /// values to the stack.
+ unsigned getStackWidth(const MachineFunction &MF) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
+ const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ bool hasFP(const MachineFunction &MF) const override;
+};
+} // namespace llvm
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
new file mode 100644
index 0000000..df4461e
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -0,0 +1,1371 @@
+//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Defines an instruction selector for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUISelLowering.h" // For AMDGPUISD
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
+#include "SIDefines.h"
+#include "SIISelLowering.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Function.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// AMDGPU specific code to select AMDGPU machine instructions for
+/// SelectionDAG operations.
+class AMDGPUDAGToDAGISel : public SelectionDAGISel {
+ // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
+ // make the right decision when generating code for different targets.
+ const AMDGPUSubtarget *Subtarget;
+public:
+ AMDGPUDAGToDAGISel(TargetMachine &TM);
+ virtual ~AMDGPUDAGToDAGISel();
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ SDNode *Select(SDNode *N) override;
+ const char *getPassName() const override;
+ void PostprocessISelDAG() override;
+
+private:
+ bool isInlineImmediate(SDNode *N) const;
+ bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
+ const R600InstrInfo *TII);
+ bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
+ bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
+
+ // Complex pattern selectors
+ bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
+ bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
+
+ static bool checkType(const Value *ptr, unsigned int addrspace);
+ static bool checkPrivateAddress(const MachineMemOperand *Op);
+
+ static bool isGlobalStore(const StoreSDNode *N);
+ static bool isFlatStore(const StoreSDNode *N);
+ static bool isPrivateStore(const StoreSDNode *N);
+ static bool isLocalStore(const StoreSDNode *N);
+ static bool isRegionStore(const StoreSDNode *N);
+
+ bool isCPLoad(const LoadSDNode *N) const;
+ bool isConstantLoad(const LoadSDNode *N, int cbID) const;
+ bool isGlobalLoad(const LoadSDNode *N) const;
+ bool isFlatLoad(const LoadSDNode *N) const;
+ bool isParamLoad(const LoadSDNode *N) const;
+ bool isPrivateLoad(const LoadSDNode *N) const;
+ bool isLocalLoad(const LoadSDNode *N) const;
+ bool isRegionLoad(const LoadSDNode *N) const;
+
+ SDNode *glueCopyToM0(SDNode *N) const;
+
+ const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
+ bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
+ bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
+ SDValue& Offset);
+ bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+ unsigned OffsetBits) const;
+ bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
+ bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1) const;
+ void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &Offset, SDValue &Offen,
+ SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
+ SDValue &TFE) const;
+ bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &Offset, SDValue &GLC,
+ SDValue &SLC, SDValue &TFE) const;
+ bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+ SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
+ SDValue &SLC) const;
+ bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &ImmOffset) const;
+ bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
+ SDValue &Offset, SDValue &GLC, SDValue &SLC,
+ SDValue &TFE) const;
+ bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+ SDValue &Offset, SDValue &GLC) const;
+ SDNode *SelectAddrSpaceCast(SDNode *N);
+ bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
+
+ bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Omod) const;
+ bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp,
+ SDValue &Omod) const;
+
+ SDNode *SelectADD_SUB_I64(SDNode *N);
+ SDNode *SelectDIV_SCALE(SDNode *N);
+
+ SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ uint32_t Offset, uint32_t Width);
+ SDNode *SelectS_BFEFromShifts(SDNode *N);
+ SDNode *SelectS_BFE(SDNode *N);
+
+ // Include the pieces autogenerated from the target description.
+#include "AMDGPUGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+/// \brief This pass converts a legalized DAG into an AMDGPU-specific
+/// DAG, ready for instruction scheduling.
+FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
+ return new AMDGPUDAGToDAGISel(TM);
+}
+
+AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
+ : SelectionDAGISel(TM) {}
+
+bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
+ return SelectionDAGISel::runOnMachineFunction(MF);
+}
+
+AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
+}
+
+bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
+ const SITargetLowering *TL
+ = static_cast<const SITargetLowering *>(getTargetLowering());
+ return TL->analyzeImmediate(N) == 0;
+}
+
+/// \brief Determine the register class for \p OpNo
+/// \returns The register class of the virtual register that will be used for
+/// the given operand number \p OpNo, or NULL if the register class cannot be
+/// determined.
+const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
+ unsigned OpNo) const {
+ if (!N->isMachineOpcode())
+ return nullptr;
+
+ switch (N->getMachineOpcode()) {
+ default: {
+ const MCInstrDesc &Desc =
+ Subtarget->getInstrInfo()->get(N->getMachineOpcode());
+ unsigned OpIdx = Desc.getNumDefs() + OpNo;
+ if (OpIdx >= Desc.getNumOperands())
+ return nullptr;
+ int RegClass = Desc.OpInfo[OpIdx].RegClass;
+ if (RegClass == -1)
+ return nullptr;
+
+ return Subtarget->getRegisterInfo()->getRegClass(RegClass);
+ }
+ case AMDGPU::REG_SEQUENCE: {
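+ // REG_SEQUENCE operands come in (value, subreg index) pairs after the
+ // leading register class ID operand, so OpNo + 1 is the subregister index
+ // for this operand.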
+ unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *SuperRC =
+ Subtarget->getRegisterInfo()->getRegClass(RCID);
+
+ SDValue SubRegOp = N->getOperand(OpNo + 1);
+ unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
+ return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
+ SubRegIdx);
+ }
+ }
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRParam(
+ SDValue Addr, SDValue& R1, SDValue& R2) {
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ }
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+ return SelectADDRParam(Addr, R1, R2);
+}
+
+
+bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
+ }
+ return true;
+}
+
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
+ AMDGPUAS::LOCAL_ADDRESS))
+ return N;
+
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+
+ // Write max value to m0 before each load operation
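+ // m0 supplies the size used for DS (local memory) address clamping, so
+ // writing all ones keeps the hardware bounds check from clipping any access.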
+
+ SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
+ CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+
+ SDValue Glue = M0.getValue(1);
+
+ SmallVector <SDValue, 8> Ops;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+ Ops.push_back(Glue);
+ CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
+
+ return N;
+}
+
+SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
+ unsigned int Opc = N->getOpcode();
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
+ return nullptr; // Already selected.
+ }
+
+ if (isa<AtomicSDNode>(N))
+ N = glueCopyToM0(N);
+
+ switch (Opc) {
+ default: break;
+ // We are selecting i64 ADD here instead of custom lowering it during
+ // DAG legalization, so we can fold some i64 ADDs used for address
+ // calculation into the LOAD and STORE instructions.
+ case ISD::ADD:
+ case ISD::SUB: {
+ if (N->getValueType(0) != MVT::i64 ||
+ Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ break;
+
+ return SelectADD_SUB_I64(N);
+ }
+ case ISD::SCALAR_TO_VECTOR:
+ case AMDGPUISD::BUILD_VERTICAL_VECTOR:
+ case ISD::BUILD_VECTOR: {
+ unsigned RegClassID;
+ const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
+ EVT VT = N->getValueType(0);
+ unsigned NumVectorElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ assert(EltVT.bitsEq(MVT::i32));
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ bool UseVReg = true;
+ for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
+ U != E; ++U) {
+ if (!U->isMachineOpcode()) {
+ continue;
+ }
+ const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
+ if (!RC) {
+ continue;
+ }
+ if (static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(RC)) {
+ UseVReg = false;
+ }
+ }
+ switch(NumVectorElts) {
+ case 1: RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID :
+ AMDGPU::SReg_32RegClassID;
+ break;
+ case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
+ AMDGPU::SReg_64RegClassID;
+ break;
+ case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
+ AMDGPU::SReg_128RegClassID;
+ break;
+ case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
+ AMDGPU::SReg_256RegClassID;
+ break;
+ case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
+ AMDGPU::SReg_512RegClassID;
+ break;
+ default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+ }
+ } else {
+ // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+ // that adds a 128-bit register copy when going through the
+ // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
+ // possible because they can't be bundled by our scheduler.
+ switch(NumVectorElts) {
+ case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
+ case 4:
+ if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+ RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
+ else
+ RegClassID = AMDGPU::R600_Reg128RegClassID;
+ break;
+ default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+ }
+ }
+
+ SDLoc DL(N);
+ SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
+
+ if (NumVectorElts == 1) {
+ return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
+ N->getOperand(0), RegClass);
+ }
+
+ assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
+ "supported yet");
+ // 16 = Max Num Vector Elements
+ // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
+ // 1 = Vector Register Class
+ SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
+
+ RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
+ bool IsRegSeq = true;
+ unsigned NOps = N->getNumOperands();
+ for (unsigned i = 0; i < NOps; i++) {
+ // XXX: Why is this here?
+ if (isa<RegisterSDNode>(N->getOperand(i))) {
+ IsRegSeq = false;
+ break;
+ }
+ RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
+ RegSeqArgs[1 + (2 * i) + 1] =
+ CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
+ MVT::i32);
+ }
+
+ if (NOps != NumVectorElts) {
+ // Fill in the missing undef elements if this was a scalar_to_vector.
+ assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
+
+ MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ DL, EltVT);
+ for (unsigned i = NOps; i < NumVectorElts; ++i) {
+ RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
+ RegSeqArgs[1 + (2 * i) + 1] =
+ CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
+ }
+ }
+
+ if (!IsRegSeq)
+ break;
+ return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
+ RegSeqArgs);
+ }
+ case ISD::BUILD_PAIR: {
+ SDValue RC, SubReg0, SubReg1;
+ if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ break;
+ }
+ SDLoc DL(N);
+ if (N->getValueType(0) == MVT::i128) {
+ RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
+ SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
+ SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
+ } else if (N->getValueType(0) == MVT::i64) {
+ RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
+ SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+ SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
+ } else {
+ llvm_unreachable("Unhandled value type for BUILD_PAIR");
+ }
+ const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
+ N->getOperand(1), SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+ DL, N->getValueType(0), Ops);
+ }
+
+ case ISD::Constant:
+ case ISD::ConstantFP: {
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
+ break;
+
+ uint64_t Imm;
+ if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
+ Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
+ else {
+ ConstantSDNode *C = cast<ConstantSDNode>(N);
+ Imm = C->getZExtValue();
+ }
+
+ SDLoc DL(N);
+ SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
+ MVT::i32));
+ SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
+ const SDValue Ops[] = {
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
+ SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
+ };
+
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
+ N->getValueType(0), Ops);
+ }
+
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+
+ if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) {
+ N = glueCopyToM0(N);
+ break;
+ }
+
+ // To simplify the TableGen patterns, we replace all i64 loads with
+ // v2i32 loads. Alternatively, we could promote i64 loads to v2i32
+ // during DAG legalization; however, some places in the DAG legalizer
+ // (e.g. ExpandUnalignedLoad) assume that i64 loads are legal whenever
+ // i64 is legal, so doing this promotion early can cause problems.
+
+ SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
+ LD->getBasePtr(), LD->getMemOperand());
+ SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
+ MVT::i64, NewLoad);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
+ SDNode *Load = glueCopyToM0(NewLoad.getNode());
+ SelectCode(Load);
+ N = BitCast.getNode();
+ break;
+ }
+
+ case ISD::STORE: {
+ // Handle i64 stores here for the same reason mentioned above for loads.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Value = ST->getValue();
+ if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) {
+
+ SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+ MVT::v2i32, Value);
+ SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
+ ST->getBasePtr(), ST->getMemOperand());
+
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
+
+ if (NewValue.getOpcode() == ISD::BITCAST) {
+ Select(NewStore.getNode());
+ return SelectCode(NewValue.getNode());
+ }
+
+ // getNode() may fold the bitcast if its input was another bitcast. If that
+ // happens we should only select the new store.
+ N = NewStore.getNode();
+ }
+
+ N = glueCopyToM0(N);
+ break;
+ }
+
+ case AMDGPUISD::REGISTER_LOAD: {
+ if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ break;
+ SDValue Addr, Offset;
+
+ SDLoc DL(N);
+ SelectADDRIndirect(N->getOperand(1), Addr, Offset);
+ const SDValue Ops[] = {
+ Addr,
+ Offset,
+ CurDAG->getTargetConstant(0, DL, MVT::i32),
+ N->getOperand(0),
+ };
+ return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL,
+ CurDAG->getVTList(MVT::i32, MVT::i64,
+ MVT::Other),
+ Ops);
+ }
+ case AMDGPUISD::REGISTER_STORE: {
+ if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ break;
+ SDValue Addr, Offset;
+ SelectADDRIndirect(N->getOperand(2), Addr, Offset);
+ SDLoc DL(N);
+ const SDValue Ops[] = {
+ N->getOperand(1),
+ Addr,
+ Offset,
+ CurDAG->getTargetConstant(0, DL, MVT::i32),
+ N->getOperand(0),
+ };
+ return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL,
+ CurDAG->getVTList(MVT::Other),
+ Ops);
+ }
+
+ case AMDGPUISD::BFE_I32:
+ case AMDGPUISD::BFE_U32: {
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ break;
+
+ // There is a scalar version available, but unlike the vector version which
+ // has a separate operand for the offset and width, the scalar version packs
+ // the width and offset into a single operand. Try to move to the scalar
+ // version if the offsets are constant, so that we can try to keep extended
+ // loads of kernel arguments in SGPRs.
+
+ // TODO: Technically we could try to pattern match scalar bitshifts of
+ // dynamic values, but it's probably not useful.
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!Offset)
+ break;
+
+ ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
+ if (!Width)
+ break;
+
+ bool Signed = Opc == AMDGPUISD::BFE_I32;
+
+ uint32_t OffsetVal = Offset->getZExtValue();
+ uint32_t WidthVal = Width->getZExtValue();
+
+ return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
+ N->getOperand(0), OffsetVal, WidthVal);
+
+ }
+ case AMDGPUISD::DIV_SCALE: {
+ return SelectDIV_SCALE(N);
+ }
+ case ISD::CopyToReg: {
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+ Lowering.legalizeTargetIndependentNode(N, *CurDAG);
+ break;
+ }
+ case ISD::ADDRSPACECAST:
+ return SelectAddrSpaceCast(N);
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SRA:
+ if (N->getValueType(0) != MVT::i32 ||
+ Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ break;
+
+ return SelectS_BFE(N);
+ }
+
+ return SelectCode(N);
+}
+
+
+bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
+ assert(AS != 0 && "Use checkPrivateAddress instead.");
+ if (!Ptr)
+ return false;
+
+ return Ptr->getType()->getPointerAddressSpace() == AS;
+}
+
+bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
+ if (Op->getPseudoValue())
+ return true;
+
+ if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
+ return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
+ const Value *MemVal = N->getMemOperand()->getValue();
+ return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
+}
+
+bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
+ const Value *MemVal = N->getMemOperand()->getValue();
+ if (CbId == -1)
+ return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
+
+ return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
+}
+
+bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
+ if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ N->getMemoryVT().bitsLT(MVT::i32))
+ return true;
+
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
+ MachineMemOperand *MMO = N->getMemOperand();
+ if (checkPrivateAddress(N->getMemOperand())) {
+ if (MMO) {
+ const PseudoSourceValue *PSV = MMO->getPseudoValue();
+ if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
+ if (checkPrivateAddress(N->getMemOperand())) {
+ // Check to make sure we are not a constant pool load or a constant load
+ // that is marked as a private load
+ if (isCPLoad(N) || isConstantLoad(N, -1)) {
+ return false;
+ }
+ }
+
+ const Value *MemVal = N->getMemOperand()->getValue();
+ if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
+ return true;
+ }
+ return false;
+}
+
+const char *AMDGPUDAGToDAGISel::getPassName() const {
+ return "AMDGPU DAG->DAG Pattern Instruction Selection";
+}
+
+#ifdef DEBUGTMP
+#undef INT64_C
+#endif
+#undef DEBUGTMP
+
+//===----------------------------------------------------------------------===//
+// Complex Patterns
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+ SDValue& IntPtr) {
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
+ IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
+ true);
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue& BaseReg, SDValue &Offset) {
+ if (!isa<ConstantSDNode>(Addr)) {
+ BaseReg = Addr;
+ Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode *IMMOffset;
+
+ if (Addr.getOpcode() == ISD::ADD
+ && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
+ return true;
+ // If the pointer address is constant, we can move it to the offset field.
+ } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+ Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ SDLoc(CurDAG->getEntryNode()),
+ AMDGPU::ZERO, MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
+ return true;
+ }
+
+ // Default case, no offset
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode *C;
+ SDLoc DL(Addr);
+
+ if ((C = dyn_cast<ConstantSDNode>(Addr))) {
+ Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+ (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ } else {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ }
+
+ return true;
+}
+
+SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
+ SDLoc DL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ bool IsAdd = (N->getOpcode() == ISD::ADD);
+
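+ // Expand the 64-bit operation into 32-bit halves: extract the low and high
+ // subregisters of each operand, add/sub the low halves producing a carry,
+ // propagate the carry into the high halves, then recombine the two results
+ // with a REG_SEQUENCE.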
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+ SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
+
+ SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, LHS, Sub0);
+ SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, LHS, Sub1);
+
+ SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, RHS, Sub0);
+ SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, RHS, Sub1);
+
+ SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
+ SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
+
+
+ unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
+ unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
+
+ SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
+ SDValue Carry(AddLo, 1);
+ SDNode *AddHi
+ = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
+ SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
+
+ SDValue Args[5] = {
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
+ SDValue(AddLo,0),
+ Sub0,
+ SDValue(AddHi,0),
+ Sub1,
+ };
+ return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
+}
+
+// We need to handle this here because tablegen doesn't support matching
+// instructions with multiple outputs.
+SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+
+ assert(VT == MVT::f32 || VT == MVT::f64);
+
+ unsigned Opc
+ = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
+
+ // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
+ SDValue Ops[8];
+
+ SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
+ SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
+ SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
+ return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+}
+
+bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+ unsigned OffsetBits) const {
+ if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
+ (OffsetBits == 8 && !isUInt<8>(Offset)))
+ return false;
+
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
+ return true;
+
+ // On Southern Islands, instructions with a negative base value and an offset
+ // don't seem to work.
+ return CurDAG->SignBitIsZero(Base);
+}
+
+bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+ if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
+ // (add n0, c0)
+ Base = N0;
+ Offset = N1;
+ return true;
+ }
+ }
+
+ SDLoc DL(Addr);
+
+ // If we have a constant address, prefer to put the constant into the
+ // offset. This can save moves to load the constant address since multiple
+ // operations can share the zero base address register, and enables merging
+ // into read2 / write2 instructions.
+ if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+ if (isUInt<16>(CAddr->getZExtValue())) {
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+ DL, MVT::i32, Zero);
+ Base = SDValue(MovZero, 0);
+ Offset = Addr;
+ return true;
+ }
+ }
+
+ // default case
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
+ SDValue &Offset0,
+ SDValue &Offset1) const {
+ SDLoc DL(Addr);
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+ unsigned DWordOffset0 = C1->getZExtValue() / 4;
+ unsigned DWordOffset1 = DWordOffset0 + 1;
+ // (add n0, c0)
+ if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
+ Base = N0;
+ Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
+ return true;
+ }
+ }
+
+ if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+ unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
+ unsigned DWordOffset1 = DWordOffset0 + 1;
+ assert(4 * DWordOffset0 == CAddr->getZExtValue());
+
+ if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ MachineSDNode *MovZero
+ = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+ DL, MVT::i32, Zero);
+ Base = SDValue(MovZero, 0);
+ Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
+ return true;
+ }
+ }
+
+ // default case
+ Base = Addr;
+ Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
+ return true;
+}
+
+static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
+ return isUInt<12>(Imm->getZExtValue());
+}
+
+void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &Offset, SDValue &Offen,
+ SDValue &Idxen, SDValue &Addr64,
+ SDValue &GLC, SDValue &SLC,
+ SDValue &TFE) const {
+ SDLoc DL(Addr);
+
+ GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
+
+ Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+
+ if (N0.getOpcode() == ISD::ADD) {
+ // (add (add N2, N3), C1) -> addr64
+ SDValue N2 = N0.getOperand(0);
+ SDValue N3 = N0.getOperand(1);
+ Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
+ Ptr = N2;
+ VAddr = N3;
+ } else {
+ // (add N0, C1) -> offset
+ VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Ptr = N0;
+ }
+
+ if (isLegalMUBUFImmOffset(C1)) {
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
+ return;
+ } else if (isUInt<32>(C1->getZExtValue())) {
+ // Illegal offset, store it in soffset.
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
+ 0);
+ return;
+ }
+ }
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ // (add N0, N1) -> addr64
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
+ Ptr = N0;
+ VAddr = N1;
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ return;
+ }
+
+ // default case -> offset
+ VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Ptr = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &Offset, SDValue &GLC,
+ SDValue &SLC, SDValue &TFE) const {
+ SDValue Ptr, Offen, Idxen, Addr64;
+
+ SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+ GLC, SLC, TFE);
+
+ ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
+ if (C->getSExtValue()) {
+ SDLoc DL(Addr);
+
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+
+ SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
+ return true;
+ }
+
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &Offset,
+ SDValue &SLC) const {
+ SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
+ SDValue GLC, TFE;
+
+ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &ImmOffset) const {
+
+ SDLoc DL(Addr);
+ MachineFunction &MF = CurDAG->getMachineFunction();
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+
+ unsigned ScratchOffsetReg =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
+ Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
+ ScratchOffsetReg, MVT::i32);
+ SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
+ SDValue ScratchRsrcDword0 =
+ SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);
+
+ SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
+ SDValue ScratchRsrcDword1 =
+ SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
+
+ const SDValue RsrcOps[] = {
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
+ ScratchRsrcDword0,
+ CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ ScratchRsrcDword1,
+ CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
+ };
+ SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::v2i32, RsrcOps), 0);
+ Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
+ SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
+
+ // (add n0, c1)
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N1 = Addr.getOperand(1);
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+
+ if (isLegalMUBUFImmOffset(C1)) {
+ VAddr = Addr.getOperand(0);
+ ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
+ return true;
+ }
+ }
+
+ // (node)
+ VAddr = Addr;
+ ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
+ SDValue &SOffset, SDValue &Offset,
+ SDValue &GLC, SDValue &SLC,
+ SDValue &TFE) const {
+ SDValue Ptr, VAddr, Offen, Idxen, Addr64;
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+
+ SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+ GLC, SLC, TFE);
+
+ if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
+ !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
+ !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
+ uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
+ APInt::getAllOnesValue(32).getZExtValue(); // Size
+ SDLoc DL(Addr);
+
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+
+ SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
+ SDValue &Soffset, SDValue &Offset,
+ SDValue &GLC) const {
+ SDValue SLC, TFE;
+
+ return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+}
+
+// FIXME: This is incorrect and only enough to be able to compile.
+SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
+ AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
+ SDLoc DL(N);
+
+ assert(Subtarget->hasFlatAddressSpace() &&
+ "addrspacecast only supported with flat address space!");
+
+ assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
+ ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) &&
+ "Cannot cast address space to / from constant address!");
+
+ assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
+ ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
+ "Can only cast to / from flat address space!");
+
+ // The flat instructions read the address as the index of the VGPR holding the
+ // address, so casting just reinterprets the base VGPR; we only need to insert a
+ // trunc / bitcast / zext of the pointer value.
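+ // For example, widening a 32-bit pointer to a 64-bit flat pointer below builds
+ // a REG_SEQUENCE with the source VGPR in sub0 and a zero in sub1.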
+
+ SDValue Src = ASC->getOperand(0);
+ EVT DestVT = ASC->getValueType(0);
+ EVT SrcVT = Src.getValueType();
+
+ unsigned SrcSize = SrcVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+
+ if (SrcSize > DestSize) {
+ assert(SrcSize == 64 && DestSize == 32);
+ return CurDAG->getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG,
+ DL,
+ DestVT,
+ Src,
+ CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
+ }
+
+
+ if (DestSize > SrcSize) {
+ assert(SrcSize == 32 && DestSize == 64);
+
+ // FIXME: This is probably wrong, we should never be defining
+ // a register class with both VGPRs and SGPRs
+ SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
+ MVT::i32);
+
+ const SDValue Ops[] = {
+ RC,
+ Src,
+ CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(0, DL, MVT::i32)), 0),
+ CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
+ };
+
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+ DL, N->getValueType(0), Ops);
+ }
+
+ assert(SrcSize == 64 && DestSize == 64);
+ return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
+}
+
+SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ uint32_t Offset, uint32_t Width) {
+ // Transformation function, pack the offset and width of a BFE into
+ // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
+ // source, bits [5:0] contain the offset and bits [22:16] the width.
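+ // For example, Offset = 8 and Width = 5 pack to 8 | (5 << 16) = 0x00050008.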
+ uint32_t PackedVal = Offset | (Width << 16);
+ SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
+
+ return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
+}
+
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
+ // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
+ // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
+ // Predicate: 0 < b <= c < 32
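+ // For example, with b = 8 and c = 16, ((a << 8) srl 16) selects to
+ // BFE_U32 a, 8, 16, i.e. extract 16 bits of a starting at bit 8.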
+
+ const SDValue &Shl = N->getOperand(0);
+ ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ if (B && C) {
+ uint32_t BVal = B->getZExtValue();
+ uint32_t CVal = C->getZExtValue();
+
+ if (0 < BVal && BVal <= CVal && CVal < 32) {
+ bool Signed = N->getOpcode() == ISD::SRA;
+ unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
+
+ return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
+ CVal - BVal, 32 - CVal);
+ }
+ }
+ return SelectCode(N);
+}
+
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::AND:
+ if (N->getOperand(0).getOpcode() == ISD::SRL) {
+ // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
+ // Predicate: isMask(mask)
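+ // For example, (a srl 3) & 0xff selects to BFE_U32 a, 3, 8, since
+ // popcount(0xff) == 8.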
+ const SDValue &Srl = N->getOperand(0);
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ if (Shift && Mask) {
+ uint32_t ShiftVal = Shift->getZExtValue();
+ uint32_t MaskVal = Mask->getZExtValue();
+
+ if (isMask_32(MaskVal)) {
+ uint32_t WidthVal = countPopulation(MaskVal);
+
+ return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
+ ShiftVal, WidthVal);
+ }
+ }
+ }
+ break;
+ case ISD::SRL:
+ if (N->getOperand(0).getOpcode() == ISD::AND) {
+ // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
+ // Predicate: isMask(mask >> b)
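+ // For example, (a & 0xff0) srl 4 selects to BFE_U32 a, 4, 8, since
+ // 0xff0 >> 4 == 0xff and popcount(0xff) == 8.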
+ const SDValue &And = N->getOperand(0);
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
+
+ if (Shift && Mask) {
+ uint32_t ShiftVal = Shift->getZExtValue();
+ uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
+
+ if (isMask_32(MaskVal)) {
+ uint32_t WidthVal = countPopulation(MaskVal);
+
+ return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
+ ShiftVal, WidthVal);
+ }
+ }
+ } else if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return SelectS_BFEFromShifts(N);
+ break;
+ case ISD::SRA:
+ if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return SelectS_BFEFromShifts(N);
+ break;
+ }
+
+ return SelectCode(N);
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+
+ unsigned Mods = 0;
+
+ Src = In;
+
+ if (Src.getOpcode() == ISD::FNEG) {
+ Mods |= SISrcMods::NEG;
+ Src = Src.getOperand(0);
+ }
+
+ if (Src.getOpcode() == ISD::FABS) {
+ Mods |= SISrcMods::ABS;
+ Src = Src.getOperand(0);
+ }
+
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
+ SDValue &SrcMods, SDValue &Clamp,
+ SDValue &Omod) const {
+ SDLoc DL(In);
+ // FIXME: Handle Clamp and Omod
+ Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
+
+ return SelectVOP3Mods(In, Src, SrcMods);
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
+ SDValue &SrcMods,
+ SDValue &Omod) const {
+ // FIXME: Handle Omod
+ Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
+
+ return SelectVOP3Mods(In, Src, SrcMods);
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
+ SDValue &SrcMods,
+ SDValue &Clamp,
+ SDValue &Omod) const {
+ Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
+ return SelectVOP3Mods(In, Src, SrcMods);
+}
+
+void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
+ const AMDGPUTargetLowering& Lowering =
+ *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
+ bool IsModified = false;
+ do {
+ IsModified = false;
+ // Go over all selected nodes and try to fold them a bit more
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I) {
+
+ SDNode *Node = I;
+
+ MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
+ if (!MachineNode)
+ continue;
+
+ SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
+ if (ResNode != Node) {
+ ReplaceUses(Node, ResNode);
+ IsModified = true;
+ }
+ }
+ CurDAG->RemoveDeadNodes();
+ } while (IsModified);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
new file mode 100644
index 0000000..570473d
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -0,0 +1,2866 @@
+//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This is the parent TargetLowering class for hardware code gen
+/// targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDGPU.h"
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "R600MachineFunctionInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Diagnostic information for unimplemented or unsupported feature reporting.
+class DiagnosticInfoUnsupported : public DiagnosticInfo {
+private:
+ const Twine &Description;
+ const Function &Fn;
+
+ static int KindID;
+
+ static int getKindID() {
+ if (KindID == 0)
+ KindID = llvm::getNextAvailablePluginDiagnosticKind();
+ return KindID;
+ }
+
+public:
+ DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc,
+ DiagnosticSeverity Severity = DS_Error)
+ : DiagnosticInfo(getKindID(), Severity),
+ Description(Desc),
+ Fn(Fn) { }
+
+ const Function &getFunction() const { return Fn; }
+ const Twine &getDescription() const { return Description; }
+
+ void print(DiagnosticPrinter &DP) const override {
+ DP << "unsupported " << getDescription() << " in " << Fn.getName();
+ }
+
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == getKindID();
+ }
+};
+
+int DiagnosticInfoUnsupported::KindID = 0;
+} // namespace
+
+
+static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ unsigned Offset = State.AllocateStack(ValVT.getStoreSize(),
+ ArgFlags.getOrigAlign());
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+
+ return true;
+}
+
+#include "AMDGPUGenCallingConv.inc"
+
+// Find a larger type to do a load / store of a vector with.
+EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
+ unsigned StoreSize = VT.getStoreSizeInBits();
+ if (StoreSize <= 32)
+ return EVT::getIntegerVT(Ctx, StoreSize);
+
+ assert(StoreSize % 32 == 0 && "Store size not a multiple of 32");
+ return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
+}
+
+// Type for a vector that will be loaded to.
+EVT AMDGPUTargetLowering::getEquivalentLoadRegType(LLVMContext &Ctx, EVT VT) {
+ unsigned StoreSize = VT.getStoreSizeInBits();
+ if (StoreSize <= 32)
+ return EVT::getIntegerVT(Ctx, 32);
+
+ return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
+}
+
+AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
+ const AMDGPUSubtarget &STI)
+ : TargetLowering(TM), Subtarget(&STI) {
+ setOperationAction(ISD::Constant, MVT::i32, Legal);
+ setOperationAction(ISD::Constant, MVT::i64, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+
+ // We need to custom lower some of the intrinsics
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // Library functions. These default to Expand, but we have instructions
+ // for them.
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FEXP2, MVT::f32, Legal);
+ setOperationAction(ISD::FPOW, MVT::f32, Legal);
+ setOperationAction(ISD::FLOG2, MVT::f32, Legal);
+ setOperationAction(ISD::FABS, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+
+ setOperationAction(ISD::FROUND, MVT::f32, Custom);
+ setOperationAction(ISD::FROUND, MVT::f64, Custom);
+
+ setOperationAction(ISD::FREM, MVT::f32, Custom);
+ setOperationAction(ISD::FREM, MVT::f64, Custom);
+
+ // v_mad_f32 does not support denormals according to some sources.
+ if (!Subtarget->hasFP32Denormals())
+ setOperationAction(ISD::FMAD, MVT::f32, Legal);
+
+ // Expand to fneg + fadd.
+ setOperationAction(ISD::FSUB, MVT::f64, Expand);
+
+ // Lower floating point store/load to integer store/load to reduce the number
+ // of patterns in tablegen.
+ setOperationAction(ISD::STORE, MVT::f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
+
+ setOperationAction(ISD::STORE, MVT::v2f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
+
+ setOperationAction(ISD::STORE, MVT::v4f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
+
+ setOperationAction(ISD::STORE, MVT::v8f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
+
+ setOperationAction(ISD::STORE, MVT::v16f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
+
+ setOperationAction(ISD::STORE, MVT::f64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
+
+ setOperationAction(ISD::STORE, MVT::v2f64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v2i64);
+
+ // Custom lowering of vector stores is required for local address space
+ // stores.
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+
+ setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
+ setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
+ setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
+
+ // XXX: This can be changed to Custom, once ExpandVectorStores can
+ // handle 64-bit stores.
+ setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
+
+ setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i8, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i1, Expand);
+ setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand);
+ setTruncStoreAction(MVT::v4i64, MVT::v4i1, Expand);
+
+
+ setOperationAction(ISD::LOAD, MVT::f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
+
+ setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
+
+ setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
+
+ setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
+
+ setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
+
+ setOperationAction(ISD::LOAD, MVT::f64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
+
+ setOperationAction(ISD::LOAD, MVT::v2f64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v2i64);
+
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
+
+ // There are no 64-bit extloads. These should be done as a 32-bit extload and
+ // an extension to 64-bit.
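+ // For example, an i8 -> i64 sextload is done as an i8 -> i32 sextload followed
+ // by a sign extension to i64.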
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand);
+ }
+
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i8, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand);
+ }
+
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
+ setOperationAction(ISD::FCEIL, MVT::f64, Custom);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
+ setOperationAction(ISD::FRINT, MVT::f64, Custom);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
+ }
+
+ if (!Subtarget->hasBFI()) {
+ // fcopysign can be done in a single instruction with BFI.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ }
+
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
+
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
+ setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
+ setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
+ for (MVT VT : ScalarIntVTs) {
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+
+ // The GPU does not have divrem functions for signed or unsigned division.
+ setOperationAction(ISD::SDIVREM, VT, Custom);
+ setOperationAction(ISD::UDIVREM, VT, Custom);
+
+ // The GPU does not have single-instruction [S|U]MUL_LOHI operations.
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+ setOperationAction(ISD::BSWAP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
+
+ if (!Subtarget->hasBCNT(32))
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+
+ if (!Subtarget->hasBCNT(64))
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+
+ // The hardware supports 32-bit ROTR, but not ROTL.
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
+
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+
+ setOperationAction(ISD::SMIN, MVT::i32, Legal);
+ setOperationAction(ISD::UMIN, MVT::i32, Legal);
+ setOperationAction(ISD::SMAX, MVT::i32, Legal);
+ setOperationAction(ISD::UMAX, MVT::i32, Legal);
+
+ if (!Subtarget->hasFFBH())
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+
+ if (!Subtarget->hasFFBL())
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+
+ static const MVT::SimpleValueType VectorIntTypes[] = {
+ MVT::v2i32, MVT::v4i32
+ };
+
+ for (MVT VT : VectorIntTypes) {
+ // Expand the following operations for the current type by default.
+ setOperationAction(ISD::ADD, VT, Expand);
+ setOperationAction(ISD::AND, VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ setOperationAction(ISD::MUL, VT, Expand);
+ setOperationAction(ISD::OR, VT, Expand);
+ setOperationAction(ISD::SHL, VT, Expand);
+ setOperationAction(ISD::SRA, VT, Expand);
+ setOperationAction(ISD::SRL, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::SUB, VT, Expand);
+ setOperationAction(ISD::SINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Custom);
+ setOperationAction(ISD::UDIVREM, VT, Custom);
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::SUBE, VT, Expand);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::XOR, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ }
+
+ static const MVT::SimpleValueType FloatVectorTypes[] = {
+ MVT::v2f32, MVT::v4f32
+ };
+
+ for (MVT VT : FloatVectorTypes) {
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FMINNUM, VT, Expand);
+ setOperationAction(ISD::FMAXNUM, VT, Expand);
+ setOperationAction(ISD::FADD, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FMUL, VT, Expand);
+ setOperationAction(ISD::FMA, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FSUB, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ }
+
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
+
+ setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::SELECT);
+ setTargetDAGCombine(ISD::SELECT_CC);
+ setTargetDAGCombine(ISD::STORE);
+
+ setTargetDAGCombine(ISD::FADD);
+ setTargetDAGCombine(ISD::FSUB);
+
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ setSchedulingPreference(Sched::RegPressure);
+ setJumpIsExpensive(true);
+
+ // SI at least has hardware support for floating point exceptions, but no way
+ // of using or handling them is implemented. They are also optional in OpenCL
+ // (Section 7.3)
+ setHasFloatingPointExceptions(false);
+
+ setSelectIsExpensive(false);
+ PredictableSelectIsExpensive = false;
+
+ // There are no integer divide instructions, and these expand to a pretty
+ // large sequence of instructions.
+ setIntDivIsCheap(false);
+ setPow2SDivIsCheap(false);
+ setFsqrtIsCheap(true);
+
+ // FIXME: Need to really handle these.
+ MaxStoresPerMemcpy = 4096;
+ MaxStoresPerMemmove = 4096;
+ MaxStoresPerMemset = 4096;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Information
+//===----------------------------------------------------------------------===//
+
+MVT AMDGPUTargetLowering::getVectorIdxTy() const {
+ return MVT::i32;
+}
+
+bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const {
+ return true;
+}
+
+// The backend supports 32 and 64 bit floating point immediates.
+// FIXME: Why are we reporting vectors of FP immediates as legal?
+bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ EVT ScalarVT = VT.getScalarType();
+ return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64);
+}
+
+// We don't want to shrink f64 / f32 constants.
+bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
+ EVT ScalarVT = VT.getScalarType();
+ return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
+}
+
+bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
+ ISD::LoadExtType,
+ EVT NewVT) const {
+
+ unsigned NewSize = NewVT.getStoreSizeInBits();
+
+ // If we are reducing to a 32-bit load, this is always better.
+ if (NewSize == 32)
+ return true;
+
+ EVT OldVT = N->getValueType(0);
+ unsigned OldSize = OldVT.getStoreSizeInBits();
+
+ // Don't produce extloads from sub 32-bit types. SI doesn't have scalar
+ // extloads, so doing one requires using a buffer_load. In cases where we
+ // still couldn't use a scalar load, using the wider load shouldn't really
+ // hurt anything.
+
+ // If the old size already required an extload, there's no harm in continuing
+ // to reduce the width.
+ return (OldSize < 32);
+}
+
+bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
+ EVT CastTy) const {
+ if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
+ return true;
+
+ unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits();
+ unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits();
+
+ return ((LScalarSize <= CastScalarSize) ||
+ (CastScalarSize >= 32) ||
+ (LScalarSize < 32));
+}
+
+// SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
+// profitable with the expansion for 64-bit since it's generally good to
+// speculate things.
+// FIXME: These should really have the size as a parameter.
+bool AMDGPUTargetLowering::isCheapToSpeculateCttz() const {
+ return true;
+}
+
+bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const {
+ return true;
+}
+
+//===---------------------------------------------------------------------===//
+// Target Properties
+//===---------------------------------------------------------------------===//
+
+bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
+ assert(VT.isFloatingPoint());
+ return VT == MVT::f32 || VT == MVT::f64;
+}
+
+bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
+ assert(VT.isFloatingPoint());
+ return VT == MVT::f32 || VT == MVT::f64;
+}
+
+bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT,
+ unsigned NumElem,
+ unsigned AS) const {
+ return true;
+}
+
+bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
+ // Truncate is just accessing a subregister.
+ return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0);
+}
+
+bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
+ // Truncate is just accessing a subregister.
+ return Dest->getPrimitiveSizeInBits() < Source->getPrimitiveSizeInBits() &&
+ (Dest->getPrimitiveSizeInBits() % 32 == 0);
+}
+
+bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
+ const DataLayout *DL = getDataLayout();
+ unsigned SrcSize = DL->getTypeSizeInBits(Src->getScalarType());
+ unsigned DestSize = DL->getTypeSizeInBits(Dest->getScalarType());
+
+ return SrcSize == 32 && DestSize == 64;
+}
+
+bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
+ // Any register load of a 64-bit value really requires 2 32-bit moves. For all
+ // practical purposes, the extra mov 0 to load a 64-bit value is free. As used,
+ // this will enable reducing 64-bit operations to 32-bit, which is always
+ // good.
+ return Src == MVT::i32 && Dest == MVT::i64;
+}
+
+bool AMDGPUTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ return isZExtFree(Val.getValueType(), VT2);
+}
+
+bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
+ // There aren't really 64-bit registers, but pairs of 32-bit ones and only a
+ // limited number of native 64-bit operations. Shrinking an operation to fit
+ // in a single 32-bit register should always be helpful. As currently used,
+ // this is much less general than the name suggests, and is only used in
+ // places trying to reduce the sizes of loads. Shrinking loads to < 32-bits is
+ // not profitable, and may actually be harmful.
+ return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32;
+}
+
+//===---------------------------------------------------------------------===//
+// TargetLowering Callbacks
+//===---------------------------------------------------------------------===//
+
+void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const {
+
+ State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
+}
+
+SDValue AMDGPUTargetLowering::LowerReturn(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc DL, SelectionDAG &DAG) const {
+ return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
+}
+
+//===---------------------------------------------------------------------===//
+// Target specific lowering
+//===---------------------------------------------------------------------===//
+
+SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SDValue Callee = CLI.Callee;
+ SelectionDAG &DAG = CLI.DAG;
+
+ const Function &Fn = *DAG.getMachineFunction().getFunction();
+
+ StringRef FuncName("<unknown>");
+
+ if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee))
+ FuncName = G->getSymbol();
+ else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ FuncName = G->getGlobal()->getName();
+
+ DiagnosticInfoUnsupported NoCalls(Fn, "call to function " + FuncName);
+ DAG.getContext()->diagnose(NoCalls);
+ return SDValue();
+}
+
+SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default:
+ Op.getNode()->dump();
+ llvm_unreachable("Custom lowering code for this"
+ "instruction is not implemented yet!");
+ break;
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
+ case ISD::FREM: return LowerFREM(Op, DAG);
+ case ISD::FCEIL: return LowerFCEIL(Op, DAG);
+ case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
+ case ISD::FRINT: return LowerFRINT(Op, DAG);
+ case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
+ case ISD::FROUND: return LowerFROUND(Op, DAG);
+ case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ }
+ return Op;
+}
+
+void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ case ISD::SIGN_EXTEND_INREG:
+ // Different parts of legalization seem to interpret which type of
+ // sign_extend_inreg is the one to check for custom lowering. The extended
+ // from type is what really matters, but some places check for custom
+ // lowering of the result type. This results in trying to use
+ // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
+ // nothing here and let the illegal result integer be handled normally.
+ return;
+ case ISD::LOAD: {
+ SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
+ if (!Node)
+ return;
+
+ Results.push_back(SDValue(Node, 0));
+ Results.push_back(SDValue(Node, 1));
+ // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
+ // function
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
+ return;
+ }
+ case ISD::STORE: {
+ SDValue Lowered = LowerSTORE(SDValue(N, 0), DAG);
+ if (Lowered.getNode())
+ Results.push_back(Lowered);
+ return;
+ }
+ default:
+ return;
+ }
+}
+
+// FIXME: This implements accesses to initialized globals in the constant
+// address space by copying them to private and accessing that. It does not
+// properly handle illegal types or vectors. The private vector loads are not
+// scalarized, and the illegal scalars hit an assertion. This technique will not
+// work well with large initializers, and this should eventually be
+// removed. Initialized globals should be placed into a data section that the
+// runtime will load into a buffer before the kernel is executed. Uses of the
+// global need to be replaced with a pointer loaded from an implicit kernel
+// argument into this buffer holding the copy of the data, which will remove the
+// need for any of this.
+SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
+ const GlobalValue *GV,
+ const SDValue &InitPtr,
+ SDValue Chain,
+ SelectionDAG &DAG) const {
+ const DataLayout *TD = getDataLayout();
+ SDLoc DL(InitPtr);
+ Type *InitTy = Init->getType();
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Init)) {
+ EVT VT = EVT::getEVT(InitTy);
+ PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
+ return DAG.getStore(Chain, DL, DAG.getConstant(*CI, DL, VT), InitPtr,
+ MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
+ TD->getPrefTypeAlignment(InitTy));
+ }
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
+ EVT VT = EVT::getEVT(CFP->getType());
+ PointerType *PtrTy = PointerType::get(CFP->getType(), 0);
+ return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, DL, VT), InitPtr,
+ MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
+ TD->getPrefTypeAlignment(CFP->getType()));
+ }
+
+ if (StructType *ST = dyn_cast<StructType>(InitTy)) {
+ const StructLayout *SL = TD->getStructLayout(ST);
+
+ EVT PtrVT = InitPtr.getValueType();
+ SmallVector<SDValue, 8> Chains;
+
+ for (unsigned I = 0, N = ST->getNumElements(); I != N; ++I) {
+ SDValue Offset = DAG.getConstant(SL->getElementOffset(I), DL, PtrVT);
+ SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
+
+ Constant *Elt = Init->getAggregateElement(I);
+ Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG));
+ }
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ }
+
+ if (SequentialType *SeqTy = dyn_cast<SequentialType>(InitTy)) {
+ EVT PtrVT = InitPtr.getValueType();
+
+ unsigned NumElements;
+ if (ArrayType *AT = dyn_cast<ArrayType>(SeqTy))
+ NumElements = AT->getNumElements();
+ else if (VectorType *VT = dyn_cast<VectorType>(SeqTy))
+ NumElements = VT->getNumElements();
+ else
+ llvm_unreachable("Unexpected type");
+
+ unsigned EltSize = TD->getTypeAllocSize(SeqTy->getElementType());
+ SmallVector<SDValue, 8> Chains;
+ for (unsigned i = 0; i < NumElements; ++i) {
+ SDValue Offset = DAG.getConstant(i * EltSize, DL, PtrVT);
+ SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
+
+ Constant *Elt = Init->getAggregateElement(i);
+ Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG));
+ }
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ }
+
+ if (isa<UndefValue>(Init)) {
+ EVT VT = EVT::getEVT(InitTy);
+ PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
+ return DAG.getStore(Chain, DL, DAG.getUNDEF(VT), InitPtr,
+ MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
+ TD->getPrefTypeAlignment(InitTy));
+ }
+
+ Init->dump();
+ llvm_unreachable("Unhandled constant initializer");
+}
+
+static bool hasDefinedInitializer(const GlobalValue *GV) {
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar || !GVar->hasInitializer())
+ return false;
+
+ if (isa<UndefValue>(GVar->getInitializer()))
+ return false;
+
+ return true;
+}
+
+SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
+ SDValue Op,
+ SelectionDAG &DAG) const {
+
+ const DataLayout *TD = getDataLayout();
+ GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = G->getGlobal();
+
+ switch (G->getAddressSpace()) {
+ case AMDGPUAS::LOCAL_ADDRESS: {
+ // XXX: What does the value of G->getOffset() mean?
+ assert(G->getOffset() == 0 &&
+ "Do not know what to do with an non-zero offset");
+
+ // TODO: We could emit code to handle the initialization somewhere.
+ if (hasDefinedInitializer(GV))
+ break;
+
+ unsigned Offset;
+ if (MFI->LocalMemoryObjects.count(GV) == 0) {
+ uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+ Offset = MFI->LDSSize;
+ MFI->LocalMemoryObjects[GV] = Offset;
+ // XXX: Account for alignment?
+ MFI->LDSSize += Size;
+ } else {
+ Offset = MFI->LocalMemoryObjects[GV];
+ }
+
+ return DAG.getConstant(Offset, SDLoc(Op),
+ getPointerTy(AMDGPUAS::LOCAL_ADDRESS));
+ }
+ case AMDGPUAS::CONSTANT_ADDRESS: {
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ Type *EltType = GV->getType()->getElementType();
+ unsigned Size = TD->getTypeAllocSize(EltType);
+ unsigned Alignment = TD->getPrefTypeAlignment(EltType);
+
+ MVT PrivPtrVT = getPointerTy(AMDGPUAS::PRIVATE_ADDRESS);
+ MVT ConstPtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS);
+
+ int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
+ SDValue InitPtr = DAG.getFrameIndex(FI, PrivPtrVT);
+
+ const GlobalVariable *Var = cast<GlobalVariable>(GV);
+ if (!Var->hasInitializer()) {
+ // This has no use, but bugpoint will hit it.
+ return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
+ }
+
+ const Constant *Init = Var->getInitializer();
+ SmallVector<SDNode*, 8> WorkList;
+
+ for (SDNode::use_iterator I = DAG.getEntryNode()->use_begin(),
+ E = DAG.getEntryNode()->use_end(); I != E; ++I) {
+ if (I->getOpcode() != AMDGPUISD::REGISTER_LOAD && I->getOpcode() != ISD::LOAD)
+ continue;
+ WorkList.push_back(*I);
+ }
+ SDValue Chain = LowerConstantInitializer(Init, GV, InitPtr, DAG.getEntryNode(), DAG);
+ for (SmallVector<SDNode*, 8>::iterator I = WorkList.begin(),
+ E = WorkList.end(); I != E; ++I) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ for (unsigned i = 1; i < (*I)->getNumOperands(); ++i) {
+ Ops.push_back((*I)->getOperand(i));
+ }
+ DAG.UpdateNodeOperands(*I, Ops);
+ }
+ return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
+ }
+ }
+
+ const Function &Fn = *DAG.getMachineFunction().getFunction();
+ DiagnosticInfoUnsupported BadInit(Fn,
+ "initializer for address space");
+ DAG.getContext()->diagnose(BadInit);
+ return SDValue();
+}
+
+SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SmallVector<SDValue, 8> Args;
+
+ for (const SDUse &U : Op->ops())
+ DAG.ExtractVectorElements(U.get(), Args);
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
+}
+
+SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ SmallVector<SDValue, 8> Args;
+ unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ EVT VT = Op.getValueType();
+ DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
+ VT.getVectorNumElements());
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
+}
+
+SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL = Subtarget->getFrameLowering();
+
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
+
+ unsigned FrameIndex = FIN->getIndex();
+ unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
+ return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
+ Op.getValueType());
+}
+
+SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ switch (IntrinsicID) {
+ default: return Op;
+ case AMDGPUIntrinsic::AMDGPU_abs:
+ case AMDGPUIntrinsic::AMDIL_abs: // Legacy name.
+ return LowerIntrinsicIABS(Op, DAG);
+ case AMDGPUIntrinsic::AMDGPU_lrp:
+ return LowerIntrinsicLRP(Op, DAG);
+
+ case AMDGPUIntrinsic::AMDGPU_clamp:
+ case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
+ return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::AMDGPU_div_scale: {
+ // 3rd parameter required to be a constant.
+ const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+ if (!Param)
+ return DAG.getUNDEF(VT);
+
+ // Translate to the operands expected by the machine instruction. The
+ // first parameter must be the same as the first instruction.
+ SDValue Numerator = Op.getOperand(1);
+ SDValue Denominator = Op.getOperand(2);
+
+ // Note this order is opposite of the machine instruction's operand order,
+ // which is s0.f = Quotient, s1.f = Denominator, s2.f = Numerator. The
+ // intrinsic has the numerator as the first operand to match a normal
+ // division operation.
+
+ SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator;
+
+ return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,
+ Denominator, Numerator);
+ }
+
+ case Intrinsic::AMDGPU_div_fmas:
+ return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(4));
+
+ case Intrinsic::AMDGPU_div_fixup:
+ return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::AMDGPU_trig_preop:
+ return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
+ Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::AMDGPU_rcp:
+ return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
+
+ case Intrinsic::AMDGPU_rsq:
+ return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_legacy_rsq:
+ return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+
+ case Intrinsic::AMDGPU_rsq_clamped:
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ Type *Type = VT.getTypeForEVT(*DAG.getContext());
+ APFloat Max = APFloat::getLargest(Type->getFltSemantics());
+ APFloat Min = APFloat::getLargest(Type->getFltSemantics(), true);
+
+ SDValue Rsq = DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
+ SDValue Tmp = DAG.getNode(ISD::FMINNUM, DL, VT, Rsq,
+ DAG.getConstantFP(Max, DL, VT));
+ return DAG.getNode(ISD::FMAXNUM, DL, VT, Tmp,
+ DAG.getConstantFP(Min, DL, VT));
+ } else {
+ return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
+ }
+
+ case Intrinsic::AMDGPU_ldexp:
+ return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+
+ case AMDGPUIntrinsic::AMDGPU_imax:
+ return DAG.getNode(ISD::SMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_umax:
+ return DAG.getNode(ISD::UMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_imin:
+ return DAG.getNode(ISD::SMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_umin:
+ return DAG.getNode(ISD::UMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+
+ case AMDGPUIntrinsic::AMDGPU_umul24:
+ return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT,
+ Op.getOperand(1), Op.getOperand(2));
+
+ case AMDGPUIntrinsic::AMDGPU_imul24:
+ return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT,
+ Op.getOperand(1), Op.getOperand(2));
+
+ case AMDGPUIntrinsic::AMDGPU_umad24:
+ return DAG.getNode(AMDGPUISD::MAD_U24, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case AMDGPUIntrinsic::AMDGPU_imad24:
+ return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte0:
+ return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte1:
+ return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE1, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte2:
+ return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE2, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte3:
+ return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE3, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_bfe_i32:
+ return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
+ Op.getOperand(1),
+ Op.getOperand(2),
+ Op.getOperand(3));
+
+ case AMDGPUIntrinsic::AMDGPU_bfe_u32:
+ return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
+ Op.getOperand(1),
+ Op.getOperand(2),
+ Op.getOperand(3));
+
+ case AMDGPUIntrinsic::AMDGPU_bfi:
+ return DAG.getNode(AMDGPUISD::BFI, DL, VT,
+ Op.getOperand(1),
+ Op.getOperand(2),
+ Op.getOperand(3));
+
+ case AMDGPUIntrinsic::AMDGPU_bfm:
+ return DAG.getNode(AMDGPUISD::BFM, DL, VT,
+ Op.getOperand(1),
+ Op.getOperand(2));
+
+ case AMDGPUIntrinsic::AMDGPU_brev:
+ return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1));
+
+ case Intrinsic::AMDGPU_class:
+ return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
+ Op.getOperand(1), Op.getOperand(2));
+
+ case AMDGPUIntrinsic::AMDIL_exp: // Legacy name.
+ return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDIL_round_nearest: // Legacy name.
+ return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDGPU_trunc: // Legacy name.
+ return DAG.getNode(ISD::FTRUNC, DL, VT, Op.getOperand(1));
+ }
+}
+
+ /// IABS(a) = SMAX(sub(0, a), a)
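+ /// For example, IABS(-7) = SMAX(0 - (-7), -7) = SMAX(7, -7) = 7.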
+SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Op.getOperand(1));
+
+ return DAG.getNode(ISD::SMAX, DL, VT, Neg, Op.getOperand(1));
+}
+
+/// Linear Interpolation
+/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
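+ /// For example, LRP(0.25, 8.0, 4.0) = 0.25 * 8.0 + 0.75 * 4.0 = 5.0.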
+SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
+ DAG.getConstantFP(1.0f, DL, MVT::f32),
+ Op.getOperand(1));
+ SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
+ Op.getOperand(3));
+ return DAG.getNode(ISD::FADD, DL, VT,
+ DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
+ OneSubAC);
+}
+
+/// \brief Generate Min/Max node
+SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ DAGCombinerInfo &DCI) const {
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return SDValue();
+
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ switch (CCOpcode) {
+ case ISD::SETOEQ:
+ case ISD::SETONE:
+ case ISD::SETUNE:
+ case ISD::SETNE:
+ case ISD::SETUEQ:
+ case ISD::SETEQ:
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2:
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2:
+ case ISD::SETUO:
+ case ISD::SETO:
+ break;
+ case ISD::SETULE:
+ case ISD::SETULT: {
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
+ }
+ case ISD::SETOLE:
+ case ISD::SETOLT:
+ case ISD::SETLE:
+ case ISD::SETLT: {
+ // Ordered. Assume ordered for undefined.
+
+ // Only do this after legalization to avoid interfering with other combines
+ // which might occur.
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
+ !DCI.isCalledByLegalizer())
+ return SDValue();
+
+ // We need to permute the operands to get the correct NaN behavior. The
+ // selected operand is the second one based on the failing compare with NaN,
+ // so permute it based on the compare type the hardware uses.
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
+ }
+ case ISD::SETUGE:
+ case ISD::SETUGT: {
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
+ }
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ case ISD::SETOGT: {
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
+ !DCI.isCalledByLegalizer())
+ return SDValue();
+
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
+ }
+ case ISD::SETCC_INVALID:
+ llvm_unreachable("Invalid setcc condcode!");
+ }
+ return SDValue();
+}
+
+// FIXME: Remove this when combines added to DAGCombiner.
+SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ SelectionDAG &DAG) const {
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
+ return SDValue();
+
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ switch (CCOpcode) {
+ case ISD::SETULE:
+ case ISD::SETULT: {
+ unsigned Opc = (LHS == True) ? ISD::UMIN : ISD::UMAX;
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ }
+ case ISD::SETLE:
+ case ISD::SETLT: {
+ unsigned Opc = (LHS == True) ? ISD::SMIN : ISD::SMAX;
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ }
+ case ISD::SETGT:
+ case ISD::SETGE: {
+ unsigned Opc = (LHS == True) ? ISD::SMAX : ISD::SMIN;
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ }
+ case ISD::SETUGE:
+ case ISD::SETUGT: {
+ unsigned Opc = (LHS == True) ? ISD::UMAX : ISD::UMIN;
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ }
+ default:
+ return SDValue();
+ }
+}
+
+SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
+ SelectionDAG &DAG) const {
+ LoadSDNode *Load = cast<LoadSDNode>(Op);
+ EVT MemVT = Load->getMemoryVT();
+ EVT MemEltVT = MemVT.getVectorElementType();
+
+ EVT LoadVT = Op.getValueType();
+ EVT EltVT = LoadVT.getVectorElementType();
+ EVT PtrVT = Load->getBasePtr().getValueType();
+
+ unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
+ SmallVector<SDValue, 8> Loads;
+ SmallVector<SDValue, 8> Chains;
+
+ SDLoc SL(Op);
+ unsigned MemEltSize = MemEltVT.getStoreSize();
+ MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
+ DAG.getConstant(i * MemEltSize, SL, PtrVT));
+
+ SDValue NewLoad
+ = DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
+ Load->getChain(), Ptr,
+ SrcValue.getWithOffset(i * MemEltSize),
+ MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
+ Load->isInvariant(), Load->getAlignment());
+ Loads.push_back(NewLoad.getValue(0));
+ Chains.push_back(NewLoad.getValue(1));
+ }
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::BUILD_VECTOR, SL, LoadVT, Loads),
+ DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains)
+ };
+
+ return DAG.getMergeValues(Ops, SL);
+}
+
+SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ // If this is a 2 element vector, we really want to scalarize and not create
+ // weird 1 element vectors.
+ if (VT.getVectorNumElements() == 2)
+ return ScalarizeVectorLoad(Op, DAG);
+
+ LoadSDNode *Load = cast<LoadSDNode>(Op);
+ SDValue BasePtr = Load->getBasePtr();
+ EVT PtrVT = BasePtr.getValueType();
+ EVT MemVT = Load->getMemoryVT();
+ SDLoc SL(Op);
+ MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());
+
+ EVT LoVT, HiVT;
+ EVT LoMemVT, HiMemVT;
+ SDValue Lo, Hi;
+
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
+ std::tie(Lo, Hi) = DAG.SplitVector(Op, SL, LoVT, HiVT);
+ SDValue LoLoad
+ = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
+ Load->getChain(), BasePtr,
+ SrcValue,
+ LoMemVT, Load->isVolatile(), Load->isNonTemporal(),
+ Load->isInvariant(), Load->getAlignment());
+
+ SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+ DAG.getConstant(LoMemVT.getStoreSize(), SL,
+ PtrVT));
+
+ SDValue HiLoad
+ = DAG.getExtLoad(Load->getExtensionType(), SL, HiVT,
+ Load->getChain(), HiPtr,
+ SrcValue.getWithOffset(LoMemVT.getStoreSize()),
+ HiMemVT, Load->isVolatile(), Load->isNonTemporal(),
+ Load->isInvariant(), Load->getAlignment());
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad),
+ DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
+ LoLoad.getValue(1), HiLoad.getValue(1))
+ };
+
+ return DAG.getMergeValues(Ops, SL);
+}
+
+SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
+ SelectionDAG &DAG) const {
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ EVT MemVT = Store->getMemoryVT();
+ unsigned MemBits = MemVT.getSizeInBits();
+
+  // Byte stores are really expensive, so if possible, try to pack a 32-bit
+  // vector truncating store into a single i32 store.
+  // XXX: We could also optimize other vector bitwidths.
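+  // Illustratively, a <4 x i8> truncating store is packed by the loop below
+  // into (e0 | e1 << 8 | e2 << 16 | e3 << 24) and emitted as one i32 store.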
+ if (!MemVT.isVector() || MemBits > 32) {
+ return SDValue();
+ }
+
+ SDLoc DL(Op);
+ SDValue Value = Store->getValue();
+ EVT VT = Value.getValueType();
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Ptr = Store->getBasePtr();
+ EVT MemEltVT = MemVT.getVectorElementType();
+ unsigned MemEltBits = MemEltVT.getSizeInBits();
+ unsigned MemNumElements = MemVT.getVectorNumElements();
+ unsigned PackedSize = MemVT.getStoreSizeInBits();
+ SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, DL, MVT::i32);
+
+ assert(Value.getValueType().getScalarSizeInBits() >= 32);
+
+ SDValue PackedValue;
+ for (unsigned i = 0; i < MemNumElements; ++i) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
+ DAG.getConstant(i, DL, MVT::i32));
+ Elt = DAG.getZExtOrTrunc(Elt, DL, MVT::i32);
+ Elt = DAG.getNode(ISD::AND, DL, MVT::i32, Elt, Mask); // getZeroExtendInReg
+
+ SDValue Shift = DAG.getConstant(MemEltBits * i, DL, MVT::i32);
+ Elt = DAG.getNode(ISD::SHL, DL, MVT::i32, Elt, Shift);
+
+ if (i == 0) {
+ PackedValue = Elt;
+ } else {
+ PackedValue = DAG.getNode(ISD::OR, DL, MVT::i32, PackedValue, Elt);
+ }
+ }
+
+ if (PackedSize < 32) {
+ EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), PackedSize);
+ return DAG.getTruncStore(Store->getChain(), DL, PackedValue, Ptr,
+ Store->getMemOperand()->getPointerInfo(),
+ PackedVT,
+ Store->isNonTemporal(), Store->isVolatile(),
+ Store->getAlignment());
+ }
+
+ return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
+ Store->getMemOperand()->getPointerInfo(),
+ Store->isVolatile(), Store->isNonTemporal(),
+ Store->getAlignment());
+}
+
+SDValue AMDGPUTargetLowering::ScalarizeVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
+ EVT EltVT = Store->getValue().getValueType().getVectorElementType();
+ EVT PtrVT = Store->getBasePtr().getValueType();
+ unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
+ SDLoc SL(Op);
+
+ SmallVector<SDValue, 8> Chains;
+
+ unsigned EltSize = MemEltVT.getStoreSize();
+ MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
+
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Store->getValue(),
+ DAG.getConstant(i, SL, MVT::i32));
+
+ SDValue Offset = DAG.getConstant(i * MemEltVT.getStoreSize(), SL, PtrVT);
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Store->getBasePtr(), Offset);
+ SDValue NewStore =
+ DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
+ SrcValue.getWithOffset(i * EltSize),
+ MemEltVT, Store->isNonTemporal(), Store->isVolatile(),
+ Store->getAlignment());
+ Chains.push_back(NewStore);
+ }
+
+ return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains);
+}
+
+SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ SDValue Val = Store->getValue();
+ EVT VT = Val.getValueType();
+
+ // If this is a 2 element vector, we really want to scalarize and not create
+ // weird 1 element vectors.
+ if (VT.getVectorNumElements() == 2)
+ return ScalarizeVectorStore(Op, DAG);
+
+ EVT MemVT = Store->getMemoryVT();
+ SDValue Chain = Store->getChain();
+ SDValue BasePtr = Store->getBasePtr();
+ SDLoc SL(Op);
+
+ EVT LoVT, HiVT;
+ EVT LoMemVT, HiMemVT;
+ SDValue Lo, Hi;
+
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
+ std::tie(Lo, Hi) = DAG.SplitVector(Val, SL, LoVT, HiVT);
+
+ EVT PtrVT = BasePtr.getValueType();
+ SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+ DAG.getConstant(LoMemVT.getStoreSize(), SL,
+ PtrVT));
+
+ MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
+ SDValue LoStore
+ = DAG.getTruncStore(Chain, SL, Lo,
+ BasePtr,
+ SrcValue,
+ LoMemVT,
+ Store->isNonTemporal(),
+ Store->isVolatile(),
+ Store->getAlignment());
+ SDValue HiStore
+ = DAG.getTruncStore(Chain, SL, Hi,
+ HiPtr,
+ SrcValue.getWithOffset(LoMemVT.getStoreSize()),
+ HiMemVT,
+ Store->isNonTemporal(),
+ Store->isVolatile(),
+ Store->getAlignment());
+
+ return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
+}
+
+
+SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ LoadSDNode *Load = cast<LoadSDNode>(Op);
+ ISD::LoadExtType ExtType = Load->getExtensionType();
+ EVT VT = Op.getValueType();
+ EVT MemVT = Load->getMemoryVT();
+
+ if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) {
+ assert(VT == MVT::i1 && "Only i1 non-extloads expected");
+ // FIXME: Copied from PPC
+ // First, load into 32 bits, then truncate to 1 bit.
+
+ SDValue Chain = Load->getChain();
+ SDValue BasePtr = Load->getBasePtr();
+ MachineMemOperand *MMO = Load->getMemOperand();
+
+ SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
+ BasePtr, MVT::i8, MMO);
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD),
+ NewLD.getValue(1)
+ };
+
+ return DAG.getMergeValues(Ops, DL);
+ }
+
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS ||
+ ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32))
+ return SDValue();
+
+  // Pre-SI, a private-address extload of less than 32 bits is lowered to a
+  // 32-bit REGISTER_LOAD of the containing word followed by a shift/mask
+  // extract of the byte or short.
+
+ // Get Register holding the target.
+ SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
+ DAG.getConstant(2, DL, MVT::i32));
+ // Load the Register.
+ SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
+ Load->getChain(), Ptr,
+ DAG.getTargetConstant(0, DL, MVT::i32),
+ Op.getOperand(2));
+
+ // Get offset within the register.
+ SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
+ Load->getBasePtr(),
+ DAG.getConstant(0x3, DL, MVT::i32));
+
+ // Bit offset of target byte (byteIdx * 8).
+ SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
+ DAG.getConstant(3, DL, MVT::i32));
+
+ // Shift to the right.
+ Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
+
+ // Eliminate the upper bits by setting them to ...
+ EVT MemEltVT = MemVT.getScalarType();
+
+ // ... ones.
+ if (ExtType == ISD::SEXTLOAD) {
+ SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
+ Load->getChain()
+ };
+
+ return DAG.getMergeValues(Ops, DL);
+ }
+
+ // ... or zeros.
+ SDValue Ops[] = {
+ DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
+ Load->getChain()
+ };
+
+ return DAG.getMergeValues(Ops, DL);
+}
+
+SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
+ if (Result.getNode()) {
+ return Result;
+ }
+
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ SDValue Chain = Store->getChain();
+ if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
+ Store->getValue().getValueType().isVector()) {
+ return ScalarizeVectorStore(Op, DAG);
+ }
+
+ EVT MemVT = Store->getMemoryVT();
+ if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS &&
+ MemVT.bitsLT(MVT::i32)) {
+ unsigned Mask = 0;
+ if (Store->getMemoryVT() == MVT::i8) {
+ Mask = 0xff;
+ } else if (Store->getMemoryVT() == MVT::i16) {
+ Mask = 0xffff;
+ }
+ SDValue BasePtr = Store->getBasePtr();
+ SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
+ DAG.getConstant(2, DL, MVT::i32));
+ SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+ Chain, Ptr,
+ DAG.getTargetConstant(0, DL, MVT::i32));
+
+ SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
+ DAG.getConstant(0x3, DL, MVT::i32));
+
+ SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
+ DAG.getConstant(3, DL, MVT::i32));
+
+ SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
+ Store->getValue());
+
+ SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
+
+ SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
+ MaskedValue, ShiftAmt);
+
+ SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
+ DAG.getConstant(Mask, DL, MVT::i32),
+ ShiftAmt);
+ DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
+ DAG.getConstant(0xffffffff, DL, MVT::i32));
+ Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
+
+ SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
+ return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+ Chain, Value, Ptr,
+ DAG.getTargetConstant(0, DL, MVT::i32));
+ }
+ return SDValue();
+}
+
+// This is a shortcut for integer division because we have fast i32<->f32
+// conversions, and fast f32 reciprocal instructions. The fractional part of a
+// float is enough to accurately represent up to a 24-bit integer.
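+//
+// For example, 200 udiv 7: fq = 200 * rcp(7) ~= 28.57, trunc(fq) = 28, and the
+// recomputed remainder 200 - 28 * 7 = 4 is < 7, so no correction is needed and
+// the result is 28 (rem 4). When the truncated quotient comes out one too low
+// (fr >= fb), jq supplies the missing 1.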
+SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ MVT IntVT = MVT::i32;
+ MVT FltVT = MVT::f32;
+
+ ISD::NodeType ToFp = sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
+ ISD::NodeType ToInt = sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
+
+ if (VT.isVector()) {
+ unsigned NElts = VT.getVectorNumElements();
+ IntVT = MVT::getVectorVT(MVT::i32, NElts);
+ FltVT = MVT::getVectorVT(MVT::f32, NElts);
+ }
+
+ unsigned BitSize = VT.getScalarType().getSizeInBits();
+
+ SDValue jq = DAG.getConstant(1, DL, IntVT);
+
+ if (sign) {
+ // char|short jq = ia ^ ib;
+ jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
+
+ // jq = jq >> (bitsize - 2)
+ jq = DAG.getNode(ISD::SRA, DL, VT, jq,
+ DAG.getConstant(BitSize - 2, DL, VT));
+
+ // jq = jq | 0x1
+ jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT));
+
+ // jq = (int)jq
+ jq = DAG.getSExtOrTrunc(jq, DL, IntVT);
+ }
+
+ // int ia = (int)LHS;
+ SDValue ia = sign ?
+ DAG.getSExtOrTrunc(LHS, DL, IntVT) : DAG.getZExtOrTrunc(LHS, DL, IntVT);
+
+  // int ib = (int)RHS;
+ SDValue ib = sign ?
+ DAG.getSExtOrTrunc(RHS, DL, IntVT) : DAG.getZExtOrTrunc(RHS, DL, IntVT);
+
+ // float fa = (float)ia;
+ SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia);
+
+ // float fb = (float)ib;
+ SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
+
+ // float fq = native_divide(fa, fb);
+ SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
+ fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
+
+ // fq = trunc(fq);
+ fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);
+
+ // float fqneg = -fq;
+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
+
+ // float fr = mad(fqneg, fb, fa);
+ SDValue fr = DAG.getNode(ISD::FADD, DL, FltVT,
+ DAG.getNode(ISD::FMUL, DL, FltVT, fqneg, fb), fa);
+
+ // int iq = (int)fq;
+ SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
+
+ // fr = fabs(fr);
+ fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);
+
+ // fb = fabs(fb);
+ fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), VT);
+
+ // int cv = fr >= fb;
+ SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
+
+ // jq = (cv ? jq : 0);
+ jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT));
+
+ // dst = trunc/extend to legal type
+ iq = sign ? DAG.getSExtOrTrunc(iq, DL, VT) : DAG.getZExtOrTrunc(iq, DL, VT);
+
+ // dst = iq + jq;
+ SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq);
+
+  // Rem needs compensation; it's easier to recompute it.
+ SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS);
+ Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem);
+
+ SDValue Res[2] = {
+ Div,
+ Rem
+ };
+ return DAG.getMergeValues(Res, DL);
+}
+
+void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Results) const {
+ assert(Op.getValueType() == MVT::i64);
+
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
+
+ SDValue one = DAG.getConstant(1, DL, HalfVT);
+ SDValue zero = DAG.getConstant(0, DL, HalfVT);
+
+ //HiLo split
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
+ SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
+
+ SDValue RHS = Op.getOperand(1);
+ SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
+ SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
+
+ if (VT == MVT::i64 &&
+ DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) &&
+ DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) {
+
+ SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
+ LHS_Lo, RHS_Lo);
+
+ SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(0), zero);
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(1), zero);
+ Results.push_back(DIV);
+ Results.push_back(REM);
+ return;
+ }
+
+ // Get Speculative values
+ SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
+ SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
+
+ SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, zero);
+
+ SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
+ SDValue DIV_Lo = zero;
+
+ const unsigned halfBitWidth = HalfVT.getSizeInBits();
+
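+  // The loop below is restoring long division over the bits of LHS_Lo: each
+  // iteration shifts the running remainder left by one, brings in the next
+  // dividend bit, and when the remainder reaches RHS it sets the matching
+  // quotient bit and subtracts RHS.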
+ for (unsigned i = 0; i < halfBitWidth; ++i) {
+ const unsigned bitPos = halfBitWidth - i - 1;
+ SDValue POS = DAG.getConstant(bitPos, DL, HalfVT);
+ // Get value of high bit
+ SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
+ HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
+ HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit);
+
+ // Shift
+ REM = DAG.getNode(ISD::SHL, DL, VT, REM, DAG.getConstant(1, DL, VT));
+ // Add LHS high bit
+ REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit);
+
+ SDValue BIT = DAG.getConstant(1 << bitPos, DL, HalfVT);
+ SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETUGE);
+
+ DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
+
+ // Update REM
+ SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
+ REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
+ }
+
+ SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
+ Results.push_back(DIV);
+ Results.push_back(REM);
+}
+
+SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ if (VT == MVT::i64) {
+ SmallVector<SDValue, 2> Results;
+ LowerUDIVREM64(Op, DAG, Results);
+ return DAG.getMergeValues(Results, DL);
+ }
+
+ SDValue Num = Op.getOperand(0);
+ SDValue Den = Op.getOperand(1);
+
+ if (VT == MVT::i32) {
+ if (DAG.MaskedValueIsZero(Num, APInt::getHighBitsSet(32, 8)) &&
+ DAG.MaskedValueIsZero(Den, APInt::getHighBitsSet(32, 8))) {
+ // TODO: We technically could do this for i64, but shouldn't that just be
+ // handled by something generally reducing 64-bit division on 32-bit
+ // values to 32-bit?
+ return LowerDIVREM24(Op, DAG, false);
+ }
+ }
+
+ // RCP = URECIP(Den) = 2^32 / Den + e
+ // e is rounding error.
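+  // The sequence below forms an approximate quotient from the reciprocal and
+  // then applies at most a +/-1 correction, based on checks of the recomputed
+  // remainder against the divisor.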
+ SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
+
+  // RCP_LO = mul(RCP, Den)
+ SDValue RCP_LO = DAG.getNode(ISD::MUL, DL, VT, RCP, Den);
+
+  // RCP_HI = mulhu(RCP, Den)
+ SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
+
+ // NEG_RCP_LO = -RCP_LO
+ SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ RCP_LO);
+
+ // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
+ SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
+ NEG_RCP_LO, RCP_LO,
+ ISD::SETEQ);
+ // Calculate the rounding error from the URECIP instruction
+ // E = mulhu(ABS_RCP_LO, RCP)
+ SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
+
+ // RCP_A_E = RCP + E
+ SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
+
+ // RCP_S_E = RCP - E
+ SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
+
+  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
+ SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
+ RCP_A_E, RCP_S_E,
+ ISD::SETEQ);
+ // Quotient = mulhu(Tmp0, Num)
+ SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
+
+ // Num_S_Remainder = Quotient * Den
+ SDValue Num_S_Remainder = DAG.getNode(ISD::MUL, DL, VT, Quotient, Den);
+
+ // Remainder = Num - Num_S_Remainder
+ SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
+
+ // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
+ SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
+ DAG.getConstant(-1, DL, VT),
+ DAG.getConstant(0, DL, VT),
+ ISD::SETUGE);
+ // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
+ SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
+ Num_S_Remainder,
+ DAG.getConstant(-1, DL, VT),
+ DAG.getConstant(0, DL, VT),
+ ISD::SETUGE);
+ // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
+ SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
+ Remainder_GE_Zero);
+
+ // Calculate Division result:
+
+ // Quotient_A_One = Quotient + 1
+ SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
+ DAG.getConstant(1, DL, VT));
+
+ // Quotient_S_One = Quotient - 1
+ SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
+ DAG.getConstant(1, DL, VT));
+
+ // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
+ SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
+ Quotient, Quotient_A_One, ISD::SETEQ);
+
+ // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
+ Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
+ Quotient_S_One, Div, ISD::SETEQ);
+
+ // Calculate Rem result:
+
+ // Remainder_S_Den = Remainder - Den
+ SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
+
+ // Remainder_A_Den = Remainder + Den
+ SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
+
+ // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
+ SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
+ Remainder, Remainder_S_Den, ISD::SETEQ);
+
+ // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
+ Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
+ Remainder_A_Den, Rem, ISD::SETEQ);
+ SDValue Ops[2] = {
+ Div,
+ Rem
+ };
+ return DAG.getMergeValues(Ops, DL);
+}
+
+SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue NegOne = DAG.getConstant(-1, DL, VT);
+
+ if (VT == MVT::i32 &&
+ DAG.ComputeNumSignBits(LHS) > 8 &&
+ DAG.ComputeNumSignBits(RHS) > 8) {
+ return LowerDIVREM24(Op, DAG, true);
+ }
+ if (VT == MVT::i64 &&
+ DAG.ComputeNumSignBits(LHS) > 32 &&
+ DAG.ComputeNumSignBits(RHS) > 32) {
+ EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
+
+ //HiLo split
+ SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
+ SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
+ SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
+ LHS_Lo, RHS_Lo);
+ SDValue Res[2] = {
+ DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)),
+ DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1))
+ };
+ return DAG.getMergeValues(Res, DL);
+ }
+
+ SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
+ SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
+ SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
+ SDValue RSign = LHSign; // Remainder sign is the same as LHS
+
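+  // abs(x) is computed as (x + sign) ^ sign, where sign = (x < 0 ? -1 : 0);
+  // the xor/sub pairs at the end restore the proper signs on the results.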
+ LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign);
+ RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign);
+
+ LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign);
+ RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign);
+
+ SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
+ SDValue Rem = Div.getValue(1);
+
+ Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign);
+ Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign);
+
+ Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign);
+ Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign);
+
+ SDValue Res[2] = {
+ Div,
+ Rem
+ };
+ return DAG.getMergeValues(Res, DL);
+}
+
+// (frem x, y) -> (fsub x, (fmul (ftrunc (fdiv x, y)), y))
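+// For example, frem(7.5, 2.0): fdiv = 3.75, ftrunc = 3.0, 7.5 - 3.0 * 2.0 = 1.5.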
+SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ EVT VT = Op.getValueType();
+ SDValue X = Op.getOperand(0);
+ SDValue Y = Op.getOperand(1);
+
+ SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
+ SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y);
+
+ return DAG.getNode(ISD::FSUB, SL, VT, X, Mul);
+}
+
+SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ // result = trunc(src)
+ // if (src > 0.0 && src != result)
+ // result += 1.0
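+  //
+  // e.g. ceil(2.3): trunc = 2.0, and since 2.3 > 0.0 and 2.3 != 2.0 we add 1.0
+  // to get 3.0; for ceil(-2.3) the condition fails and trunc = -2.0 already is
+  // the answer.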
+
+ SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
+
+ const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
+ const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+
+  SDValue Gt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
+  SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
+  SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Gt0, NeTrunc);
+
+ SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
+ return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
+}
+
+static SDValue extractF64Exponent(SDValue Hi, SDLoc SL, SelectionDAG &DAG) {
+ const unsigned FractBits = 52;
+ const unsigned ExpBits = 11;
+
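+  // IEEE-754 f64 layout: 1 sign bit, 11 exponent bits (bias 1023), 52 fraction
+  // bits, so the exponent field starts at bit 20 of the high 32-bit word; the
+  // BFE below extracts it before the bias is subtracted.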
+ SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
+ Hi,
+ DAG.getConstant(FractBits - 32, SL, MVT::i32),
+ DAG.getConstant(ExpBits, SL, MVT::i32));
+ SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
+ DAG.getConstant(1023, SL, MVT::i32));
+
+ return Exp;
+}
+
+SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ assert(Op.getValueType() == MVT::f64);
+
+ const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+ const SDValue One = DAG.getConstant(1, SL, MVT::i32);
+
+ SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
+
+ // Extract the upper half, since this is where we will find the sign and
+ // exponent.
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
+
+ SDValue Exp = extractF64Exponent(Hi, SL, DAG);
+
+ const unsigned FractBits = 52;
+
+ // Extract the sign bit.
+ const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32);
+ SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
+
+  // Extend back to 64 bits.
+ SDValue SignBit64 = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+ Zero, SignBit);
+ SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
+
+ SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
+ const SDValue FractMask
+ = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64);
+
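+  // For exponent Exp, the low (52 - Exp) mantissa bits are fractional; the
+  // shifted mask below selects them so they can be cleared. Exp < 0 means
+  // |x| < 1 (only the sign survives) and Exp > 51 means x is already integral,
+  // both handled by the selects at the end.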
+ SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
+ SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
+ SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
+
+ const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32);
+
+ SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
+ SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
+
+ SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
+ SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
+
+ return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
+}
+
+SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ assert(Op.getValueType() == MVT::f64);
+
+ APFloat C1Val(APFloat::IEEEdouble, "0x1.0p+52");
+ SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
+ SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
+
+ SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
+ SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
+
+ SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
+
+ APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51");
+ SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+ SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
+
+ return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
+}
+
+SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const {
+ // FNEARBYINT and FRINT are the same, except in their handling of FP
+ // exceptions. Those aren't really meaningful for us, and OpenCL only has
+ // rint, so just treat them as equivalent.
+ return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0));
+}
+
+// XXX - May require not supporting f32 denormals?
+SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue X = Op.getOperand(0);
+
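+  // round(x) = trunc(x) + (|x - trunc(x)| >= 0.5 ? copysign(1.0, x) : 0.0),
+  // e.g. round(2.5) = 2.0 + 1.0 = 3.0 and round(-2.5) = -2.0 + -1.0 = -3.0.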
+ SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X);
+
+ SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T);
+
+ SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff);
+
+ const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f32);
+ const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
+ const SDValue Half = DAG.getConstantFP(0.5, SL, MVT::f32);
+
+ SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f32, One, X);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32);
+
+ SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
+
+ SDValue Sel = DAG.getNode(ISD::SELECT, SL, MVT::f32, Cmp, SignOne, Zero);
+
+ return DAG.getNode(ISD::FADD, SL, MVT::f32, T, Sel);
+}
+
+SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue X = Op.getOperand(0);
+
+ SDValue L = DAG.getNode(ISD::BITCAST, SL, MVT::i64, X);
+
+ const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+ const SDValue One = DAG.getConstant(1, SL, MVT::i32);
+ const SDValue NegOne = DAG.getConstant(-1, SL, MVT::i32);
+ const SDValue FiftyOne = DAG.getConstant(51, SL, MVT::i32);
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
+
+
+ SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
+
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC, One);
+
+ SDValue Exp = extractF64Exponent(Hi, SL, DAG);
+
+ const SDValue Mask = DAG.getConstant(INT64_C(0x000fffffffffffff), SL,
+ MVT::i64);
+
+ SDValue M = DAG.getNode(ISD::SRA, SL, MVT::i64, Mask, Exp);
+ SDValue D = DAG.getNode(ISD::SRA, SL, MVT::i64,
+ DAG.getConstant(INT64_C(0x0008000000000000), SL,
+ MVT::i64),
+ Exp);
+
+ SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, L, M);
+ SDValue Tmp1 = DAG.getSetCC(SL, SetCCVT,
+ DAG.getConstant(0, SL, MVT::i64), Tmp0,
+ ISD::SETNE);
+
+ SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, Tmp1,
+ D, DAG.getConstant(0, SL, MVT::i64));
+ SDValue K = DAG.getNode(ISD::ADD, SL, MVT::i64, L, Tmp2);
+
+ K = DAG.getNode(ISD::AND, SL, MVT::i64, K, DAG.getNOT(SL, M, MVT::i64));
+ K = DAG.getNode(ISD::BITCAST, SL, MVT::f64, K);
+
+ SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
+ SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
+ SDValue ExpEqNegOne = DAG.getSetCC(SL, SetCCVT, NegOne, Exp, ISD::SETEQ);
+
+ SDValue Mag = DAG.getNode(ISD::SELECT, SL, MVT::f64,
+ ExpEqNegOne,
+ DAG.getConstantFP(1.0, SL, MVT::f64),
+ DAG.getConstantFP(0.0, SL, MVT::f64));
+
+ SDValue S = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, Mag, X);
+
+ K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpLt0, S, K);
+ K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpGt51, X, K);
+
+ return K;
+}
+
+SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ if (VT == MVT::f32)
+ return LowerFROUND32(Op, DAG);
+
+ if (VT == MVT::f64)
+ return LowerFROUND64(Op, DAG);
+
+ llvm_unreachable("unhandled type");
+}
+
+SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ // result = trunc(src);
+ // if (src < 0.0 && src != result)
+ // result += -1.0.
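+  //
+  // e.g. floor(-2.3): trunc = -2.0, and since -2.3 < 0.0 and differs from the
+  // trunc result, we add -1.0 to get -3.0.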
+
+ SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
+
+ const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
+ const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+
+ SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
+ SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
+ SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
+
+ SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
+ return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
+}
+
+SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
+ bool Signed) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
+
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
+ DAG.getConstant(0, SL, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
+ DAG.getConstant(1, SL, MVT::i32));
+
+ SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
+ SL, MVT::f64, Hi);
+
+ SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
+
+ SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
+ DAG.getConstant(32, SL, MVT::i32));
+
+ return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
+}
+
+SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue S0 = Op.getOperand(0);
+ if (S0.getValueType() != MVT::i64)
+ return SDValue();
+
+ EVT DestVT = Op.getValueType();
+ if (DestVT == MVT::f64)
+ return LowerINT_TO_FP64(Op, DAG, false);
+
+ assert(DestVT == MVT::f32);
+
+ SDLoc DL(Op);
+
+ // f32 uint_to_fp i64
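+  // Convert each 32-bit half separately and recombine as
+  // (float)Hi * 2^32 + (float)Lo.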
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
+ DAG.getConstant(0, DL, MVT::i32));
+ SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
+ DAG.getConstant(1, DL, MVT::i32));
+ SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
+ FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
+ DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32
+ return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
+}
+
+SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::i64 && Op.getValueType() == MVT::f64)
+ return LowerINT_TO_FP64(Op, DAG, true);
+
+ return SDValue();
+}
+
+SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
+ bool Signed) const {
+ SDLoc SL(Op);
+
+ SDValue Src = Op.getOperand(0);
+
+ SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
+
+ SDValue K0 = DAG.getConstantFP(BitsToDouble(UINT64_C(0x3df0000000000000)), SL,
+ MVT::f64);
+ SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL,
+ MVT::f64);
+
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
+
+ SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul);
+
+
+ SDValue Fma = DAG.getNode(ISD::FMA, SL, MVT::f64, FloorMul, K1, Trunc);
+
+ SDValue Hi = DAG.getNode(Signed ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, SL,
+ MVT::i32, FloorMul);
+ SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma);
+
+ SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Lo, Hi);
+
+ return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Result);
+}
+
+SDValue AMDGPUTargetLowering::LowerFP_TO_SINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Src = Op.getOperand(0);
+
+ if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
+ return LowerFP64_TO_INT(Op, DAG, true);
+
+ return SDValue();
+}
+
+SDValue AMDGPUTargetLowering::LowerFP_TO_UINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Src = Op.getOperand(0);
+
+ if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
+ return LowerFP64_TO_INT(Op, DAG, false);
+
+ return SDValue();
+}
+
+SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ MVT VT = Op.getSimpleValueType();
+ MVT ScalarVT = VT.getScalarType();
+
+ if (!VT.isVector())
+ return SDValue();
+
+ SDValue Src = Op.getOperand(0);
+ SDLoc DL(Op);
+
+ // TODO: Don't scalarize on Evergreen?
+ unsigned NElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Args;
+ DAG.ExtractVectorElements(Src, Args, 0, NElts);
+
+ SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
+ for (unsigned I = 0; I < NElts; ++I)
+ Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args);
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG optimizations
+//===----------------------------------------------------------------------===//
+
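+// An operand fits in 24 unsigned bits when all bits above the low 24 are known
+// to be zero; the signed variant instead requires at least (bitwidth - 23)
+// sign bits so that bit 23 acts as the sign bit.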
+static bool isU24(SDValue Op, SelectionDAG &DAG) {
+ APInt KnownZero, KnownOne;
+ EVT VT = Op.getValueType();
+ DAG.computeKnownBits(Op, KnownZero, KnownOne);
+
+ return (VT.getSizeInBits() - KnownZero.countLeadingOnes()) <= 24;
+}
+
+static bool isI24(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+  // In order for this to be a signed 24-bit value, bit 23 must be
+  // a sign bit.
+ return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
+ // as unsigned 24-bit values.
+ (VT.getSizeInBits() - DAG.ComputeNumSignBits(Op)) < 24;
+}
+
+static void simplifyI24(SDValue Op, TargetLowering::DAGCombinerInfo &DCI) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = Op.getValueType();
+
+ APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24);
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, true, true);
+ if (TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+ DCI.CommitTargetLoweringOpt(TLO);
+}
+
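+// Constant fold a bitfield extract by shifting the field up to the top of a
+// 32-bit value and shifting it back down, arithmetically for the signed form.
+// e.g. BFE_I32(src = 0xF0, offset = 4, width = 4): (0xF0 << 24) asr 28 = -1,
+// i.e. the 4-bit field 0b1111 sign extended.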
+template <typename IntTy>
+static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0,
+ uint32_t Offset, uint32_t Width, SDLoc DL) {
+ if (Width + Offset < 32) {
+ uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
+ IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
+ return DAG.getConstant(Result, DL, MVT::i32);
+ }
+
+ return DAG.getConstant(Src0 >> Offset, DL, MVT::i32);
+}
+
+static bool usesAllNormalStores(SDNode *LoadVal) {
+ for (SDNode::use_iterator I = LoadVal->use_begin(); !I.atEnd(); ++I) {
+ if (!ISD::isNormalStore(*I))
+ return false;
+ }
+
+ return true;
+}
+
+// If we have a copy of an illegal type, replace it with a load / store of an
+// equivalently sized legal type. This avoids intermediate bit pack / unpack
+// instructions emitted when handling extloads and truncstores. Ideally we could
+// recognize the pack / unpack pattern to eliminate it.
+SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
+ StoreSDNode *SN = cast<StoreSDNode>(N);
+ SDValue Value = SN->getValue();
+ EVT VT = Value.getValueType();
+
+ if (isTypeLegal(VT) || SN->isVolatile() ||
+ !ISD::isNormalLoad(Value.getNode()) || VT.getSizeInBits() < 8)
+ return SDValue();
+
+ LoadSDNode *LoadVal = cast<LoadSDNode>(Value);
+ if (LoadVal->isVolatile() || !usesAllNormalStores(LoadVal))
+ return SDValue();
+
+ EVT MemVT = LoadVal->getMemoryVT();
+
+ SDLoc SL(N);
+ SelectionDAG &DAG = DCI.DAG;
+ EVT LoadVT = getEquivalentMemType(*DAG.getContext(), MemVT);
+
+ SDValue NewLoad = DAG.getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD,
+ LoadVT, SL,
+ LoadVal->getChain(),
+ LoadVal->getBasePtr(),
+ LoadVal->getOffset(),
+ LoadVT,
+ LoadVal->getMemOperand());
+
+ SDValue CastLoad = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad.getValue(0));
+ DCI.CombineTo(LoadVal, CastLoad, NewLoad.getValue(1), false);
+
+ return DAG.getStore(SN->getChain(), SL, NewLoad,
+ SN->getBasePtr(), SN->getMemOperand());
+}
+
+SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ EVT VT = N->getValueType(0);
+
+ if (VT.isVector() || VT.getSizeInBits() > 32)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Mul;
+
+ if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
+ N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
+ Mul = DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, N0, N1);
+ } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
+ N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
+ Mul = DAG.getNode(AMDGPUISD::MUL_I24, DL, MVT::i32, N0, N1);
+ } else {
+ return SDValue();
+ }
+
+ // We need to use sext even for MUL_U24, because MUL_U24 is used
+ // for signed multiply of 8 and 16-bit types.
+ return DAG.getSExtOrTrunc(Mul, DL, VT);
+}
+
+SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::MUL:
+ return performMulCombine(N, DCI);
+ case AMDGPUISD::MUL_I24:
+ case AMDGPUISD::MUL_U24: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ simplifyI24(N0, DCI);
+ simplifyI24(N1, DCI);
+ return SDValue();
+ }
+ case ISD::SELECT: {
+ SDValue Cond = N->getOperand(0);
+ if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) {
+ EVT VT = N->getValueType(0);
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ SDValue CC = Cond.getOperand(2);
+
+ SDValue True = N->getOperand(1);
+ SDValue False = N->getOperand(2);
+
+ if (VT == MVT::f32)
+ return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
+
+ // TODO: Implement min / max Evergreen instructions.
+ if (VT == MVT::i32 &&
+ Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
+ }
+ }
+
+ break;
+ }
+ case AMDGPUISD::BFE_I32:
+ case AMDGPUISD::BFE_U32: {
+ assert(!N->getValueType(0).isVector() &&
+ "Vector handling of BFE not implemented");
+ ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
+ if (!Width)
+ break;
+
+ uint32_t WidthVal = Width->getZExtValue() & 0x1f;
+ if (WidthVal == 0)
+ return DAG.getConstant(0, DL, MVT::i32);
+
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!Offset)
+ break;
+
+ SDValue BitsFrom = N->getOperand(0);
+ uint32_t OffsetVal = Offset->getZExtValue() & 0x1f;
+
+ bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32;
+
+ if (OffsetVal == 0) {
+ // This is already sign / zero extended, so try to fold away extra BFEs.
+ unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal);
+
+ unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom);
+ if (OpSignBits >= SignBits)
+ return BitsFrom;
+
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal);
+ if (Signed) {
+ // This is a sign_extend_inreg. Replace it to take advantage of existing
+ // DAG Combines. If not eliminated, we will match back to BFE during
+ // selection.
+
+        // TODO: The sext_inreg of extended types ends up being expanded here,
+        // although we could handle them in a single BFE.
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom,
+ DAG.getValueType(SmallVT));
+ }
+
+ return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT);
+ }
+
+ if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) {
+ if (Signed) {
+ return constantFoldBFE<int32_t>(DAG,
+ CVal->getSExtValue(),
+ OffsetVal,
+ WidthVal,
+ DL);
+ }
+
+ return constantFoldBFE<uint32_t>(DAG,
+ CVal->getZExtValue(),
+ OffsetVal,
+ WidthVal,
+ DL);
+ }
+
+ if ((OffsetVal + WidthVal) >= 32) {
+ SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32);
+ return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
+ BitsFrom, ShiftVal);
+ }
+
+ if (BitsFrom.hasOneUse()) {
+ APInt Demanded = APInt::getBitsSet(32,
+ OffsetVal,
+ OffsetVal + WidthVal);
+
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
+ TLI.SimplifyDemandedBits(BitsFrom, Demanded,
+ KnownZero, KnownOne, TLO)) {
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+ }
+
+ break;
+ }
+
+ case ISD::STORE:
+ return performStoreCombine(N, DCI);
+ }
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
+void AMDGPUTargetLowering::getOriginalFunctionArgs(
+ SelectionDAG &DAG,
+ const Function *F,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<ISD::InputArg> &OrigIns) const {
+
+ for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
+ if (Ins[i].ArgVT == Ins[i].VT) {
+ OrigIns.push_back(Ins[i]);
+ continue;
+ }
+
+ EVT VT;
+ if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
+ // Vector has been split into scalars.
+ VT = Ins[i].ArgVT.getVectorElementType();
+ } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
+ Ins[i].ArgVT.getVectorElementType() !=
+ Ins[i].VT.getVectorElementType()) {
+ // Vector elements have been promoted
+ VT = Ins[i].ArgVT;
+ } else {
+      // Vector has been split into smaller vectors.
+ VT = Ins[i].VT;
+ }
+
+ ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
+ Ins[i].OrigArgIndex, Ins[i].PartOffset);
+ OrigIns.push_back(Arg);
+ }
+}
+
+bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CFP->isExactlyValue(1.0);
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ return C->isAllOnesValue();
+ }
+ return false;
+}
+
+bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CFP->getValueAPF().isZero();
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ return C->isNullValue();
+ }
+ return false;
+}
+
+SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned VirtualRegister;
+ if (!MRI.isLiveIn(Reg)) {
+ VirtualRegister = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(Reg, VirtualRegister);
+ } else {
+ VirtualRegister = MRI.getLiveInVirtReg(Reg);
+ }
+ return DAG.getRegister(VirtualRegister, VT);
+}
+
+#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
+
+const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch ((AMDGPUISD::NodeType)Opcode) {
+ case AMDGPUISD::FIRST_NUMBER: break;
+ // AMDIL DAG nodes
+ NODE_NAME_CASE(CALL);
+ NODE_NAME_CASE(UMUL);
+ NODE_NAME_CASE(RET_FLAG);
+ NODE_NAME_CASE(BRANCH_COND);
+
+ // AMDGPU DAG nodes
+ NODE_NAME_CASE(DWORDADDR)
+ NODE_NAME_CASE(FRACT)
+ NODE_NAME_CASE(CLAMP)
+ NODE_NAME_CASE(COS_HW)
+ NODE_NAME_CASE(SIN_HW)
+ NODE_NAME_CASE(FMAX_LEGACY)
+ NODE_NAME_CASE(FMIN_LEGACY)
+ NODE_NAME_CASE(FMAX3)
+ NODE_NAME_CASE(SMAX3)
+ NODE_NAME_CASE(UMAX3)
+ NODE_NAME_CASE(FMIN3)
+ NODE_NAME_CASE(SMIN3)
+ NODE_NAME_CASE(UMIN3)
+ NODE_NAME_CASE(URECIP)
+ NODE_NAME_CASE(DIV_SCALE)
+ NODE_NAME_CASE(DIV_FMAS)
+ NODE_NAME_CASE(DIV_FIXUP)
+ NODE_NAME_CASE(TRIG_PREOP)
+ NODE_NAME_CASE(RCP)
+ NODE_NAME_CASE(RSQ)
+ NODE_NAME_CASE(RSQ_LEGACY)
+ NODE_NAME_CASE(RSQ_CLAMPED)
+ NODE_NAME_CASE(LDEXP)
+ NODE_NAME_CASE(FP_CLASS)
+ NODE_NAME_CASE(DOT4)
+ NODE_NAME_CASE(CARRY)
+ NODE_NAME_CASE(BORROW)
+ NODE_NAME_CASE(BFE_U32)
+ NODE_NAME_CASE(BFE_I32)
+ NODE_NAME_CASE(BFI)
+ NODE_NAME_CASE(BFM)
+ NODE_NAME_CASE(BREV)
+ NODE_NAME_CASE(MUL_U24)
+ NODE_NAME_CASE(MUL_I24)
+ NODE_NAME_CASE(MAD_U24)
+ NODE_NAME_CASE(MAD_I24)
+ NODE_NAME_CASE(TEXTURE_FETCH)
+ NODE_NAME_CASE(EXPORT)
+ NODE_NAME_CASE(CONST_ADDRESS)
+ NODE_NAME_CASE(REGISTER_LOAD)
+ NODE_NAME_CASE(REGISTER_STORE)
+ NODE_NAME_CASE(LOAD_CONSTANT)
+ NODE_NAME_CASE(LOAD_INPUT)
+ NODE_NAME_CASE(SAMPLE)
+ NODE_NAME_CASE(SAMPLEB)
+ NODE_NAME_CASE(SAMPLED)
+ NODE_NAME_CASE(SAMPLEL)
+ NODE_NAME_CASE(CVT_F32_UBYTE0)
+ NODE_NAME_CASE(CVT_F32_UBYTE1)
+ NODE_NAME_CASE(CVT_F32_UBYTE2)
+ NODE_NAME_CASE(CVT_F32_UBYTE3)
+ NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
+ NODE_NAME_CASE(CONST_DATA_PTR)
+ case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
+ NODE_NAME_CASE(SENDMSG)
+ NODE_NAME_CASE(INTERP_MOV)
+ NODE_NAME_CASE(INTERP_P1)
+ NODE_NAME_CASE(INTERP_P2)
+ NODE_NAME_CASE(STORE_MSKOR)
+ NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
+ case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
+ }
+ return nullptr;
+}
+
+SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = Operand.getValueType();
+
+ if (VT == MVT::f32) {
+ RefinementSteps = 0;
+ return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand);
+ }
+
+  // TODO: There is also an f64 rsq instruction, but the documentation is less
+ // clear on its precision.
+
+ return SDValue();
+}
+
+SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = Operand.getValueType();
+
+ if (VT == MVT::f32) {
+ // Reciprocal, < 1 ulp error.
+ //
+ // This reciprocal approximation converges to < 0.5 ulp error with one
+    // Newton-Raphson iteration performed with two fused multiply-adds (FMAs).
+
+ RefinementSteps = 0;
+ return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
+ }
+
+  // TODO: There is also an f64 rcp instruction, but the documentation is less
+ // clear on its precision.
+
+ return SDValue();
+}
+
+static void computeKnownBitsForMinMax(const SDValue Op0,
+ const SDValue Op1,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) {
+ APInt Op0Zero, Op0One;
+ APInt Op1Zero, Op1One;
+ DAG.computeKnownBits(Op0, Op0Zero, Op0One, Depth);
+ DAG.computeKnownBits(Op1, Op1Zero, Op1One, Depth);
+
+ KnownZero = Op0Zero & Op1Zero;
+ KnownOne = Op0One & Op1One;
+}
+
+void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
+ const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
+
+ APInt KnownZero2;
+ APInt KnownOne2;
+ unsigned Opc = Op.getOpcode();
+
+ switch (Opc) {
+ default:
+ break;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ // FIXME: The intrinsic should just use the node.
+ switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
+ case AMDGPUIntrinsic::AMDGPU_imax:
+ case AMDGPUIntrinsic::AMDGPU_umax:
+ case AMDGPUIntrinsic::AMDGPU_imin:
+ case AMDGPUIntrinsic::AMDGPU_umin:
+ computeKnownBitsForMinMax(Op.getOperand(1), Op.getOperand(2),
+ KnownZero, KnownOne, DAG, Depth);
+ break;
+ default:
+ break;
+ }
+
+ break;
+ }
+ case AMDGPUISD::CARRY:
+ case AMDGPUISD::BORROW: {
+ KnownZero = APInt::getHighBitsSet(32, 31);
+ break;
+ }
+
+ case AMDGPUISD::BFE_I32:
+ case AMDGPUISD::BFE_U32: {
+ ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ if (!CWidth)
+ return;
+
+ unsigned BitWidth = 32;
+ uint32_t Width = CWidth->getZExtValue() & 0x1f;
+
+ if (Opc == AMDGPUISD::BFE_U32)
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
+
+ break;
+ }
+ }
+}
+
+unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
+ SDValue Op,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ switch (Op.getOpcode()) {
+ case AMDGPUISD::BFE_I32: {
+ ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ if (!Width)
+ return 1;
+
+ unsigned SignBits = 32 - Width->getZExtValue() + 1;
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!Offset || !Offset->isNullValue())
+ return SignBits;
+
+ // TODO: Could probably figure something out with non-0 offsets.
+ unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ return std::max(SignBits, Op0SignBits);
+ }
+
+ case AMDGPUISD::BFE_U32: {
+ ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;
+ }
+
+ case AMDGPUISD::CARRY:
+ case AMDGPUISD::BORROW:
+ return 31;
+
+ default:
+ return 1;
+ }
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
new file mode 100644
index 0000000..fbb7d3c
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -0,0 +1,307 @@
+//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition of the TargetLowering class that is common
+/// to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUISELLOWERING_H
+#define LLVM_LIB_TARGET_R600_AMDGPUISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class AMDGPUMachineFunction;
+class AMDGPUSubtarget;
+class MachineRegisterInfo;
+
+class AMDGPUTargetLowering : public TargetLowering {
+protected:
+ const AMDGPUSubtarget *Subtarget;
+
+private:
+ SDValue LowerConstantInitializer(const Constant* Init, const GlobalValue *GV,
+ const SDValue &InitPtr,
+ SDValue Chain,
+ SelectionDAG &DAG) const;
+ SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ /// \brief Lower vector stores by merging the vector elements into an integer
+ /// of the same bitwidth.
+ SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
+ /// \brief Split a vector store into multiple scalar stores.
+ /// \returns The resulting chain.
+
+ SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+ SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+ SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+protected:
+ static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
+ static EVT getEquivalentLoadRegType(LLVMContext &Context, EVT VT);
+
+ virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
+ SelectionDAG &DAG) const;
+
+ /// \brief Split a vector load into a scalar load of each component.
+ SDValue ScalarizeVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+
+ /// \brief Split a vector load into 2 loads of half the vector.
+ SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+
+ /// \brief Split a vector store into a scalar store of each component.
+ SDValue ScalarizeVectorStore(SDValue Op, SelectionDAG &DAG) const;
+
+ /// \brief Split a vector store into 2 stores of half the vector.
+ SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
+ void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Results) const;
+ bool isHWTrueValue(SDValue Op) const;
+ bool isHWFalseValue(SDValue Op) const;
+
+ /// The SelectionDAGBuilder will automatically promote function arguments
+ /// with illegal types. However, this does not work for the AMDGPU targets
+ /// since the function arguments are stored in memory as these illegal types.
+  /// In order to handle this properly we need to get the original type sizes
+  /// from the LLVM IR Function and fix up the ISD::InputArg values before
+  /// passing them to AnalyzeFormalArguments().
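+  /// (For example, an i8 argument occupies a single byte in memory here,
+  /// whereas the default promotion would widen it, typically to i32, and
+  /// report the wrong size and offset for the arguments that follow.)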
+ void getOriginalFunctionArgs(SelectionDAG &DAG,
+ const Function *F,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<ISD::InputArg> &OrigIns) const;
+ void AnalyzeFormalArguments(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const;
+
+public:
+ AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
+
+ bool isFAbsFree(EVT VT) const override;
+ bool isFNegFree(EVT VT) const override;
+ bool isTruncateFree(EVT Src, EVT Dest) const override;
+ bool isTruncateFree(Type *Src, Type *Dest) const override;
+
+ bool isZExtFree(Type *Src, Type *Dest) const override;
+ bool isZExtFree(EVT Src, EVT Dest) const override;
+ bool isZExtFree(SDValue Val, EVT VT2) const override;
+
+ bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
+
+ MVT getVectorIdxTy() const override;
+ bool isSelectSupported(SelectSupportKind) const override;
+
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool ShouldShrinkFPConstant(EVT VT) const override;
+ bool shouldReduceLoadWidth(SDNode *Load,
+ ISD::LoadExtType ExtType,
+ EVT ExtVT) const override;
+
+ bool isLoadBitCastBeneficial(EVT, EVT) const override;
+
+ bool storeOfVectorConstantIsCheap(EVT MemVT,
+ unsigned NumElem,
+ unsigned AS) const override;
+ bool isCheapToSpeculateCttz() const override;
+ bool isCheapToSpeculateCtlz() const override;
+
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc DL, SelectionDAG &DAG) const override;
+ SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+ void ReplaceNodeResults(SDNode * N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
+
+ SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue CombineFMinMaxLegacy(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ DAGCombinerInfo &DCI) const;
+ SDValue CombineIMinMax(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ SelectionDAG &DAG) const;
+
+ const char* getTargetNodeName(unsigned Opcode) const override;
+
+ SDValue getRsqrtEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const override;
+ SDValue getRecipEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const override;
+
+ virtual SDNode *PostISelFolding(MachineSDNode *N,
+ SelectionDAG &DAG) const {
+ return N;
+ }
+
+ /// \brief Determine which of the bits specified in \p Mask are known to be
+ /// either zero or one and return them in the \p KnownZero and \p KnownOne
+ /// bitsets.
+ void computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
+
+ unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
+
+ /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
+ /// MachineFunction.
+ ///
+ /// \returns a RegisterSDNode representing Reg.
+ virtual SDValue CreateLiveInRegister(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const;
+};
+
+namespace AMDGPUISD {
+
+enum NodeType : unsigned {
+ // AMDIL ISD Opcodes
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ CALL, // Function call based on a single integer
+ UMUL, // 32bit unsigned multiplication
+ RET_FLAG,
+ BRANCH_COND,
+ // End AMDIL ISD Opcodes
+ DWORDADDR,
+ FRACT,
+ CLAMP,
+
+ // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
+ // Denormals handled on some parts.
+ COS_HW,
+ SIN_HW,
+ FMAX_LEGACY,
+ FMIN_LEGACY,
+ FMAX3,
+ SMAX3,
+ UMAX3,
+ FMIN3,
+ SMIN3,
+ UMIN3,
+ URECIP,
+ DIV_SCALE,
+ DIV_FMAS,
+ DIV_FIXUP,
+ TRIG_PREOP, // 1 ULP max error for f64
+
+ // RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
+ // For f64, max error 2^29 ULP, handles denormals.
+ RCP,
+ RSQ,
+ RSQ_LEGACY,
+ RSQ_CLAMPED,
+ LDEXP,
+ FP_CLASS,
+ DOT4,
+ CARRY,
+ BORROW,
+ BFE_U32, // Extract range of bits with zero extension to 32-bits.
+ BFE_I32, // Extract range of bits with sign extension to 32-bits.
+ BFI, // (src0 & src1) | (~src0 & src2)
+ BFM, // Insert a range of bits into a 32-bit word.
+ BREV, // Reverse bits.
+ MUL_U24,
+ MUL_I24,
+ MAD_U24,
+ MAD_I24,
+ TEXTURE_FETCH,
+ EXPORT,
+ CONST_ADDRESS,
+ REGISTER_LOAD,
+ REGISTER_STORE,
+ LOAD_INPUT,
+ SAMPLE,
+ SAMPLEB,
+ SAMPLED,
+ SAMPLEL,
+
+ // These cvt_f32_ubyte* nodes need to remain consecutive and in order.
+ CVT_F32_UBYTE0,
+ CVT_F32_UBYTE1,
+ CVT_F32_UBYTE2,
+ CVT_F32_UBYTE3,
+ /// This node is for VLIW targets and it is used to represent a vector
+ /// that is stored in consecutive registers with the same channel.
+ /// For example:
+ /// |X |Y|Z|W|
+ /// T0|v.x| | | |
+ /// T1|v.y| | | |
+ /// T2|v.z| | | |
+ /// T3|v.w| | | |
+ BUILD_VERTICAL_VECTOR,
+ /// Pointer to the start of the shader's constant data.
+ CONST_DATA_PTR,
+ SENDMSG,
+ INTERP_MOV,
+ INTERP_P1,
+ INTERP_P2,
+ FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ STORE_MSKOR,
+ LOAD_CONSTANT,
+ TBUFFER_STORE_FORMAT,
+ LAST_AMDGPU_ISD_NUMBER
+};
+
+
+} // End namespace AMDGPUISD
+
+} // End namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
new file mode 100644
index 0000000..15a3d54
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -0,0 +1,369 @@
+//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Implementation of the TargetInstrInfo class that is common to all
+/// AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define GET_INSTRINFO_CTOR_DTOR
+#define GET_INSTRINFO_NAMED_OPS
+#define GET_INSTRMAP_INFO
+#include "AMDGPUGenInstrInfo.inc"
+
+// Pin the vtable to this file.
+void AMDGPUInstrInfo::anchor() {}
+
+AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
+ : AMDGPUGenInstrInfo(-1, -1), ST(st) {}
+
+const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+// TODO: Implement this function
+ return false;
+}
+
+unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+
+unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+
+bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return false;
+}
+unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return false;
+}
+
+MachineInstr *
+AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+// TODO: Implement this function
+ return nullptr;
+}
+
+void
+AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ llvm_unreachable("Not Implemented");
+}
+
+void
+AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ llvm_unreachable("Not Implemented");
+}
+
+bool AMDGPUInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ int OffsetOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::addr);
+ // addr is a custom operand with multiple MI operands, and only the
+ // first MI operand is given a name.
+ int RegOpIdx = OffsetOpIdx + 1;
+ int ChanOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::chan);
+ if (isRegisterLoad(*MI)) {
+ int DstOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::dst);
+ unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
+ unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
+ unsigned Address = calculateIndirectAddress(RegIndex, Channel);
+ unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
+ if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
+ buildMovInstr(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
+ getIndirectAddrRegClass()->getRegister(Address));
+ } else {
+ buildIndirectRead(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
+ Address, OffsetReg);
+ }
+ } else if (isRegisterStore(*MI)) {
+ int ValOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::val);
+ unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
+ unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
+ unsigned Address = calculateIndirectAddress(RegIndex, Channel);
+ unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
+ if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
+ buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
+ MI->getOperand(ValOpIdx).getReg());
+ } else {
+ buildIndirectWrite(MBB, MI, MI->getOperand(ValOpIdx).getReg(),
+ calculateIndirectAddress(RegIndex, Channel),
+ OffsetReg);
+ }
+ } else {
+ return false;
+ }
+
+ MBB->erase(MI);
+ return true;
+}
+
+MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
+// TODO: Implement this function
+ return nullptr;
+}
+MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const {
+ // TODO: Implement this function
+ return nullptr;
+}
+bool AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ ArrayRef<unsigned> Ops) const {
+ // TODO: Implement this function
+ return false;
+}
+bool
+AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad,
+ bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool
+AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const {
+ // TODO: Implement this function
+ return false;
+}
+
+unsigned
+AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
+ // TODO: Implement this function
+ return 0;
+}
+
+bool AMDGPUInstrInfo::enableClusterLoads() const {
+ return true;
+}
+
+// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
+// the first 16 loads will be interleaved with the stores, and the next 16 will
+// be clustered as expected. It should really split them into two batches of 16.
+//
+// Loads are clustered until this returns false, rather than trying to schedule
+// groups of stores. This also means we have to deal with saying different
+// address space loads should be clustered, and ones which might cause bank
+// conflicts.
+//
+// This might be deprecated so it might not be worth that much effort to fix.
+bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
+ int64_t Offset0, int64_t Offset1,
+ unsigned NumLoads) const {
+ assert(Offset1 > Offset0 &&
+ "Second offset should be larger than first offset!");
+  // If we have 16 or fewer loads in a row and the offsets are within 64
+  // bytes, then schedule them together.
+
+ // A cacheline is 64 bytes (for global memory).
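+  // For example, two loads at offsets 0 and 48 are clustered, while loads at
+  // offsets 0 and 128 are not.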
+ return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
+}
+
+bool
+AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const {
+ // TODO: Implement this function
+ return true;
+}
+void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ // TODO: Implement this function
+}
+
+bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool AMDGPUInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
+ // TODO: Implement this function
+ return MI->getDesc().isPredicable();
+}
+
+bool
+AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ // TODO: Implement this function
+ return true;
+}
+
+bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
+}
+
+bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
+}
+
+int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = -1;
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ if (MRI.livein_empty()) {
+ return 0;
+ }
+
+ const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
+ for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+ LE = MRI.livein_end();
+ LI != LE; ++LI) {
+ unsigned Reg = LI->first;
+ if (TargetRegisterInfo::isVirtualRegister(Reg) ||
+ !IndirectRC->contains(Reg))
+ continue;
+
+ unsigned RegIndex;
+ unsigned RegEnd;
+ for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
+ ++RegIndex) {
+ if (IndirectRC->getRegister(RegIndex) == Reg)
+ break;
+ }
+ Offset = std::max(Offset, (int)RegIndex);
+ }
+
+ return Offset + 1;
+}
+
+int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+ int Offset = 0;
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Variable sized objects are not supported
+ assert(!MFI->hasVarSizedObjects());
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexOffset(MF, -1);
+
+ return getIndirectIndexBegin(MF) + Offset;
+}
+
+int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
+ switch (Channels) {
+ default: return Opcode;
+ case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
+ case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
+ case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
+ }
+}
+
+// Wrapper for the TableGen'd function. enum Subtarget is not defined in any
+// header file, so we need to wrap it in a function that takes unsigned
+// instead.
+namespace llvm {
+namespace AMDGPU {
+static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
+ return getMCOpcodeGen(Opcode, (enum Subtarget)Gen);
+}
+}
+}
+
+// This must be kept in sync with the SISubtarget class in SIInstrInfo.td
+enum SISubtarget {
+ SI = 0,
+ VI = 1
+};
+
+static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
+ switch (Gen) {
+ default:
+ return SI;
+ case AMDGPUSubtarget::VOLCANIC_ISLANDS:
+ return VI;
+ }
+}
+
+int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
+ int MCOp = AMDGPU::getMCOpcode(
+ Opcode, AMDGPUSubtargetToSISubtarget(ST.getGeneration()));
+
+ // -1 means that Opcode is already a native instruction.
+ if (MCOp == -1)
+ return Opcode;
+
+ // (uint16_t)-1 means that Opcode is a pseudo instruction that has
+ // no encoding in the given subtarget generation.
+ if (MCOp == (uint16_t)-1)
+ return -1;
+
+ return MCOp;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
new file mode 100644
index 0000000..31ae9a3
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -0,0 +1,206 @@
+//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Contains the definition of a TargetInstrInfo class that is common
+/// to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUINSTRINFO_H
+#define LLVM_LIB_TARGET_R600_AMDGPUINSTRINFO_H
+
+#include "AMDGPURegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <map>
+
+#define GET_INSTRINFO_HEADER
+#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_OPERAND_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+
+#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
+#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
+#define OPCODE_IS_ZERO AMDGPU::PRED_SETE
+#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
+
+namespace llvm {
+
+class AMDGPUSubtarget;
+class MachineFunction;
+class MachineInstr;
+class MachineInstrBuilder;
+
+class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
+private:
+ const AMDGPURegisterInfo RI;
+ virtual void anchor();
+protected:
+ const AMDGPUSubtarget &ST;
+public:
+ explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
+
+ virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
+
+ bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &DstReg, unsigned &SubIdx) const override;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const override;
+ bool hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const override;
+ unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+
+ MachineInstr *
+ convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const override;
+
+
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+protected:
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
+ int FrameIndex) const override;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
+ MachineInstr *LoadMI) const override;
+
+public:
+ /// \returns the smallest register index that will be accessed by an indirect
+ /// read or write or -1 if indirect addressing is not used by this program.
+ int getIndirectIndexBegin(const MachineFunction &MF) const;
+
+ /// \returns the largest register index that will be accessed by an indirect
+ /// read or write or -1 if indirect addressing is not used by this program.
+ int getIndirectIndexEnd(const MachineFunction &MF) const;
+
+ bool canFoldMemoryOperand(const MachineInstr *MI,
+ ArrayRef<unsigned> Ops) const override;
+ bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const override;
+ bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode *> &NewNodes) const override;
+ unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = nullptr) const override;
+
+ bool enableClusterLoads() const override;
+
+ bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const override;
+
+ bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
+ bool isPredicated(const MachineInstr *MI) const override;
+ bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const override;
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const override;
+ bool isPredicable(MachineInstr *MI) const override;
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
+
+ // Helper functions that check the opcode for status information
+ bool isRegisterStore(const MachineInstr &MI) const;
+ bool isRegisterLoad(const MachineInstr &MI) const;
+
+ /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
+ /// Return -1 if the target-specific opcode for the pseudo instruction does
+ /// not exist. If Opcode is not a pseudo instruction, this is identity.
+ int pseudoToMCOpcode(int Opcode) const;
+
+ /// \brief Return the descriptor of the target-specific machine instruction
+ /// that corresponds to the specified pseudo or native opcode.
+ const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
+ return get(pseudoToMCOpcode(Opcode));
+ }
+
+//===---------------------------------------------------------------------===//
+// Pure virtual functions to be implemented by sub-classes.
+//===---------------------------------------------------------------------===//
+
+ virtual bool isMov(unsigned opcode) const = 0;
+
+ /// \brief Calculate the "Indirect Address" for the given \p RegIndex and
+ /// \p Channel
+ ///
+ /// We model indirect addressing using a virtual address space that can be
+  /// accessed with loads and stores. The "Indirect Address" is the memory
+ /// address in this virtual address space that maps to the given \p RegIndex
+ /// and \p Channel.
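+  ///
+  /// For illustration only, a subclass might map (RegIndex, Channel) to
+  /// something like RegIndex * 4 + Channel; the exact mapping is defined by
+  /// the target-specific implementation.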
+ virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const = 0;
+
+ /// \returns The register class to be used for loading and storing values
+  /// from an "Indirect Address".
+ virtual const TargetRegisterClass *getIndirectAddrRegClass() const = 0;
+
+ /// \brief Build instruction(s) for an indirect register write.
+ ///
+ /// \returns The instruction that performs the indirect register write
+ virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const = 0;
+
+ /// \brief Build instruction(s) for an indirect register read.
+ ///
+ /// \returns The instruction that performs the indirect register read
+ virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const = 0;
+
+ /// \brief Build a MOV instruction.
+ virtual MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg) const = 0;
+
+ /// \brief Given a MIMG \p Opcode that writes all 4 channels, return the
+  /// equivalent opcode that writes \p Channels channels.
+ int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const;
+
+};
+
+namespace AMDGPU {
+ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
+} // End namespace AMDGPU
+
+} // namespace llvm
+
+#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63)
+#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
new file mode 100644
index 0000000..b413897
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -0,0 +1,245 @@
+//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains DAG node definitions for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Profiles
+//===----------------------------------------------------------------------===//
+
+def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
+]>;
+
+def AMDGPUTrigPreOp : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
+>;
+
+def AMDGPULdExpOp : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
+>;
+
+def AMDGPUFPClassOp : SDTypeProfile<1, 2,
+ [SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>]
+>;
+
+def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
+ [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
+>;
+
+// float, float, float, vcc
+def AMDGPUFmasOp : SDTypeProfile<1, 4,
+ [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]
+>;
+
+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Nodes
+//
+
+// The argument to this node is a dword address.
+def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
+
+def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
+def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
+
+// out = a - floor(a)
+def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
+
+// out = 1.0 / a
+def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
+
+// out = 1.0 / sqrt(a)
+def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;
+
+// out = 1.0 / sqrt(a)
+def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;
+
+// out = 1.0 / sqrt(a) result clamped to +/- max_float.
+def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>;
+
+def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;
+
+def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;
+
+// out = max(a, b) a and b are floats, where a nan comparison fails.
+// This is not commutative because this gives the second operand:
+// x < nan ? x : nan -> nan
+// nan < x ? nan : x -> x
+def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp,
+ []
+>;
+
+def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;
+
+// out = max(a, b) a and b are signed ints
+def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = max(a, b) a and b are unsigned ints
+def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are floats, where a nan comparison fails.
+def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
+ []
+>;
+
+// FIXME: TableGen doesn't like commutative instructions with more
+// than 2 operands.
+// out = max(a, b, c) a, b and c are floats
+def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = max(a, b, c) a, b, and c are signed ints
+def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = max(a, b, c) a, b and c are unsigned ints
+def AMDGPUumax3 : SDNode<"AMDGPUISD::UMAX3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = min(a, b, c) a, b and c are floats
+def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = min(a, b, c) a, b and c are signed ints
+def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = min(a, b, c) a, b and c are unsigned ints
+def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = (src0 + src1 > 0xFFFFFFFF) ? 1 : 0
+def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>;
+
+// out = (src1 > src0) ? 1 : 0
+def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;
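+// For example, CARRY(0xffffffff, 1) = 1 because the 32-bit add wraps, and
+// BORROW(0, 1) = 1 because src1 > src0.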
+
+
+def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
+ SDTIntToFPOp, []>;
+def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1",
+ SDTIntToFPOp, []>;
+def AMDGPUcvt_f32_ubyte2 : SDNode<"AMDGPUISD::CVT_F32_UBYTE2",
+ SDTIntToFPOp, []>;
+def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3",
+ SDTIntToFPOp, []>;
+
+
+// urecip - This operation is a helper for integer division; it returns the
+// result of 1 / a as a fractional unsigned integer.
+// out = (2^32 / a) + e
+// e is rounding error
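+// For example, a = 3 gives roughly 2^32 / 3 = 0x55555555, plus the rounding
+// error e.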
+def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
+
+// Special case divide preop and flags.
+def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;
+
+// Special case divide FMA with scale and flags (src0 = Quotient,
+// src1 = Denominator, src2 = Numerator).
+def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;
+
+// Single or double precision division fixup.
+// Special case divide fixup and flags (src0 = Quotient, src1 =
+// Denominator, src2 = Numerator).
+def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
+
+// Look Up 2.0 / pi src0 with segment select src1[4:0]
+def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
+
+def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
+ SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+ [SDNPHasChain, SDNPMayLoad]>;
+
+def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
+ SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// MSKOR instructions are atomic memory instructions used mainly for storing
+// 8-bit and 16-bit values. The definition is:
+//
+// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src)
+//
+// src0: vec4(src, 0, 0, mask)
+// src1: dst - rat offset (aka pointer) in dwords
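+//
+// For example, storing the byte 0xab at byte offset 1 of the destination
+// dword uses mask = 0x0000ff00 and src = 0x0000ab00, so the result is
+// MEM[dst] = (MEM[dst] & ~0x0000ff00) | 0x0000ab00.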
+def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
+ SDTypeProfile<0, 2, []>,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def AMDGPUround : SDNode<"ISD::FROUND",
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;
+
+def AMDGPUbfe_u32 : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
+def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
+def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
+def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
+
+def AMDGPUbrev : SDNode<"AMDGPUISD::BREV", SDTIntUnaryOp>;
+
+// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored when
+// performing the multiply. The result is a 32-bit value.
+def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
+ [SDNPCommutative]
+>;
+def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
+ [SDNPCommutative]
+>;
+
+def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp,
+ []
+>;
+def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp,
+ []
+>;
+
+def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
+ SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPHasChain, SDNPInGlue]>;
+
+def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
+ SDTypeProfile<1, 3, [SDTCisFP<0>]>,
+ [SDNPInGlue]>;
+
+def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1",
+ SDTypeProfile<1, 3, [SDTCisFP<0>]>,
+ [SDNPInGlue, SDNPOutGlue]>;
+
+def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
+ SDTypeProfile<1, 4, [SDTCisFP<0>]>,
+ [SDNPInGlue]>;
+
+//===----------------------------------------------------------------------===//
+// Flow Control Profile Types
+//===----------------------------------------------------------------------===//
+// Branch instruction where second and third are basic blocks
+def SDTIL_BRCond : SDTypeProfile<0, 2, [
+ SDTCisVT<0, OtherVT>
+ ]>;
+
+//===----------------------------------------------------------------------===//
+// Flow Control DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// Call/Return DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
new file mode 100644
index 0000000..72cab39
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -0,0 +1,682 @@
+//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains instruction defs that are common to all hw codegen
+// targets.
+//
+//===----------------------------------------------------------------------===//
+
+class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
+ field bit isRegisterLoad = 0;
+ field bit isRegisterStore = 0;
+
+ let Namespace = "AMDGPU";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let Itinerary = NullALU;
+
+ let TSFlags{63} = isRegisterLoad;
+ let TSFlags{62} = isRegisterStore;
+}
+
+class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
+ : AMDGPUInst<outs, ins, asm, pattern> {
+
+ field bits<32> Inst = 0xffffffff;
+
+}
+
+def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
+def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
+def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
+
+def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
+def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
+
+let OperandType = "OPERAND_IMMEDIATE" in {
+
+def u32imm : Operand<i32> {
+ let PrintMethod = "printU32ImmOperand";
+}
+
+def u16imm : Operand<i16> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def u8imm : Operand<i8> {
+ let PrintMethod = "printU8ImmOperand";
+}
+
+} // End OperandType = "OPERAND_IMMEDIATE"
+
+//===--------------------------------------------------------------------===//
+// Custom Operands
+//===--------------------------------------------------------------------===//
+def brtarget : Operand<OtherVT>;
+
+//===----------------------------------------------------------------------===//
+// PatLeafs for floating-point comparisons
+//===----------------------------------------------------------------------===//
+
+def COND_OEQ : PatLeaf <
+ (cond),
+ [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
+>;
+
+def COND_ONE : PatLeaf <
+ (cond),
+ [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
+>;
+
+def COND_OGT : PatLeaf <
+ (cond),
+ [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
+>;
+
+def COND_OGE : PatLeaf <
+ (cond),
+ [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
+>;
+
+def COND_OLT : PatLeaf <
+ (cond),
+ [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
+>;
+
+def COND_OLE : PatLeaf <
+ (cond),
+ [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
+>;
+
+
+def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
+def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
+
+//===----------------------------------------------------------------------===//
+// PatLeafs for unsigned / unordered comparisons
+//===----------------------------------------------------------------------===//
+
+def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
+def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
+def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
+def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
+def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
+def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;
+
+// XXX - For some reason the R600 version prefers to use unordered
+// for setne?
+def COND_UNE_NE : PatLeaf <
+ (cond),
+ [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
+>;
+
+//===----------------------------------------------------------------------===//
+// PatLeafs for signed comparisons
+//===----------------------------------------------------------------------===//
+
+def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
+def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
+def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
+def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;
+
+//===----------------------------------------------------------------------===//
+// PatLeafs for integer equality
+//===----------------------------------------------------------------------===//
+
+def COND_EQ : PatLeaf <
+ (cond),
+ [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
+>;
+
+def COND_NE : PatLeaf <
+ (cond),
+ [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
+>;
+
+def COND_NULL : PatLeaf <
+ (cond),
+ [{(void)N; return false;}]
+>;
+
+//===----------------------------------------------------------------------===//
+// Load/Store Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+}]>;
+
+class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
+ (ops node:$ptr), (op node:$ptr)
+>;
+
+class PrivateStore <SDPatternOperator op> : PrivateMemOp <
+ (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
+>;
+
+def load_private : PrivateLoad <load>;
+
+def truncstorei8_private : PrivateStore <truncstorei8>;
+def truncstorei16_private : PrivateStore <truncstorei16>;
+def store_private : PrivateStore <store>;
+
+def global_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+// Global address space loads
+def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+// Constant address space loads
+def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+
+class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
+ (ld_node node:$ptr), [{
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ return L->getExtensionType() == ISD::ZEXTLOAD ||
+ L->getExtensionType() == ISD::EXTLOAD;
+}]>;
+
+def az_extload : AZExtLoadBase <unindexedload>;
+
+def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def az_extloadi8_flat : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi8_flat : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
+ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+
+def sextloadi8_constant : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
+ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+
+def az_extloadi8_local : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
+ return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
+ return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def extloadi8_private : PrivateLoad <az_extloadi8>;
+def sextloadi8_private : PrivateLoad <sextloadi8>;
+
+def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def az_extloadi16_global : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def az_extloadi16_flat : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi16_flat : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
+ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+
+def sextloadi16_constant : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
+ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+
+def az_extloadi16_local : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
+ return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
+ return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def extloadi16_private : PrivateLoad <az_extloadi16>;
+def sextloadi16_private : PrivateLoad <sextloadi16>;
+
+def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def az_extloadi32_global : PatFrag<(ops node:$ptr),
+ (az_extloadi32 node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def az_extloadi32_flat : PatFrag<(ops node:$ptr),
+ (az_extloadi32 node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def az_extloadi32_constant : PatFrag<(ops node:$ptr),
+ (az_extloadi32 node:$ptr), [{
+ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+
+def truncstorei8_global : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei8 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei16 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def truncstorei8_flat : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei8 node:$val, node:$ptr), [{
+ return isFlatStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def truncstorei16_flat : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei16 node:$val, node:$ptr), [{
+ return isFlatStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def local_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def truncstorei8_local : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei8 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def truncstorei16_local : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei16 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
+ return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
+}]>;
+
+def local_load_aligned8bytes : Aligned8Bytes <
+ (ops node:$ptr), (local_load node:$ptr)
+>;
+
+def local_store_aligned8bytes : Aligned8Bytes <
+ (ops node:$val, node:$ptr), (local_store node:$val, node:$ptr)
+>;
+
+class local_binary_atomic_op<SDNode atomic_op> :
+ PatFrag<(ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value), [{
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
+
+def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
+def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
+def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
+def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
+def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
+def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
+def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
+def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
+def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
+def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
+def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
+
+def mskor_global : PatFrag<(ops node:$val, node:$ptr),
+ (AMDGPUstore_mskor node:$val, node:$ptr), [{
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+}]>;
+
+multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
+
+ def _32_local : PatFrag <
+ (ops node:$ptr, node:$cmp, node:$swap),
+ (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ return AN->getMemoryVT() == MVT::i32 &&
+ AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ }]>;
+
+ def _64_local : PatFrag<
+ (ops node:$ptr, node:$cmp, node:$swap),
+ (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ return AN->getMemoryVT() == MVT::i64 &&
+ AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ }]>;
+}
+
+defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;
+
+def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def flat_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isFlatStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
+ (AMDGPUstore_mskor node:$val, node:$ptr), [{
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
+}]>;
+
+class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
+ (ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value),
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
+>;
+
+def atomic_swap_global : global_binary_atomic_op<atomic_swap>;
+def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
+def atomic_and_global : global_binary_atomic_op<atomic_load_and>;
+def atomic_max_global : global_binary_atomic_op<atomic_load_max>;
+def atomic_min_global : global_binary_atomic_op<atomic_load_min>;
+def atomic_or_global : global_binary_atomic_op<atomic_load_or>;
+def atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
+def atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
+def atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
+def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
+
+//===----------------------------------------------------------------------===//
+// Misc Pattern Fragments
+//===----------------------------------------------------------------------===//
+
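+// The values below are IEEE-754 single precision bit patterns; for example,
+// 0x40490fdb is f32 pi (~3.14159274) and 0x3f800000 is f32 1.0.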
+class Constants {
+int TWO_PI = 0x40c90fdb;
+int PI = 0x40490fdb;
+int TWO_PI_INV = 0x3e22f983;
+int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
+int FP32_NEG_ONE = 0xbf800000;
+int FP32_ONE = 0x3f800000;
+}
+def CONST : Constants;
+
+def FP_ZERO : PatLeaf <
+ (fpimm),
+ [{return N->getValueAPF().isZero();}]
+>;
+
+def FP_ONE : PatLeaf <
+ (fpimm),
+ [{return N->isExactlyValue(1.0);}]
+>;
+
+def FP_HALF : PatLeaf <
+ (fpimm),
+ [{return N->isExactlyValue(0.5);}]
+>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
+let usesCustomInserter = 1 in {
+
+class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "CLAMP $dst, $src0",
+ [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+>;
+
+class FABS <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "FABS $dst, $src0",
+ [(set f32:$dst, (fabs f32:$src0))]
+>;
+
+class FNEG <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "FNEG $dst, $src0",
+ [(set f32:$dst, (fneg f32:$src0))]
+>;
+
+} // usesCustomInserter = 1
+
+multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
+ ComplexPattern addrPat> {
+let UseNamedOperandTable = 1 in {
+
+ def RegisterLoad : AMDGPUShaderInst <
+ (outs dstClass:$dst),
+ (ins addrClass:$addr, i32imm:$chan),
+ "RegisterLoad $dst, $addr",
+ [(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))]
+ > {
+ let isRegisterLoad = 1;
+ }
+
+ def RegisterStore : AMDGPUShaderInst <
+ (outs),
+ (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
+ "RegisterStore $val, $addr",
+ [(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]
+ > {
+ let isRegisterStore = 1;
+ }
+}
+}
+
+} // End isCodeGenOnly = 1, isPseudo = 1
+
+/* Generic helper patterns for intrinsics */
+/* -------------------------------------- */
+
+class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
+ : Pat <
+ (fpow f32:$src0, f32:$src1),
+ (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
+>;
+
+/* Other helper patterns */
+/* --------------------- */
+
+/* Extract element pattern */
+class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
+ SubRegIndex sub_reg>
+ : Pat<
+ (sub_type (vector_extract vec_type:$src, sub_idx)),
+ (EXTRACT_SUBREG $src, sub_reg)
+>;
+
+/* Insert element pattern */
+class Insert_Element <ValueType elem_type, ValueType vec_type,
+ int sub_idx, SubRegIndex sub_reg>
+ : Pat <
+ (vector_insert vec_type:$vec, elem_type:$elem, sub_idx),
+ (INSERT_SUBREG $vec, $elem, sub_reg)
+>;
+
+// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
+// can handle COPY instructions.
+// bitconvert pattern
+class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
+ (dt (bitconvert (st rc:$src0))),
+ (dt rc:$src0)
+>;
+
+// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
+// can handle COPY instructions.
+class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
+ (vt (AMDGPUdwordaddr (vt rc:$addr))),
+ (vt rc:$addr)
+>;
+
+// BFI_INT patterns
+
+multiclass BFIPatterns <Instruction BFI_INT,
+ Instruction LoadImm32,
+ RegisterClass RC64> {
+ // Definition from ISA doc:
+ // (y & x) | (z & ~x)
+ def : Pat <
+ (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
+ (BFI_INT $x, $y, $z)
+ >;
+
+ // SHA-256 Ch function
+ // z ^ (x & (y ^ z))
+ def : Pat <
+ (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
+ (BFI_INT $x, $y, $z)
+ >;
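+  // For reference: when x = 1 the expression reduces to z ^ (y ^ z) = y, and
+  // when x = 0 it reduces to z, so it selects bits exactly like
+  // (x & y) | (~x & z), which is what BFI_INT computes.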
+
+ def : Pat <
+ (fcopysign f32:$src0, f32:$src1),
+ (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1)
+ >;
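+  // For reference: with the mask 0x7fffffff this computes
+  // (0x7fffffff & $src0) | (0x80000000 & $src1), i.e. the magnitude of $src0
+  // combined with the sign bit of $src1, which is exactly fcopysign.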
+
+ def : Pat <
+ (f64 (fcopysign f64:$src0, f64:$src1)),
+ (REG_SEQUENCE RC64,
+ (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
+ (BFI_INT (LoadImm32 0x7fffffff),
+ (i32 (EXTRACT_SUBREG $src0, sub1)),
+ (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
+ >;
+}
+
+// SHA-256 Ma patterns
+
+// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
+class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
+ (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
+ (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
+>;
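+// For reference: BFI_INT(x ^ y, z, y) = ((x ^ y) & z) | (~(x ^ y) & y); when
+// x == y this yields y and when x != y it yields z, which matches the
+// majority expression (x & z) | (y & (x | z)) above.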
+
+// Bitfield extract patterns
+
+def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
+ return isMask_32(N->getZExtValue());
+}]>;
+
+def IMMPopCount : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
+ MVT::i32);
+}]>;
+
+class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
+ (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
+ (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
+>;
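+// For example, extracting bits [15:8]: (and (srl $src, 8), 0xff) becomes
+// (BFE $src, 8, 8), since countPopulation of the mask 0xff gives the field
+// width of 8.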
+
+// rotr pattern
+class ROTRPattern <Instruction BIT_ALIGN> : Pat <
+ (rotr i32:$src0, i32:$src1),
+ (BIT_ALIGN $src0, $src0, $src1)
+>;
+
+// 24-bit arithmetic patterns
+def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;
+
+// Special conversion patterns
+
+def cvt_rpi_i32_f32 : PatFrag <
+ (ops node:$src),
+ (fp_to_sint (ffloor (fadd $src, FP_HALF))),
+ [{ (void) N; return TM.Options.NoNaNsFPMath; }]
+>;
+
+def cvt_flr_i32_f32 : PatFrag <
+ (ops node:$src),
+ (fp_to_sint (ffloor $src)),
+ [{ (void)N; return TM.Options.NoNaNsFPMath; }]
+>;
+
+/*
+class UMUL24Pattern <Instruction UMUL24> : Pat <
+ (mul U24:$x, U24:$y),
+ (UMUL24 $x, $y)
+>;
+*/
+
+class IMad24Pat<Instruction Inst> : Pat <
+ (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
+ (Inst $src0, $src1, $src2)
+>;
+
+class UMad24Pat<Instruction Inst> : Pat <
+ (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
+ (Inst $src0, $src1, $src2)
+>;
+
+multiclass Expand24IBitOps<Instruction MulInst, Instruction AddInst> {
+ def _expand_imad24 : Pat <
+ (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2),
+ (AddInst (MulInst $src0, $src1), $src2)
+ >;
+
+ def _expand_imul24 : Pat <
+ (AMDGPUmul_i24 i32:$src0, i32:$src1),
+ (MulInst $src0, $src1)
+ >;
+}
+
+multiclass Expand24UBitOps<Instruction MulInst, Instruction AddInst> {
+ def _expand_umad24 : Pat <
+ (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2),
+ (AddInst (MulInst $src0, $src1), $src2)
+ >;
+
+ def _expand_umul24 : Pat <
+ (AMDGPUmul_u24 i32:$src0, i32:$src1),
+ (MulInst $src0, $src1)
+ >;
+}
+
+class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
+ (fdiv FP_ONE, vt:$src),
+ (RcpInst $src)
+>;
+
+class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
+ (AMDGPUrcp (fsqrt vt:$src)),
+ (RsqInst $src)
+>;
+
+include "R600Instructions.td"
+include "R700Instructions.td"
+include "EvergreenInstructions.td"
+include "CaymanInstructions.td"
+
+include "SIInstrInfo.td"
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
new file mode 100644
index 0000000..e94bb60
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
@@ -0,0 +1,77 @@
+//===- AMDGPUIntrinsicInfo.cpp - AMDGPU Intrinsic Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Implementation of the IntrinsicInfo class.
+//
+//===-----------------------------------------------------------------------===//
+
+#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+
+AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo()
+ : TargetIntrinsicInfo() {}
+
+std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
+ unsigned numTys) const {
+ static const char *const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+ if (IntrID < Intrinsic::num_intrinsics) {
+ return nullptr;
+ }
+ assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics &&
+ "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
+
+unsigned AMDGPUIntrinsicInfo::lookupName(const char *Name,
+ unsigned Len) const {
+ if (!StringRef(Name, Len).startswith("llvm."))
+ return 0; // All intrinsics start with 'llvm.'
+
+#define GET_FUNCTION_RECOGNIZER
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+ AMDGPUIntrinsic::ID IntrinsicID =
+ (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
+ IntrinsicID = getIntrinsicForGCCBuiltin("AMDGPU", Name);
+
+ if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
+ return IntrinsicID;
+ }
+ return 0;
+}
+
+bool AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
+// Overload Table
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+}
+
+Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ Type **Tys,
+ unsigned numTys) const {
+ llvm_unreachable("Not implemented");
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
new file mode 100644
index 0000000..4c95b5e
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
@@ -0,0 +1,48 @@
+//===- AMDGPUIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the AMDGPU Implementation of the Intrinsic Info class.
+//
+//===-----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUINTRINSICINFO_H
+#define LLVM_LIB_TARGET_R600_AMDGPUINTRINSICINFO_H
+
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+class TargetMachine;
+
+namespace AMDGPUIntrinsic {
+enum ID {
+ last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_AMDGPU_intrinsics
+};
+
+} // end namespace AMDGPUIntrinsic
+
+class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
+public:
+ AMDGPUIntrinsicInfo();
+ std::string getName(unsigned IntrId, Type **Tys = nullptr,
+ unsigned numTys = 0) const override;
+ unsigned lookupName(const char *Name, unsigned Len) const override;
+ bool isOverloaded(unsigned IID) const override;
+ Function *getDeclaration(Module *M, unsigned ID,
+ Type **Tys = nullptr,
+ unsigned numTys = 0) const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
new file mode 100644
index 0000000..ab489cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
@@ -0,0 +1,90 @@
+//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines intrinsics that are used by all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "AMDGPU", isTarget = 1 in {
+
+ def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_abs : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_fract : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ def int_AMDGPU_clamp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+
+ // This is named backwards (instead of rsq_legacy) so we don't have
+ // to define it with the public builtins intrinsics. This is a
+ // workaround for how intrinsic names are parsed. If the name is
+ // llvm.AMDGPU.rsq.legacy, the parser assumes that you meant
+  // llvm.AMDGPU.rsq.{f32 | f64} and incorrectly mangles the name.
+ def int_AMDGPU_legacy_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+ def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
+ def int_AMDGPU_kilp : Intrinsic<[], [], []>;
+ def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cvt_f32_ubyte0 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cvt_f32_ubyte1 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cvt_f32_ubyte2 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cvt_f32_ubyte3 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_AMDGPU_bfi : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
+ def int_AMDGPU_barrier_global : Intrinsic<[], [], []>;
+}
+
+// Legacy names for compatibility.
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ def int_AMDIL_abs : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ def int_AMDIL_fraction : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ def int_AMDIL_clamp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ def int_AMDIL_exp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ def int_AMDIL_round_nearest : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+}
+
+let TargetPrefix = "TGSI", isTarget = 1 in {
+
+ def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[IntrNoMem]>;
+}
+
+include "SIIntrinsics.td"
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
new file mode 100644
index 0000000..2083146
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -0,0 +1,154 @@
+//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPUMCInstLower.h"
+#include "AMDGPUAsmPrinter.h"
+#include "AMDGPUTargetMachine.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "R600InstrInfo.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include <algorithm>
+
+using namespace llvm;
+
+AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
+ Ctx(ctx), ST(st)
+{ }
+
+void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+
+ int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode());
+
+ if (MCOpcode == -1) {
+ LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext();
+ C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have "
+ "a target-specific version: " + Twine(MI->getOpcode()));
+ }
+
+ OutMI.setOpcode(MCOpcode);
+
+ for (const MachineOperand &MO : MI->explicit_operands()) {
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ break;
+ case MachineOperand::MO_Register:
+ MCOp = MCOperand::createReg(MO.getReg());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
+ MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName()));
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx));
+ break;
+ }
+ case MachineOperand::MO_TargetIndex: {
+ assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
+ MCOp = MCOperand::createExpr(Expr);
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
+ MCOp = MCOperand::createExpr(Expr);
+ break;
+ }
+ }
+ OutMI.addOperand(MCOp);
+ }
+}
+
+void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
+ AMDGPUMCInstLower MCInstLowering(OutContext, STI);
+
+#ifdef _DEBUG
+ StringRef Err;
+ if (!STI.getInstrInfo()->verifyInstruction(MI, Err)) {
+ errs() << "Warning: Illegal instruction detected: " << Err << "\n";
+ MI->dump();
+ }
+#endif
+ if (MI->isBundle()) {
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_instr_iterator I = MI;
+ ++I;
+ while (I != MBB->end() && I->isInsideBundle()) {
+ EmitInstruction(I);
+ ++I;
+ }
+ } else {
+ MCInst TmpInst;
+ MCInstLowering.lower(MI, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
+
+ if (STI.dumpCode()) {
+ // Disassemble instruction/operands to text.
+ DisasmLines.resize(DisasmLines.size() + 1);
+ std::string &DisasmLine = DisasmLines.back();
+ raw_string_ostream DisasmStream(DisasmLine);
+
+ AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(),
+ *MF->getSubtarget().getInstrInfo(),
+ *MF->getSubtarget().getRegisterInfo());
+ InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(),
+ MF->getSubtarget());
+
+ // Disassemble instruction/operands to hex representation.
+ SmallVector<MCFixup, 4> Fixups;
+ SmallVector<char, 16> CodeBytes;
+ raw_svector_ostream CodeStream(CodeBytes);
+
+ auto &ObjStreamer = static_cast<MCObjectStreamer&>(*OutStreamer);
+ MCCodeEmitter &InstEmitter = ObjStreamer.getAssembler().getEmitter();
+ InstEmitter.encodeInstruction(TmpInst, CodeStream, Fixups,
+ MF->getSubtarget<MCSubtargetInfo>());
+ CodeStream.flush();
+
+ HexLines.resize(HexLines.size() + 1);
+ std::string &HexLine = HexLines.back();
+ raw_string_ostream HexStream(HexLine);
+
+ for (size_t i = 0; i < CodeBytes.size(); i += 4) {
+ unsigned int CodeDWord = *(unsigned int *)&CodeBytes[i];
+ HexStream << format("%s%08X", (i > 0 ? " " : ""), CodeDWord);
+ }
+
+ DisasmStream.flush();
+ DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLine.size());
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
new file mode 100644
index 0000000..d322fe0
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
@@ -0,0 +1,35 @@
+//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUMCINSTLOWER_H
+#define LLVM_LIB_TARGET_R600_AMDGPUMCINSTLOWER_H
+
+namespace llvm {
+
+class AMDGPUSubtarget;
+class MachineInstr;
+class MCContext;
+class MCInst;
+
+class AMDGPUMCInstLower {
+ MCContext &Ctx;
+ const AMDGPUSubtarget &ST;
+
+public:
+ AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &ST);
+
+ /// \brief Lower a MachineInstr to an MCInst
+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+};
+
+} // End namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
new file mode 100644
index 0000000..21c7da6
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -0,0 +1,25 @@
+#include "AMDGPUMachineFunction.h"
+#include "AMDGPU.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+using namespace llvm;
+
+static const char *const ShaderTypeAttribute = "ShaderType";
+
+// Pin the vtable to this file.
+void AMDGPUMachineFunction::anchor() {}
+
+AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
+ MachineFunctionInfo(),
+ ShaderType(ShaderType::COMPUTE),
+ LDSSize(0),
+ ScratchSize(0),
+ IsKernel(true) {
+ Attribute A = MF.getFunction()->getFnAttribute(ShaderTypeAttribute);
+
+ if (A.isStringAttribute()) {
+ StringRef Str = A.getValueAsString();
+ if (Str.getAsInteger(0, ShaderType))
+ llvm_unreachable("Can't parse shader type!");
+ }
+}
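The shader type is carried as a plain string function attribute, so a frontend only needs to attach "ShaderType"="<n>" to each entry function. A hedged sketch; the numeric value assumes the ShaderType enumeration used by this target (e.g. COMPUTE):

    #include "llvm/IR/Function.h"

    // Tag a function so AMDGPUMachineFunction parses it as a compute kernel.
    void markAsComputeKernel(llvm::Function &F) {
      F.addFnAttr("ShaderType", "3"); // assumed ShaderType::COMPUTE value
    }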
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
new file mode 100644
index 0000000..e17b41a
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -0,0 +1,45 @@
+//===-- AMDGPUMachineFunction.h - AMDGPU Machine Function Info --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUMACHINEFUNCTION_H
+#define LLVM_LIB_TARGET_R600_AMDGPUMACHINEFUNCTION_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include <map>
+
+namespace llvm {
+
+class AMDGPUMachineFunction : public MachineFunctionInfo {
+ virtual void anchor();
+ unsigned ShaderType;
+
+public:
+ AMDGPUMachineFunction(const MachineFunction &MF);
+ /// A map to keep track of local memory objects and their offsets within
+ /// the local memory space.
+ std::map<const GlobalValue *, unsigned> LocalMemoryObjects;
+ /// Number of bytes in the LDS that are being used.
+ unsigned LDSSize;
+
+ /// Start of implicit kernel args
+ unsigned ABIArgOffset;
+
+ unsigned getShaderType() const {
+ return ShaderType;
+ }
+
+ unsigned ScratchSize;
+ bool IsKernel;
+};
+
+} // namespace llvm
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
new file mode 100644
index 0000000..4a65bfc
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -0,0 +1,407 @@
+//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates allocas by either converting them into vectors or
+// by migrating them to local address space.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "amdgpu-promote-alloca"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUPromoteAlloca : public FunctionPass,
+ public InstVisitor<AMDGPUPromoteAlloca> {
+
+ static char ID;
+ Module *Mod;
+ const AMDGPUSubtarget &ST;
+ int LocalMemAvailable;
+
+public:
+ AMDGPUPromoteAlloca(const AMDGPUSubtarget &st) : FunctionPass(ID), ST(st),
+ LocalMemAvailable(0) { }
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+ const char *getPassName() const override { return "AMDGPU Promote Alloca"; }
+ void visitAlloca(AllocaInst &I);
+};
+
+} // End anonymous namespace
+
+char AMDGPUPromoteAlloca::ID = 0;
+
+bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
+ Mod = &M;
+ return false;
+}
+
+bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
+
+ const FunctionType *FTy = F.getFunctionType();
+
+ LocalMemAvailable = ST.getLocalMemorySize();
+
+
+ // If the function has any arguments in the local address space, then it's
+ // possible these arguments require the entire local memory space, so
+ // we cannot use local memory in the pass.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
+ const Type *ParamTy = FTy->getParamType(i);
+ if (ParamTy->isPointerTy() &&
+ ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ LocalMemAvailable = 0;
+ DEBUG(dbgs() << "Function has local memory argument. Promoting to "
+ "local memory disabled.\n");
+ break;
+ }
+ }
+
+ if (LocalMemAvailable > 0) {
+ // Check how much local memory is being used by global objects
+ for (Module::global_iterator I = Mod->global_begin(),
+ E = Mod->global_end(); I != E; ++I) {
+ GlobalVariable *GV = I;
+ PointerType *GVTy = GV->getType();
+ if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+ continue;
+ for (Value::use_iterator U = GV->use_begin(),
+ UE = GV->use_end(); U != UE; ++U) {
+ Instruction *Use = dyn_cast<Instruction>(*U);
+ if (!Use)
+ continue;
+ if (Use->getParent()->getParent() == &F)
+ LocalMemAvailable -=
+ Mod->getDataLayout().getTypeAllocSize(GVTy->getElementType());
+ }
+ }
+ }
+
+ LocalMemAvailable = std::max(0, LocalMemAvailable);
+  DEBUG(dbgs() << LocalMemAvailable << " bytes free in local memory.\n");
+
+ visit(F);
+
+ return false;
+}
+
+static VectorType *arrayTypeToVecType(const Type *ArrayTy) {
+ return VectorType::get(ArrayTy->getArrayElementType(),
+ ArrayTy->getArrayNumElements());
+}
+
+static Value *
+calculateVectorIndex(Value *Ptr,
+ const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
+ if (isa<AllocaInst>(Ptr))
+ return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));
+
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
+
+ auto I = GEPIdx.find(GEP);
+ return I == GEPIdx.end() ? nullptr : I->second;
+}
+
+static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
+ // FIXME we only support simple cases
+ if (GEP->getNumOperands() != 3)
+ return NULL;
+
+ ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
+ if (!I0 || !I0->isZero())
+ return NULL;
+
+ return GEP->getOperand(2);
+}
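In practice this accepts only the canonical two-index form emitted for a private array, e.g. a GEP shaped like getelementptr [4 x float], [4 x float]* %alloca, i32 0, i32 %idx (illustrative): the first index must be the constant zero and the second index becomes the vector lane.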
+
+// Returns true only for instructions that the vector promotion below knows
+// how to rewrite.
+//
+// TODO: Check isTriviallyVectorizable for calls and handle other
+// instructions.
+static bool canVectorizeInst(Instruction *Inst) {
+ switch (Inst->getOpcode()) {
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
+ Type *AllocaTy = Alloca->getAllocatedType();
+
+ DEBUG(dbgs() << "Alloca Candidate for vectorization \n");
+
+ // FIXME: There is no reason why we can't support larger arrays, we
+ // are just being conservative for now.
+ if (!AllocaTy->isArrayTy() ||
+ AllocaTy->getArrayElementType()->isVectorTy() ||
+ AllocaTy->getArrayNumElements() > 4) {
+
+ DEBUG(dbgs() << " Cannot convert type to vector");
+ return false;
+ }
+
+ std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
+ std::vector<Value*> WorkList;
+ for (User *AllocaUser : Alloca->users()) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
+ if (!GEP) {
+ if (!canVectorizeInst(cast<Instruction>(AllocaUser)))
+ return false;
+
+ WorkList.push_back(AllocaUser);
+ continue;
+ }
+
+ Value *Index = GEPToVectorIndex(GEP);
+
+ // If we can't compute a vector index from this GEP, then we can't
+ // promote this alloca to vector.
+ if (!Index) {
+ DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n');
+ return false;
+ }
+
+ GEPVectorIdx[GEP] = Index;
+ for (User *GEPUser : AllocaUser->users()) {
+ if (!canVectorizeInst(cast<Instruction>(GEPUser)))
+ return false;
+
+ WorkList.push_back(GEPUser);
+ }
+ }
+
+ VectorType *VectorTy = arrayTypeToVecType(AllocaTy);
+
+ DEBUG(dbgs() << " Converting alloca to vector "
+ << *AllocaTy << " -> " << *VectorTy << '\n');
+
+ for (std::vector<Value*>::iterator I = WorkList.begin(),
+ E = WorkList.end(); I != E; ++I) {
+ Instruction *Inst = cast<Instruction>(*I);
+ IRBuilder<> Builder(Inst);
+ switch (Inst->getOpcode()) {
+ case Instruction::Load: {
+ Value *Ptr = Inst->getOperand(0);
+ Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
+ Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
+ Value *VecValue = Builder.CreateLoad(BitCast);
+ Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
+ Inst->replaceAllUsesWith(ExtractElement);
+ Inst->eraseFromParent();
+ break;
+ }
+ case Instruction::Store: {
+ Value *Ptr = Inst->getOperand(1);
+ Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
+ Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
+ Value *VecValue = Builder.CreateLoad(BitCast);
+ Value *NewVecValue = Builder.CreateInsertElement(VecValue,
+ Inst->getOperand(0),
+ Index);
+ Builder.CreateStore(NewVecValue, BitCast);
+ Inst->eraseFromParent();
+ break;
+ }
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ break;
+
+ default:
+ Inst->dump();
+ llvm_unreachable("Inconsistency in instructions promotable to vector");
+ }
+ }
+ return true;
+}
+
+static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
+ bool Success = true;
+ for (User *User : Val->users()) {
+ if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
+ continue;
+ if (isa<CallInst>(User)) {
+ WorkList.push_back(User);
+ continue;
+ }
+
+ // FIXME: Correctly handle ptrtoint instructions.
+ Instruction *UseInst = dyn_cast<Instruction>(User);
+ if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)
+ return false;
+
+ if (!User->getType()->isPointerTy())
+ continue;
+
+ WorkList.push_back(User);
+
+ Success &= collectUsesWithPtrTypes(User, WorkList);
+ }
+ return Success;
+}
+
+void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
+ IRBuilder<> Builder(&I);
+
+ // First try to replace the alloca with a vector
+ Type *AllocaTy = I.getAllocatedType();
+
+ DEBUG(dbgs() << "Trying to promote " << I << '\n');
+
+ if (tryPromoteAllocaToVector(&I))
+ return;
+
+ DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
+
+  // FIXME: This is the maximum work group size.  We should try to get this
+ // value from the reqd_work_group_size function attribute if it is
+ // available.
+ unsigned WorkGroupSize = 256;
+ int AllocaSize =
+ WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);
+
+ if (AllocaSize > LocalMemAvailable) {
+ DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
+ return;
+ }
+
+ std::vector<Value*> WorkList;
+
+ if (!collectUsesWithPtrTypes(&I, WorkList)) {
+ DEBUG(dbgs() << " Do not know how to convert all uses\n");
+ return;
+ }
+
+ DEBUG(dbgs() << "Promoting alloca to local memory\n");
+ LocalMemAvailable -= AllocaSize;
+
+ Type *GVTy = ArrayType::get(I.getAllocatedType(), 256);
+ GlobalVariable *GV = new GlobalVariable(
+ *Mod, GVTy, false, GlobalValue::ExternalLinkage, 0, I.getName(), 0,
+ GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
+
+ FunctionType *FTy = FunctionType::get(
+ Type::getInt32Ty(Mod->getContext()), false);
+ AttributeSet AttrSet;
+ AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone);
+
+ Value *ReadLocalSizeY = Mod->getOrInsertFunction(
+ "llvm.r600.read.local.size.y", FTy, AttrSet);
+ Value *ReadLocalSizeZ = Mod->getOrInsertFunction(
+ "llvm.r600.read.local.size.z", FTy, AttrSet);
+ Value *ReadTIDIGX = Mod->getOrInsertFunction(
+ "llvm.r600.read.tidig.x", FTy, AttrSet);
+ Value *ReadTIDIGY = Mod->getOrInsertFunction(
+ "llvm.r600.read.tidig.y", FTy, AttrSet);
+ Value *ReadTIDIGZ = Mod->getOrInsertFunction(
+ "llvm.r600.read.tidig.z", FTy, AttrSet);
+
+ Value *TCntY = Builder.CreateCall(ReadLocalSizeY, {});
+ Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ, {});
+ Value *TIdX = Builder.CreateCall(ReadTIDIGX, {});
+ Value *TIdY = Builder.CreateCall(ReadTIDIGY, {});
+ Value *TIdZ = Builder.CreateCall(ReadTIDIGZ, {});
+
+ Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
+ Tmp0 = Builder.CreateMul(Tmp0, TIdX);
+ Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
+ Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
+ TID = Builder.CreateAdd(TID, TIdZ);
+
+ std::vector<Value*> Indices;
+ Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
+ Indices.push_back(TID);
+
+ Value *Offset = Builder.CreateGEP(GVTy, GV, Indices);
+ I.mutateType(Offset->getType());
+ I.replaceAllUsesWith(Offset);
+ I.eraseFromParent();
+
+ for (std::vector<Value*>::iterator i = WorkList.begin(),
+ e = WorkList.end(); i != e; ++i) {
+ Value *V = *i;
+ CallInst *Call = dyn_cast<CallInst>(V);
+ if (!Call) {
+ Type *EltTy = V->getType()->getPointerElementType();
+ PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+
+ // The operand's value should be corrected on its own.
+ if (isa<AddrSpaceCastInst>(V))
+ continue;
+
+ // FIXME: It doesn't really make sense to try to do this for all
+ // instructions.
+ V->mutateType(NewTy);
+ continue;
+ }
+
+ IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
+ if (!Intr) {
+ std::vector<Type*> ArgTypes;
+ for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
+ ArgIdx != ArgEnd; ++ArgIdx) {
+ ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
+ }
+ Function *F = Call->getCalledFunction();
+ FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
+ F->isVarArg());
+ Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
+ NewType, F->getAttributes());
+ Function *NewF = cast<Function>(C);
+ Call->setCalledFunction(NewF);
+ continue;
+ }
+
+ Builder.SetInsertPoint(Intr);
+ switch (Intr->getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // These intrinsics are for address space 0 only
+ Intr->eraseFromParent();
+ continue;
+ case Intrinsic::memcpy: {
+ MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
+ Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
+ MemCpy->getLength(), MemCpy->getAlignment(),
+ MemCpy->isVolatile());
+ Intr->eraseFromParent();
+ continue;
+ }
+ case Intrinsic::memset: {
+ MemSetInst *MemSet = cast<MemSetInst>(Intr);
+ Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
+ MemSet->getLength(), MemSet->getAlignment(),
+ MemSet->isVolatile());
+ Intr->eraseFromParent();
+ continue;
+ }
+ default:
+ Intr->dump();
+ llvm_unreachable("Don't know how to promote alloca intrinsic use.");
+ }
+ }
+}
+
+FunctionPass *llvm::createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST) {
+ return new AMDGPUPromoteAlloca(ST);
+}
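The chain of mul/add instructions built in visitAlloca computes a flattened workitem id so that each workitem addresses its own slot of the enlarged LDS array. As a scalar sketch (not part of the pass):

    // Scalar form of the index built from the tidig.* / local.size.* calls:
    //   tid = tidX * (sizeY * sizeZ) + tidY * sizeZ + tidZ
    unsigned flatWorkitemId(unsigned TidX, unsigned TidY, unsigned TidZ,
                            unsigned SizeY, unsigned SizeZ) {
      return TidX * SizeY * SizeZ + TidY * SizeZ + TidZ;
    }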
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
new file mode 100644
index 0000000..3ca0eca
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@@ -0,0 +1,63 @@
+//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Parent TargetRegisterInfo class common to all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {}
+
+//===----------------------------------------------------------------------===//
+// Function handling callbacks - Functions are a seldom-used feature of GPUs, so
+// they are not supported at this time.
+//===----------------------------------------------------------------------===//
+
+const MCPhysReg AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
+
+const MCPhysReg*
+AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ return &CalleeSavedReg;
+}
+
+void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ llvm_unreachable("Subroutines not supported yet");
+}
+
+unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ return AMDGPU::NoRegister;
+}
+
+unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const {
+ static const unsigned SubRegs[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
+ AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9,
+ AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14,
+ AMDGPU::sub15
+ };
+
+ assert(Channel < array_lengthof(SubRegs));
+ return SubRegs[Channel];
+}
+
+unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
+
+ return getSubRegFromChannel(IndirectIndex);
+}
+
+#define GET_REGINFO_TARGET_DESC
+#include "AMDGPUGenRegisterInfo.inc"
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
new file mode 100644
index 0000000..cfd800b
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -0,0 +1,64 @@
+//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief TargetRegisterInfo interface that is implemented by all hw codegen
+/// targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUREGISTERINFO_H
+#define LLVM_LIB_TARGET_R600_AMDGPUREGISTERINFO_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc"
+
+namespace llvm {
+
+class AMDGPUSubtarget;
+class TargetInstrInfo;
+
+struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
+ static const MCPhysReg CalleeSavedReg;
+
+ AMDGPURegisterInfo();
+
+ BitVector getReservedRegs(const MachineFunction &MF) const override {
+ assert(!"Unimplemented"); return BitVector();
+ }
+
+ virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
+ assert(!"Unimplemented"); return nullptr;
+ }
+
+ virtual unsigned getHWRegIndex(unsigned Reg) const {
+ assert(!"Unimplemented"); return 0;
+ }
+
+ /// \returns the sub reg enum value for the given \p Channel
+ /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
+ unsigned getSubRegFromChannel(unsigned Channel) const;
+
+ const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const override;
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const override;
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
+
+ unsigned getIndirectSubReg(unsigned IndirectIndex) const;
+
+};
+
+} // End namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td
new file mode 100644
index 0000000..835a146
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td
@@ -0,0 +1,26 @@
+//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tablegen register definitions common to all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AMDGPU" in {
+
+foreach Index = 0-15 in {
+ // Indices are used in a variety of ways here, so don't set a size/offset.
+ def sub#Index : SubRegIndex<-1, -1>;
+}
+
+def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
+
+}
+
+include "R600RegisterInfo.td"
+include "SIRegisterInfo.td"
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
new file mode 100644
index 0000000..605ccd0
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -0,0 +1,133 @@
+//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUSubtarget.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
+#include "R600MachineScheduler.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-subtarget"
+
+#define GET_SUBTARGETINFO_ENUM
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AMDGPUGenSubtargetInfo.inc"
+
+AMDGPUSubtarget &
+AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
+ StringRef GPU, StringRef FS) {
+ // Determine default and user-specified characteristics
+ // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
+ // enabled, but some instructions do not respect them and they run at the
+ // double precision rate, so don't enable by default.
+ //
+ // We want to be able to turn these off, but making this a subtarget feature
+ // for SI has the unhelpful behavior that it unsets everything else if you
+ // disable it.
+
+ SmallString<256> FullFS("+promote-alloca,+fp64-denormals,");
+ FullFS += FS;
+
+ if (GPU == "" && TT.getArch() == Triple::amdgcn)
+ GPU = "SI";
+
+ ParseSubtargetFeatures(GPU, FullFS);
+
+  // FIXME: I don't think Evergreen has any useful support for
+ // denormals, but should be checked. Should we issue a warning somewhere
+ // if someone tries to enable these?
+ if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ FP32Denormals = false;
+ FP64Denormals = false;
+ }
+ return *this;
+}
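For example, with GPU "SI" and a user feature string of "-fp64-denormals", the string handed to ParseSubtargetFeatures would be "+promote-alloca,+fp64-denormals,-fp64-denormals"; features are applied left to right, so the user's later entry is expected to override the default (an illustrative reading of the ordering above, not an exhaustive description of feature parsing).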
+
+AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
+ TargetMachine &TM)
+ : AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false),
+ DumpCode(false), R600ALUInst(false), HasVertexCache(false),
+ TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
+ FP64Denormals(false), FP32Denormals(false), FastFMAF32(false),
+ CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true),
+ EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
+ WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
+ EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
+ GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
+ FrameLowering(TargetFrameLowering::StackGrowsUp,
+ 64 * 16, // Maximum stack alignment (long16)
+ 0),
+ InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
+
+ initializeSubtargetDependencies(TT, GPU, FS);
+
+ if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ InstrInfo.reset(new R600InstrInfo(*this));
+ TLInfo.reset(new R600TargetLowering(TM, *this));
+ } else {
+ InstrInfo.reset(new SIInstrInfo(*this));
+ TLInfo.reset(new SITargetLowering(TM, *this));
+ }
+}
+
+unsigned AMDGPUSubtarget::getStackEntrySize() const {
+ assert(getGeneration() <= NORTHERN_ISLANDS);
+ switch(getWavefrontSize()) {
+ case 16:
+ return 8;
+ case 32:
+ return hasCaymanISA() ? 4 : 8;
+ case 64:
+ return 4;
+ default:
+ llvm_unreachable("Illegal wavefront size.");
+ }
+}
+
+unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
+ switch(getGeneration()) {
+ default: llvm_unreachable("ChipID unknown");
+ case SEA_ISLANDS: return 12;
+ }
+}
+
+bool AMDGPUSubtarget::isVGPRSpillingEnabled(
+ const SIMachineFunctionInfo *MFI) const {
+ return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
+}
+
+void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin,
+ MachineInstr *end,
+ unsigned NumRegionInstrs) const {
+ if (getGeneration() >= SOUTHERN_ISLANDS) {
+
+ // Track register pressure so the scheduler can try to decrease
+ // pressure once register usage is above the threshold defined by
+ // SIRegisterInfo::getRegPressureSetLimit()
+ Policy.ShouldTrackPressure = true;
+
+ // Enabling both top down and bottom up scheduling seems to give us less
+ // register spills than just using one of these approaches on its own.
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+ }
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
new file mode 100644
index 0000000..0d40d14
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -0,0 +1,282 @@
+//===-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H
+#define LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H
+#include "AMDGPU.h"
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "R600ISelLowering.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AMDGPUGenSubtargetInfo.inc"
+
+namespace llvm {
+
+class SIMachineFunctionInfo;
+
+class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
+
+public:
+ enum Generation {
+ R600 = 0,
+ R700,
+ EVERGREEN,
+ NORTHERN_ISLANDS,
+ SOUTHERN_ISLANDS,
+ SEA_ISLANDS,
+ VOLCANIC_ISLANDS,
+ };
+
+ enum {
+ FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
+ };
+
+private:
+ std::string DevName;
+ bool Is64bit;
+ bool DumpCode;
+ bool R600ALUInst;
+ bool HasVertexCache;
+ short TexVTXClauseSize;
+ Generation Gen;
+ bool FP64;
+ bool FP64Denormals;
+ bool FP32Denormals;
+ bool FastFMAF32;
+ bool CaymanISA;
+ bool FlatAddressSpace;
+ bool EnableIRStructurizer;
+ bool EnablePromoteAlloca;
+ bool EnableIfCvt;
+ bool EnableLoadStoreOpt;
+ unsigned WavefrontSize;
+ bool CFALUBug;
+ int LocalMemorySize;
+ bool EnableVGPRSpilling;
+ bool SGPRInitBug;
+ bool IsGCN;
+ bool GCN1Encoding;
+ bool GCN3Encoding;
+ bool CIInsts;
+ bool FeatureDisable;
+ int LDSBankCount;
+
+ AMDGPUFrameLowering FrameLowering;
+ std::unique_ptr<AMDGPUTargetLowering> TLInfo;
+ std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
+ InstrItineraryData InstrItins;
+ Triple TargetTriple;
+
+public:
+ AMDGPUSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ TargetMachine &TM);
+ AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
+ StringRef GPU, StringRef FS);
+
+ const AMDGPUFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const AMDGPUInstrInfo *getInstrInfo() const override {
+ return InstrInfo.get();
+ }
+ const AMDGPURegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo->getRegisterInfo();
+ }
+ AMDGPUTargetLowering *getTargetLowering() const override {
+ return TLInfo.get();
+ }
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool is64bit() const {
+ return Is64bit;
+ }
+
+ bool hasVertexCache() const {
+ return HasVertexCache;
+ }
+
+ short getTexVTXClauseSize() const {
+ return TexVTXClauseSize;
+ }
+
+ Generation getGeneration() const {
+ return Gen;
+ }
+
+ bool hasHWFP64() const {
+ return FP64;
+ }
+
+ bool hasCaymanISA() const {
+ return CaymanISA;
+ }
+
+ bool hasFP32Denormals() const {
+ return FP32Denormals;
+ }
+
+ bool hasFP64Denormals() const {
+ return FP64Denormals;
+ }
+
+ bool hasFastFMAF32() const {
+ return FastFMAF32;
+ }
+
+ bool hasFlatAddressSpace() const {
+ return FlatAddressSpace;
+ }
+
+ bool hasBFE() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasBFI() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasBFM() const {
+ return hasBFE();
+ }
+
+ bool hasBCNT(unsigned Size) const {
+ if (Size == 32)
+ return (getGeneration() >= EVERGREEN);
+
+ if (Size == 64)
+ return (getGeneration() >= SOUTHERN_ISLANDS);
+
+ return false;
+ }
+
+ bool hasMulU24() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasMulI24() const {
+ return (getGeneration() >= SOUTHERN_ISLANDS ||
+ hasCaymanISA());
+ }
+
+ bool hasFFBL() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasFFBH() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasCARRY() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasBORROW() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool IsIRStructurizerEnabled() const {
+ return EnableIRStructurizer;
+ }
+
+ bool isPromoteAllocaEnabled() const {
+ return EnablePromoteAlloca;
+ }
+
+ bool isIfCvtEnabled() const {
+ return EnableIfCvt;
+ }
+
+ bool loadStoreOptEnabled() const {
+ return EnableLoadStoreOpt;
+ }
+
+ unsigned getWavefrontSize() const {
+ return WavefrontSize;
+ }
+
+ unsigned getStackEntrySize() const;
+
+ bool hasCFAluBug() const {
+ assert(getGeneration() <= NORTHERN_ISLANDS);
+ return CFALUBug;
+ }
+
+ int getLocalMemorySize() const {
+ return LocalMemorySize;
+ }
+
+ bool hasSGPRInitBug() const {
+ return SGPRInitBug;
+ }
+
+ int getLDSBankCount() const {
+ return LDSBankCount;
+ }
+
+ unsigned getAmdKernelCodeChipID() const;
+
+ bool enableMachineScheduler() const override {
+ return true;
+ }
+
+ void overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin, MachineInstr *end,
+ unsigned NumRegionInstrs) const override;
+
+ // Helper functions to simplify if statements
+ bool isTargetELF() const {
+ return false;
+ }
+
+ StringRef getDeviceName() const {
+ return DevName;
+ }
+
+ bool dumpCode() const {
+ return DumpCode;
+ }
+ bool r600ALUEncoding() const {
+ return R600ALUInst;
+ }
+ bool isAmdHsaOS() const {
+ return TargetTriple.getOS() == Triple::AMDHSA;
+ }
+ bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;
+
+ unsigned getMaxWavesPerCU() const {
+ if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ return 10;
+
+    // FIXME: Not sure what this is for other subtargets.
+ llvm_unreachable("do not know max waves per CU for this subtarget.");
+ }
+
+ bool enableSubRegLiveness() const override {
+ return true;
+ }
+};
+
+} // End namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
new file mode 100644
index 0000000..a9a911a
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -0,0 +1,292 @@
+//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief The AMDGPU target machine contains all of the hardware specific
+/// information needed to emit code for R600 and SI GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "AMDGPU.h"
+#include "AMDGPUTargetTransformInfo.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
+#include "R600MachineScheduler.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+#include <llvm/CodeGen/Passes.h>
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeAMDGPUTarget() {
+ // Register the target
+ RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
+ RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);
+}
+
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>());
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry("r600", "Run R600's custom scheduler",
+ createR600MachineScheduler);
+
+static std::string computeDataLayout(const Triple &TT) {
+ std::string Ret = "e-p:32:32";
+
+ if (TT.getArch() == Triple::amdgcn) {
+ // 32-bit private, local, and region pointers. 64-bit global and constant.
+ Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
+ }
+
+ Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
+ "-v512:512-v1024:1024-v2048:2048-n32:64";
+
+ return Ret;
+}
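For an amdgcn triple the concatenation above therefore produces (illustrative expansion):

    e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64

i.e. 32-bit private, local and region pointers and 64-bit global and constant pointers, on top of the shared integer and vector alignments and the native integer widths.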
+
+AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OptLevel)
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
+ OptLevel),
+ TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this),
+ IntrinsicInfo() {
+ setRequiresStructuredCFG(true);
+ initAsmInfo();
+}
+
+AMDGPUTargetMachine::~AMDGPUTargetMachine() {
+ delete TLOF;
+}
+
+//===----------------------------------------------------------------------===//
+// R600 Target Machine (R600 -> Cayman)
+//===----------------------------------------------------------------------===//
+
+R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
+ StringRef FS, StringRef CPU,
+ TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL)
+ : AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) {}
+
+//===----------------------------------------------------------------------===//
+// GCN Target Machine (SI+)
+//===----------------------------------------------------------------------===//
+
+GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
+ StringRef FS, StringRef CPU,
+ TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL)
+ : AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) {}
+
+//===----------------------------------------------------------------------===//
+// AMDGPU Pass Setup
+//===----------------------------------------------------------------------===//
+
+namespace {
+class AMDGPUPassConfig : public TargetPassConfig {
+public:
+ AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
+ return getTM<AMDGPUTargetMachine>();
+ }
+
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ return createR600MachineScheduler(C);
+ return nullptr;
+ }
+
+ void addIRPasses() override;
+ void addCodeGenPrepare() override;
+ virtual bool addPreISel() override;
+ virtual bool addInstSelector() override;
+};
+
+class R600PassConfig : public AMDGPUPassConfig {
+public:
+ R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
+ : AMDGPUPassConfig(TM, PM) { }
+
+ bool addPreISel() override;
+ void addPreRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
+};
+
+class GCNPassConfig : public AMDGPUPassConfig {
+public:
+ GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
+ : AMDGPUPassConfig(TM, PM) { }
+ bool addPreISel() override;
+ bool addInstSelector() override;
+ void addPreRegAlloc() override;
+ void addPostRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
+};
+
+} // End of anonymous namespace
+
+TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis(
+ [this](Function &F) { return TargetTransformInfo(AMDGPUTTIImpl(this)); });
+}
+
+void AMDGPUPassConfig::addIRPasses() {
+ // Function calls are not supported, so make sure we inline everything.
+ addPass(createAMDGPUAlwaysInlinePass());
+ addPass(createAlwaysInlinerPass());
+ // We need to add the barrier noop pass, otherwise adding the function
+  // inlining pass will cause all of the PassConfig's passes to be run
+  // one function at a time, which means if we have a module with two
+ // functions, then we will generate code for the first function
+ // without ever running any passes on the second.
+ addPass(createBarrierNoopPass());
+ TargetPassConfig::addIRPasses();
+}
+
+void AMDGPUPassConfig::addCodeGenPrepare() {
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
+ if (ST.isPromoteAllocaEnabled()) {
+ addPass(createAMDGPUPromoteAlloca(ST));
+ addPass(createSROAPass());
+ }
+ TargetPassConfig::addCodeGenPrepare();
+}
+
+bool
+AMDGPUPassConfig::addPreISel() {
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
+ addPass(createFlattenCFGPass());
+ if (ST.IsIRStructurizerEnabled())
+ addPass(createStructurizeCFGPass());
+ return false;
+}
+
+bool AMDGPUPassConfig::addInstSelector() {
+ addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// R600 Pass Setup
+//===----------------------------------------------------------------------===//
+
+bool R600PassConfig::addPreISel() {
+ AMDGPUPassConfig::addPreISel();
+ addPass(createR600TextureIntrinsicsReplacer());
+ return false;
+}
+
+void R600PassConfig::addPreRegAlloc() {
+ addPass(createR600VectorRegMerger(*TM));
+}
+
+void R600PassConfig::addPreSched2() {
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
+ addPass(createR600EmitClauseMarkers(), false);
+ if (ST.isIfCvtEnabled())
+ addPass(&IfConverterID, false);
+ addPass(createR600ClauseMergePass(*TM), false);
+}
+
+void R600PassConfig::addPreEmitPass() {
+ addPass(createAMDGPUCFGStructurizerPass(), false);
+ addPass(createR600ExpandSpecialInstrsPass(*TM), false);
+ addPass(&FinalizeMachineBundlesID, false);
+ addPass(createR600Packetizer(*TM), false);
+ addPass(createR600ControlFlowFinalizer(*TM), false);
+}
+
+TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new R600PassConfig(this, PM);
+}
+
+//===----------------------------------------------------------------------===//
+// GCN Pass Setup
+//===----------------------------------------------------------------------===//
+
+bool GCNPassConfig::addPreISel() {
+ AMDGPUPassConfig::addPreISel();
+ addPass(createSinkingPass());
+ addPass(createSITypeRewriter());
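+ // Annotate divergent control flow so it can later be lowered to exec-mask
+ // operations on GCN.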
+ addPass(createSIAnnotateControlFlowPass());
+ return false;
+}
+
+bool GCNPassConfig::addInstSelector() {
+ AMDGPUPassConfig::addInstSelector();
+ addPass(createSILowerI1CopiesPass());
+ addPass(createSIFixSGPRCopiesPass(*TM));
+ addPass(createSIFoldOperandsPass());
+ return false;
+}
+
+void GCNPassConfig::addPreRegAlloc() {
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
+
+ // This needs to be run directly before register allocation because
+ // earlier passes might recompute live intervals.
+ // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
+ if (getOptLevel() > CodeGenOpt::None) {
+ initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
+ }
+
+ if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
+ // Don't do this with no optimizations since it throws away debug info by
+ // merging nonadjacent loads.
+
+ // This should be run after scheduling, but before register allocation. It
+ // also needs extra copies to the address operand to be eliminated.
+ initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
+ insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
+ }
+ addPass(createSIShrinkInstructionsPass(), false);
+ addPass(createSIFixSGPRLiveRangesPass(), false);
+}
+
+void GCNPassConfig::addPostRegAlloc() {
+ addPass(createSIPrepareScratchRegs(), false);
+ addPass(createSIShrinkInstructionsPass(), false);
+}
+
+void GCNPassConfig::addPreSched2() {
+ addPass(createSIInsertWaits(*TM), false);
+}
+
+void GCNPassConfig::addPreEmitPass() {
+ addPass(createSILowerControlFlowPass(*TM), false);
+}
+
+TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new GCNPassConfig(this, PM);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
new file mode 100644
index 0000000..14792e3
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -0,0 +1,89 @@
+//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief The AMDGPU TargetMachine interface definition for hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUTARGETMACHINE_H
+#define LLVM_LIB_TARGET_R600_AMDGPUTARGETMACHINE_H
+
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "R600ISelLowering.h"
+#include "llvm/IR/DataLayout.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// AMDGPU Target Machine (R600+)
+//===----------------------------------------------------------------------===//
+
+class AMDGPUTargetMachine : public LLVMTargetMachine {
+private:
+
+protected:
+ TargetLoweringObjectFile *TLOF;
+ AMDGPUSubtarget Subtarget;
+ AMDGPUIntrinsicInfo IntrinsicInfo;
+
+public:
+ AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef FS,
+ StringRef CPU, TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
+ ~AMDGPUTargetMachine();
+
+ const AMDGPUSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
+ const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
+ return &IntrinsicInfo;
+ }
+ TargetIRAnalysis getTargetIRAnalysis() override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// R600 Target Machine (R600 -> Cayman)
+//===----------------------------------------------------------------------===//
+
+class R600TargetMachine : public AMDGPUTargetMachine {
+
+public:
+ R600TargetMachine(const Target &T, const Triple &TT, StringRef FS,
+ StringRef CPU, TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
+
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+};
+
+//===----------------------------------------------------------------------===//
+// GCN Target Machine (SI+)
+//===----------------------------------------------------------------------===//
+
+class GCNTargetMachine : public AMDGPUTargetMachine {
+
+public:
+ GCNTargetMachine(const Target &T, const Triple &TT, StringRef FS,
+ StringRef CPU, TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
+
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+};
+
+} // End namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
new file mode 100644
index 0000000..6dacc74
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -0,0 +1,82 @@
+//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements a TargetTransformInfo analysis pass specific to the
+// AMDGPU target machine. It uses the target's detailed information to provide
+// more precise answers to certain TTI queries, while letting the target
+// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetTransformInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "AMDGPUtti"
+
+void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
+ TTI::UnrollingPreferences &UP) {
+ UP.Threshold = 300; // Twice the default.
+ UP.MaxCount = UINT_MAX;
+ UP.Partial = true;
+
+ // TODO: Do we want runtime unrolling?
+
+ for (const BasicBlock *BB : L->getBlocks()) {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ for (const Instruction &I : *BB) {
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
+ if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+ continue;
+
+ const Value *Ptr = GEP->getPointerOperand();
+ const AllocaInst *Alloca =
+ dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
+ if (Alloca) {
+ // We want to do whatever we can to limit the number of alloca
+ // instructions that make it through to the code generator. Allocas
+ // require us to use indirect addressing, which is slow and prone to
+ // compiler bugs. If this loop does an address calculation on an
+ // alloca ptr, then we want to use a higher than normal loop unroll
+ // threshold. This will give SROA a better chance to eliminate these
+ // allocas.
+ //
+ // Don't use the maximum allowed value here as it will make some
+ // programs way too big.
+ UP.Threshold = 800;
+ }
+ }
+ }
+}
+
+unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) {
+ if (Vec)
+ return 0;
+
+ // Number of VGPRs on SI.
+ if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ return 256;
+
+ return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
+}
+
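+// Report a 32-bit register width to the vectorizer regardless of whether
+// scalar or vector registers are queried.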
+unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool) { return 32; }
+
+unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+ // Semi-arbitrary large amount.
+ return 64;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
new file mode 100644
index 0000000..791c84e
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -0,0 +1,78 @@
+//===-- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares a TargetTransformInfo::Concept conforming object specific to the
+/// AMDGPU target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_R600_AMDGPUTARGETTRANSFORMINFO_H
+
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class AMDGPUTTIImpl : public BasicTTIImplBase<AMDGPUTTIImpl> {
+ typedef BasicTTIImplBase<AMDGPUTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const AMDGPUSubtarget *ST;
+ const AMDGPUTargetLowering *TLI;
+
+ const AMDGPUSubtarget *getST() const { return ST; }
+ const AMDGPUTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM)
+ : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ AMDGPUTTIImpl(const AMDGPUTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ AMDGPUTTIImpl(AMDGPUTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ AMDGPUTTIImpl &operator=(const AMDGPUTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ AMDGPUTTIImpl &operator=(AMDGPUTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
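+  // AMDGPU is a SIMT target, so control flow can diverge between lanes; report
+  // that to the passes that care about divergence (e.g. the vectorizers).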
+ bool hasBranchDivergence() { return true; }
+
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ return ST->hasBCNT(TyWidth) ? TTI::PSK_FastHardware : TTI::PSK_Software;
+ }
+
+ unsigned getNumberOfRegisters(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector);
+ unsigned getMaxInterleaveFactor(unsigned VF);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
new file mode 100644
index 0000000..c9b25a1
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -0,0 +1,1912 @@
+//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <deque>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "structcfg"
+
+#define DEFAULT_VEC_SLOTS 8
+
+// TODO: move-begin.
+
+//===----------------------------------------------------------------------===//
+//
+// Statistics for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+
+STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
+ "matched");
+STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
+ "matched");
+STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
+ "pattern matched");
+STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
+STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
+
+namespace llvm {
+ void initializeAMDGPUCFGStructurizerPass(PassRegistry&);
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Miscellaneous utility for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+namespace {
+#define SHOWNEWINSTR(i) \
+ DEBUG(dbgs() << "New instr: " << *i << "\n");
+
+#define SHOWNEWBLK(b, msg) \
+DEBUG( \
+ dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ dbgs() << "\n"; \
+);
+
+#define SHOWBLK_DETAIL(b, msg) \
+DEBUG( \
+ if (b) { \
+ dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ b->print(dbgs()); \
+ dbgs() << "\n"; \
+ } \
+);
+
+#define INVALIDSCCNUM -1
+
+template<class NodeT>
+void ReverseVector(SmallVectorImpl<NodeT *> &Src) {
+ size_t sz = Src.size();
+ for (size_t i = 0; i < sz/2; ++i) {
+ NodeT *t = Src[i];
+ Src[i] = Src[sz - i - 1];
+ Src[sz - i - 1] = t;
+ }
+}
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+//
+// Supporting data structures for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+
+
+namespace {
+
+class BlockInformation {
+public:
+ bool IsRetired;
+ int SccNum;
+ BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace {
+class AMDGPUCFGStructurizer : public MachineFunctionPass {
+public:
+ typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
+ typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
+ typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
+
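+ // Result of singlePathTo(): the walk along single-successor blocks either
+ // reaches DstMBB (SinglePath_InPath), ends without reaching it
+ // (SinglePath_NotInPath), or cannot be followed at all (Not_SinglePath).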
+ enum PathToKind {
+ Not_SinglePath = 0,
+ SinglePath_InPath = 1,
+ SinglePath_NotInPath = 2
+ };
+
+ static char ID;
+
+ AMDGPUCFGStructurizer() :
+ MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {
+ initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ const char *getPassName() const override {
+ return "AMDGPU Control Flow Graph structurizer Pass";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ }
+
+ /// Perform the CFG structurization
+ bool run();
+
+ /// Perform the CFG preparation
+ /// This step will remove every unconditional/dead jump instruction and make
+ /// sure all loops have an exit block.
+ bool prepare();
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TRI = &TII->getRegisterInfo();
+ DEBUG(MF.dump(););
+ OrderedBlks.clear();
+ Visited.clear();
+ FuncRep = &MF;
+ MLI = &getAnalysis<MachineLoopInfo>();
+ DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
+ MDT = &getAnalysis<MachineDominatorTree>();
+ DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr););
+ PDT = &getAnalysis<MachinePostDominatorTree>();
+ DEBUG(PDT->print(dbgs()););
+ prepare();
+ run();
+ DEBUG(MF.dump(););
+ return true;
+ }
+
+protected:
+ MachineDominatorTree *MDT;
+ MachinePostDominatorTree *PDT;
+ MachineLoopInfo *MLI;
+ const R600InstrInfo *TII;
+ const AMDGPURegisterInfo *TRI;
+
+ // PRINT FUNCTIONS
+ /// Print the ordered Blocks.
+ void printOrderedBlocks() const {
+ size_t i = 0;
+ for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
+ iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
+ dbgs() << "BB" << (*iterBlk)->getNumber();
+ dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
+ if (i != 0 && i % 10 == 0) {
+ dbgs() << "\n";
+ } else {
+ dbgs() << " ";
+ }
+ }
+ }
+ static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
+ for (MachineLoop::iterator iter = LoopInfo.begin(),
+ iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
+ (*iter)->print(dbgs(), 0);
+ }
+ }
+
+ // UTILITY FUNCTIONS
+ int getSCCNum(MachineBasicBlock *MBB) const;
+ MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
+ bool hasBackEdge(MachineBasicBlock *MBB) const;
+ static unsigned getLoopDepth(MachineLoop *LoopRep);
+ bool isRetiredBlock(MachineBasicBlock *MBB) const;
+ bool isActiveLoophead(MachineBasicBlock *MBB) const;
+ PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
+ bool AllowSideEntry = true) const;
+ int countActiveBlock(MBBVector::const_iterator It,
+ MBBVector::const_iterator E) const;
+ bool needMigrateBlock(MachineBasicBlock *MBB) const;
+
+ // Utility Functions
+ void reversePredicateSetter(MachineBasicBlock::iterator I);
+ /// Compute the reversed DFS post order of Blocks
+ void orderBlocks(MachineFunction *MF);
+
+ // Function originally from CFGStructTraits
+ void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
+ DebugLoc DL = DebugLoc());
+ MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
+ DebugLoc DL = DebugLoc());
+ MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
+ void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
+ DebugLoc DL);
+ void insertCondBranchBefore(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
+ DebugLoc DL);
+ void insertCondBranchEnd(MachineBasicBlock *MBB, int NewOpcode, int RegNum);
+ static int getBranchNzeroOpcode(int OldOpcode);
+ static int getBranchZeroOpcode(int OldOpcode);
+ static int getContinueNzeroOpcode(int OldOpcode);
+ static int getContinueZeroOpcode(int OldOpcode);
+ static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
+ static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
+ static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
+ MachineInstr *MI);
+ static bool isCondBranch(MachineInstr *MI);
+ static bool isUncondBranch(MachineInstr *MI);
+ static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
+ static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
+ /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
+ ///
+ /// A BB with a backward edge could have move instructions after the branch
+ /// instruction; such move instructions "belong to" the loop backward edge.
+ MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
+ static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
+ static MachineInstr *getContinueInstr(MachineBasicBlock *MBB);
+ static bool isReturnBlock(MachineBasicBlock *MBB);
+ static void cloneSuccessorList(MachineBasicBlock *DstMBB,
+ MachineBasicBlock *SrcMBB) ;
+ static MachineBasicBlock *clone(MachineBasicBlock *MBB);
+ /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose here
+ /// because the AMDGPU instruction is not recognized as a terminator; once
+ /// that is fixed, this routine can be retired.
+ void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
+ MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
+ static void wrapup(MachineBasicBlock *MBB);
+
+
+ int patternMatch(MachineBasicBlock *MBB);
+ int patternMatchGroup(MachineBasicBlock *MBB);
+ int serialPatternMatch(MachineBasicBlock *MBB);
+ int ifPatternMatch(MachineBasicBlock *MBB);
+ int loopendPatternMatch();
+ int mergeLoop(MachineLoop *LoopRep);
+ int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader);
+
+ void handleLoopcontBlock(MachineBasicBlock *ContingMBB,
+ MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
+ MachineLoop *ContLoop);
+ /// Return true iff src1Blk->succ_size() == 0 and src1Blk and src2Blk are in
+ /// the same loop with LoopLandInfo. Without explicitly keeping track of
+ /// loopContBlks and loopBreakBlks, this is a way to get that information.
+ bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
+ MachineBasicBlock *Src2MBB);
+ int handleJumpintoIf(MachineBasicBlock *HeadMBB,
+ MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
+ int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
+ MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
+ int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
+ MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
+ MachineBasicBlock **LandMBBPtr);
+ void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
+ MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
+ MachineBasicBlock *LandMBB, bool Detail = false);
+ int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
+ MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
+ void mergeSerialBlock(MachineBasicBlock *DstMBB,
+ MachineBasicBlock *SrcMBB);
+
+ void mergeIfthenelseBlock(MachineInstr *BranchMI,
+ MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
+ MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
+ void mergeLooplandBlock(MachineBasicBlock *DstMBB,
+ MachineBasicBlock *LandMBB);
+ void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
+ MachineBasicBlock *LandMBB);
+ void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
+ MachineBasicBlock *ContMBB);
+ /// normalizeInfiniteLoopExit changes
+ /// B1:
+ /// uncond_br LoopHeader
+ ///
+ /// to
+ /// B1:
+ /// cond_br 1 LoopHeader dummyExit
+ /// and returns the newly added dummy exit block.
+ MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
+ void removeUnconditionalBranch(MachineBasicBlock *MBB);
+ /// Remove duplicate branch instructions in a block.
+ /// For instance
+ /// B0:
+ /// cond_br X B1 B2
+ /// cond_br X B1 B2
+ /// is transformed to
+ /// B0:
+ /// cond_br X B1 B2
+ void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
+ void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
+ void removeSuccessor(MachineBasicBlock *MBB);
+ MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
+ MachineBasicBlock *PredMBB);
+ void migrateInstruction(MachineBasicBlock *SrcMBB,
+ MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
+ void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
+ void retireBlock(MachineBasicBlock *MBB);
+ void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = nullptr);
+
+ MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&);
+ /// This is a workaround for findNearestCommonDominator not being available
+ /// for post-dominators; a proper fix should go into Dominators.h.
+ MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2);
+
+private:
+ MBBInfoMap BlockInfoMap;
+ LoopLandInfoMap LLInfoMap;
+ std::map<MachineLoop *, bool> Visited;
+ MachineFunction *FuncRep;
+ SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
+};
+
+int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
+ MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
+ if (It == BlockInfoMap.end())
+ return INVALIDSCCNUM;
+ return (*It).second->SccNum;
+}
+
+MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
+ const {
+ LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
+ if (It == LLInfoMap.end())
+ return nullptr;
+ return (*It).second;
+}
+
+bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
+ MachineLoop *LoopRep = MLI->getLoopFor(MBB);
+ if (!LoopRep)
+ return false;
+ MachineBasicBlock *LoopHeader = LoopRep->getHeader();
+ return MBB->isSuccessor(LoopHeader);
+}
+
+unsigned AMDGPUCFGStructurizer::getLoopDepth(MachineLoop *LoopRep) {
+ return LoopRep ? LoopRep->getLoopDepth() : 0;
+}
+
+bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
+ MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
+ if (It == BlockInfoMap.end())
+ return false;
+ return (*It).second->IsRetired;
+}
+
+bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
+ MachineLoop *LoopRep = MLI->getLoopFor(MBB);
+ while (LoopRep && LoopRep->getHeader() == MBB) {
+ MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
+ if(!LoopLand)
+ return true;
+ if (!isRetiredBlock(LoopLand))
+ return true;
+ LoopRep = LoopRep->getParentLoop();
+ }
+ return false;
+}
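+
+// Follow the chain of single-successor blocks starting at SrcMBB. Returns
+// SinglePath_InPath if the chain reaches DstMBB, SinglePath_NotInPath if it
+// ends (no successors) without reaching it, and Not_SinglePath otherwise
+// (a multi-successor block, or a disallowed side entry, is hit first).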
+AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
+ MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
+ bool AllowSideEntry) const {
+ assert(DstMBB);
+ if (SrcMBB == DstMBB)
+ return SinglePath_InPath;
+ while (SrcMBB && SrcMBB->succ_size() == 1) {
+ SrcMBB = *SrcMBB->succ_begin();
+ if (SrcMBB == DstMBB)
+ return SinglePath_InPath;
+ if (!AllowSideEntry && SrcMBB->pred_size() > 1)
+ return Not_SinglePath;
+ }
+ if (SrcMBB && SrcMBB->succ_size()==0)
+ return SinglePath_NotInPath;
+ return Not_SinglePath;
+}
+
+int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
+ MBBVector::const_iterator E) const {
+ int Count = 0;
+ while (It != E) {
+ if (!isRetiredBlock(*It))
+ ++Count;
+ ++It;
+ }
+ return Count;
+}
+
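+// Heuristic used by improveSimpleJumpintoIf: a block with more than one
+// predecessor is worth migrating (rather than cloning per predecessor) only
+// when it is large enough that cloning it would be too costly, per the
+// thresholds below.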
+bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
+ unsigned BlockSizeThreshold = 30;
+ unsigned CloneInstrThreshold = 100;
+ bool MultiplePreds = MBB && (MBB->pred_size() > 1);
+
+ if(!MultiplePreds)
+ return false;
+ unsigned BlkSize = MBB->size();
+ return ((BlkSize > BlockSizeThreshold) &&
+ (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
+}
+
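+// Walk backwards from I to the closest PRED_X instruction and invert its
+// zero / not-zero predicate.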
+void AMDGPUCFGStructurizer::reversePredicateSetter(
+ MachineBasicBlock::iterator I) {
+ while (I--) {
+ if (I->getOpcode() == AMDGPU::PRED_X) {
+ switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
+ case OPCODE_IS_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2)
+ .setImm(OPCODE_IS_NOT_ZERO_INT);
+ return;
+ case OPCODE_IS_NOT_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2)
+ .setImm(OPCODE_IS_ZERO_INT);
+ return;
+ case OPCODE_IS_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2)
+ .setImm(OPCODE_IS_NOT_ZERO);
+ return;
+ case OPCODE_IS_NOT_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2)
+ .setImm(OPCODE_IS_ZERO);
+ return;
+ default:
+ llvm_unreachable("PRED_X Opcode invalid!");
+ }
+ }
+ }
+}
+
+void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
+ int NewOpcode, DebugLoc DL) {
+ MachineInstr *MI = MBB->getParent()
+ ->CreateMachineInstr(TII->get(NewOpcode), DL);
+ MBB->push_back(MI);
+ //assume the instruction doesn't take any reg operand ...
+ SHOWNEWINSTR(MI);
+}
+
+MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
+ int NewOpcode, DebugLoc DL) {
+ MachineInstr *MI =
+ MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
+ if (MBB->begin() != MBB->end())
+ MBB->insert(MBB->begin(), MI);
+ else
+ MBB->push_back(MI);
+ SHOWNEWINSTR(MI);
+ return MI;
+}
+
+MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
+ MachineBasicBlock::iterator I, int NewOpcode) {
+ MachineInstr *OldMI = &(*I);
+ MachineBasicBlock *MBB = OldMI->getParent();
+ MachineInstr *NewMBB =
+ MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
+ MBB->insert(I, NewMBB);
+ //assume the instruction doesn't take any reg operand ...
+ SHOWNEWINSTR(NewMBB);
+ return NewMBB;
+}
+
+void AMDGPUCFGStructurizer::insertCondBranchBefore(
+ MachineBasicBlock::iterator I, int NewOpcode, DebugLoc DL) {
+ MachineInstr *OldMI = &(*I);
+ MachineBasicBlock *MBB = OldMI->getParent();
+ MachineFunction *MF = MBB->getParent();
+ MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
+ MBB->insert(I, NewMI);
+ MachineInstrBuilder MIB(*MF, NewMI);
+ MIB.addReg(OldMI->getOperand(1).getReg(), false);
+ SHOWNEWINSTR(NewMI);
+ //erase later oldInstr->eraseFromParent();
+}
+
+void AMDGPUCFGStructurizer::insertCondBranchBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
+ DebugLoc DL) {
+ MachineFunction *MF = blk->getParent();
+ MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
+ //insert before
+ blk->insert(I, NewInstr);
+ MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
+ SHOWNEWINSTR(NewInstr);
+}
+
+void AMDGPUCFGStructurizer::insertCondBranchEnd(MachineBasicBlock *MBB,
+ int NewOpcode, int RegNum) {
+ MachineFunction *MF = MBB->getParent();
+ MachineInstr *NewInstr =
+ MF->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
+ MBB->push_back(NewInstr);
+ MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
+ SHOWNEWINSTR(NewInstr);
+}
+
+int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
+ switch(OldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
+ default: llvm_unreachable("internal error");
+ }
+ return -1;
+}
+
+int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
+ switch(OldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
+ default: llvm_unreachable("internal error");
+ }
+ return -1;
+}
+
+int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
+ switch(OldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
+ default: llvm_unreachable("internal error");
+ };
+ return -1;
+}
+
+int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
+ switch(OldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
+ default: llvm_unreachable("internal error");
+ }
+ return -1;
+}
+
+MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
+ return MI->getOperand(0).getMBB();
+}
+
+void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ MI->getOperand(0).setMBB(MBB);
+}
+
+MachineBasicBlock *
+AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ assert(MBB->succ_size() == 2);
+ MachineBasicBlock *TrueBranch = getTrueBranch(MI);
+ MachineBasicBlock::succ_iterator It = MBB->succ_begin();
+ MachineBasicBlock::succ_iterator Next = It;
+ ++Next;
+ return (*It == TrueBranch) ? *Next : *It;
+}
+
+bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32: return true;
+ default:
+ return false;
+ }
+ return false;
+}
+
+bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case AMDGPU::JUMP:
+ case AMDGPU::BRANCH:
+ return true;
+ default:
+ return false;
+ }
+ return false;
+}
+
+DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
+ // Get the DebugLoc from the last instruction in the block that has debug info.
+ DebugLoc DL;
+ for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
+ ++It) {
+ MachineInstr *instr = &(*It);
+ if (instr->getDebugLoc())
+ DL = instr->getDebugLoc();
+ }
+ return DL;
+}
+
+MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock::reverse_iterator It = MBB->rbegin();
+ MachineInstr *MI = &*It;
+ if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
+ return MI;
+ return nullptr;
+}
+
+MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
+ MachineBasicBlock *MBB) {
+ for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
+ It != E; ++It) {
+ // FIXME: Simplify
+ MachineInstr *MI = &*It;
+ if (MI) {
+ if (isCondBranch(MI) || isUncondBranch(MI))
+ return MI;
+ else if (!TII->isMov(MI->getOpcode()))
+ break;
+ }
+ }
+ return nullptr;
+}
+
+MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
+ MachineBasicBlock::reverse_iterator It = MBB->rbegin();
+ if (It != MBB->rend()) {
+ MachineInstr *instr = &(*It);
+ if (instr->getOpcode() == AMDGPU::RETURN)
+ return instr;
+ }
+ return nullptr;
+}
+
+MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) {
+ MachineBasicBlock::reverse_iterator It = MBB->rbegin();
+ if (It != MBB->rend()) {
+ MachineInstr *MI = &(*It);
+ if (MI->getOpcode() == AMDGPU::CONTINUE)
+ return MI;
+ }
+ return nullptr;
+}
+
+bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
+ MachineInstr *MI = getReturnInstr(MBB);
+ bool IsReturn = (MBB->succ_size() == 0);
+ if (MI)
+ assert(IsReturn);
+ else if (IsReturn)
+ DEBUG(
+ dbgs() << "BB" << MBB->getNumber()
+ <<" is return block without RETURN instr\n";);
+ return IsReturn;
+}
+
+void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
+ MachineBasicBlock *SrcMBB) {
+ for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
+ iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
+ DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of
+}
+
+MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
+ MachineFunction *Func = MBB->getParent();
+ MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
+ Func->push_back(NewMBB); //insert to function
+ for (MachineBasicBlock::iterator It = MBB->begin(), E = MBB->end();
+ It != E; ++It) {
+ MachineInstr *MI = Func->CloneMachineInstr(It);
+ NewMBB->push_back(MI);
+ }
+ return NewMBB;
+}
+
+void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
+ MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
+ MachineBasicBlock *NewBlk) {
+ MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
+ if (BranchMI && isCondBranch(BranchMI) &&
+ getTrueBranch(BranchMI) == OldMBB)
+ setTrueBranch(BranchMI, NewBlk);
+}
+
+void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
+ assert((!MBB->getParent()->getJumpTableInfo()
+ || MBB->getParent()->getJumpTableInfo()->isEmpty())
+ && "found a jump table");
+
+ //collect continue right before endloop
+ SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
+ MachineBasicBlock::iterator Pre = MBB->begin();
+ MachineBasicBlock::iterator E = MBB->end();
+ MachineBasicBlock::iterator It = Pre;
+ while (It != E) {
+ if (Pre->getOpcode() == AMDGPU::CONTINUE
+ && It->getOpcode() == AMDGPU::ENDLOOP)
+ ContInstr.push_back(Pre);
+ Pre = It;
+ ++It;
+ }
+
+ //delete continue right before endloop
+ for (unsigned i = 0; i < ContInstr.size(); ++i)
+ ContInstr[i]->eraseFromParent();
+
+ // TODO: fix up the jump table so later phases won't be confused. If
+ // (jumpTableInfo->isEmpty() == false) we need to clean the jump table, but
+ // there isn't such an interface yet; alternatively, replace all the other
+ // blocks in the jump table with the entryBlk.
+
+}
+
+
+bool AMDGPUCFGStructurizer::prepare() {
+ bool Changed = false;
+
+ //FIXME: if not reducible flow graph, make it so ???
+
+ DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
+
+ orderBlocks(FuncRep);
+
+ SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
+
+ // Add an ExitBlk to loops that don't have one
+ for (MachineLoopInfo::iterator It = MLI->begin(),
+ E = MLI->end(); It != E; ++It) {
+ MachineLoop *LoopRep = (*It);
+ MBBVector ExitingMBBs;
+ LoopRep->getExitingBlocks(ExitingMBBs);
+
+ if (ExitingMBBs.size() == 0) {
+ MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
+ if (DummyExitBlk)
+ RetBlks.push_back(DummyExitBlk);
+ }
+ }
+
+ // Remove unconditional branch instr.
+ // Add dummy exit block iff there are multiple returns.
+ for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
+ It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
+ MachineBasicBlock *MBB = *It;
+ removeUnconditionalBranch(MBB);
+ removeRedundantConditionalBranch(MBB);
+ if (isReturnBlock(MBB)) {
+ RetBlks.push_back(MBB);
+ }
+ assert(MBB->succ_size() <= 2);
+ }
+
+ if (RetBlks.size() >= 2) {
+ addDummyExitBlock(RetBlks);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool AMDGPUCFGStructurizer::run() {
+
+ //Assume reducible CFG...
+ DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
+
+#ifdef STRESSTEST
+ //Use the worse block ordering to test the algorithm.
+ ReverseVector(orderedBlks);
+#endif
+
+ DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
+ int NumIter = 0;
+ bool Finish = false;
+ MachineBasicBlock *MBB;
+ bool MakeProgress = false;
+ int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
+ OrderedBlks.end());
+
+ do {
+ ++NumIter;
+ DEBUG(
+ dbgs() << "numIter = " << NumIter
+ << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
+ );
+
+ SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
+ OrderedBlks.begin();
+ SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
+ OrderedBlks.end();
+
+ SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
+ It;
+ MachineBasicBlock *SccBeginMBB = nullptr;
+ int SccNumBlk = 0; // The number of active blocks, init to a
+ // maximum possible number.
+ int SccNumIter; // Number of iteration in this SCC.
+
+ while (It != E) {
+ MBB = *It;
+
+ if (!SccBeginMBB) {
+ SccBeginIter = It;
+ SccBeginMBB = MBB;
+ SccNumIter = 0;
+ SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
+ DEBUG(
+ dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
+ dbgs() << "\n";
+ );
+ }
+
+ if (!isRetiredBlock(MBB))
+ patternMatch(MBB);
+
+ ++It;
+
+ bool ContNextScc = true;
+ if (It == E
+ || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
+ // Just finish one scc.
+ ++SccNumIter;
+ int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
+ if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
+ DEBUG(
+ dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
+ << ", sccNumIter = " << SccNumIter;
+ dbgs() << "doesn't make any progress\n";
+ );
+ ContNextScc = true;
+ } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
+ SccNumBlk = sccRemainedNumBlk;
+ It = SccBeginIter;
+ ContNextScc = false;
+ DEBUG(
+ dbgs() << "repeat processing SCC" << getSCCNum(MBB)
+ << "sccNumIter = " << SccNumIter << '\n';
+ );
+ } else {
+ // Finish the current scc.
+ ContNextScc = true;
+ }
+ } else {
+ // Continue on next component in the current scc.
+ ContNextScc = false;
+ }
+
+ if (ContNextScc)
+ SccBeginMBB = nullptr;
+ } //while, "one iteration" over the function.
+
+ MachineBasicBlock *EntryMBB =
+ GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
+ if (EntryMBB->succ_size() == 0) {
+ Finish = true;
+ DEBUG(
+ dbgs() << "Reduce to one block\n";
+ );
+ } else {
+ int NewnumRemainedBlk
+ = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
+ // consider cloned blocks ??
+ if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
+ MakeProgress = true;
+ NumRemainedBlk = NewnumRemainedBlk;
+ } else {
+ MakeProgress = false;
+ DEBUG(
+ dbgs() << "No progress\n";
+ );
+ }
+ }
+ } while (!Finish && MakeProgress);
+
+ // Misc wrap up to maintain the consistency of the Function representation.
+ wrapup(GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
+
+ // Detach retired Block, release memory.
+ for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
+ It != E; ++It) {
+ if ((*It).second && (*It).second->IsRetired) {
+ assert(((*It).first)->getNumber() != -1);
+ DEBUG(
+ dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
+ );
+ (*It).first->eraseFromParent(); //Remove from the parent Function.
+ }
+ delete (*It).second;
+ }
+ BlockInfoMap.clear();
+ LLInfoMap.clear();
+
+ if (!Finish) {
+ DEBUG(FuncRep->viewCFG());
+ llvm_unreachable("IRREDUCIBLE_CFG");
+ }
+
+ return true;
+}
+
+
+
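+// Build the ordered block list in SCC order (as produced by scc_iterator) and
+// record each block's SCC number; blocks that end up without a number are
+// unreachable.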
+void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
+ int SccNum = 0;
+ MachineBasicBlock *MBB;
+ for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
+ ++It, ++SccNum) {
+ const std::vector<MachineBasicBlock *> &SccNext = *It;
+ for (std::vector<MachineBasicBlock *>::const_iterator
+ blockIter = SccNext.begin(), blockEnd = SccNext.end();
+ blockIter != blockEnd; ++blockIter) {
+ MBB = *blockIter;
+ OrderedBlks.push_back(MBB);
+ recordSccnum(MBB, SccNum);
+ }
+ }
+
+ // Walk through all the blocks in the function to check for unreachable blocks.
+ typedef GraphTraits<MachineFunction *> GTM;
+ MachineFunction::iterator It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
+ for (; It != E; ++It) {
+ MachineBasicBlock *MBB = &(*It);
+ SccNum = getSCCNum(MBB);
+ if (SccNum == INVALIDSCCNUM)
+ dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
+ }
+}
+
+int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
+ int NumMatch = 0;
+ int CurMatch;
+
+ DEBUG(
+ dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
+ );
+
+ while ((CurMatch = patternMatchGroup(MBB)) > 0)
+ NumMatch += CurMatch;
+
+ DEBUG(
+ dbgs() << "End patternMatch BB" << MBB->getNumber()
+ << ", numMatch = " << NumMatch << "\n";
+ );
+
+ return NumMatch;
+}
+
+int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
+ int NumMatch = 0;
+ NumMatch += loopendPatternMatch();
+ NumMatch += serialPatternMatch(MBB);
+ NumMatch += ifPatternMatch(MBB);
+ return NumMatch;
+}
+
+
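+// Serial pattern: MBB has a single successor and that successor has a single
+// predecessor (and is not an active loop header); the successor is merged
+// into MBB.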
+int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
+ if (MBB->succ_size() != 1)
+ return 0;
+
+ MachineBasicBlock *childBlk = *MBB->succ_begin();
+ if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
+ return 0;
+
+ mergeSerialBlock(MBB, childBlk);
+ ++numSerialPatternMatch;
+ return 1;
+}
+
+int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
+ //two edges
+ if (MBB->succ_size() != 2)
+ return 0;
+ if (hasBackEdge(MBB))
+ return 0;
+ MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
+ if (!BranchMI)
+ return 0;
+
+ assert(isCondBranch(BranchMI));
+ int NumMatch = 0;
+
+ MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
+ NumMatch += serialPatternMatch(TrueMBB);
+ NumMatch += ifPatternMatch(TrueMBB);
+ MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
+ NumMatch += serialPatternMatch(FalseMBB);
+ NumMatch += ifPatternMatch(FalseMBB);
+ MachineBasicBlock *LandBlk;
+ int Cloned = 0;
+
+ assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
+ // TODO: Simplify
+ if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
+ && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
+ // Diamond pattern
+ LandBlk = *TrueMBB->succ_begin();
+ } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
+ // Triangle pattern, false is empty
+ LandBlk = FalseMBB;
+ FalseMBB = nullptr;
+ } else if (FalseMBB->succ_size() == 1
+ && *FalseMBB->succ_begin() == TrueMBB) {
+ // Triangle pattern, true is empty
+ // We reverse the predicate to make a triangle pattern with an empty false path.
+ std::swap(TrueMBB, FalseMBB);
+ reversePredicateSetter(MBB->end());
+ LandBlk = FalseMBB;
+ FalseMBB = nullptr;
+ } else if (FalseMBB->succ_size() == 1
+ && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
+ LandBlk = *FalseMBB->succ_begin();
+ } else if (TrueMBB->succ_size() == 1
+ && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
+ LandBlk = *TrueMBB->succ_begin();
+ } else {
+ return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
+ }
+
+ // improveSimpleJumpintoIf can handle the case where landBlk == NULL, but the
+ // new BB created for landBlk == NULL may introduce a new challenge to the
+ // reduction process.
+ if (LandBlk &&
+ ((TrueMBB && TrueMBB->pred_size() > 1)
+ || (FalseMBB && FalseMBB->pred_size() > 1))) {
+ Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
+ }
+
+ if (TrueMBB && TrueMBB->pred_size() > 1) {
+ TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
+ ++Cloned;
+ }
+
+ if (FalseMBB && FalseMBB->pred_size() > 1) {
+ FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
+ ++Cloned;
+ }
+
+ mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
+
+ ++numIfPatternMatch;
+
+ numClonedBlock += Cloned;
+
+ return 1 + Cloned + NumMatch;
+}
+
+int AMDGPUCFGStructurizer::loopendPatternMatch() {
+ std::deque<MachineLoop *> NestedLoops;
+ for (auto &It: *MLI)
+ for (MachineLoop *ML : depth_first(It))
+ NestedLoops.push_front(ML);
+
+ if (NestedLoops.size() == 0)
+ return 0;
+
+ // Process nested loops outside->inside (we did push_front),
+ // so a "continue" to an outside loop won't be mistaken for a "break"
+ // of the current loop.
+ int Num = 0;
+ for (MachineLoop *ExaminedLoop : NestedLoops) {
+ if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
+ continue;
+ DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
+ int NumBreak = mergeLoop(ExaminedLoop);
+ if (NumBreak == -1)
+ break;
+ Num += NumBreak;
+ }
+ return Num;
+}
+
+int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
+ MachineBasicBlock *LoopHeader = LoopRep->getHeader();
+ MBBVector ExitingMBBs;
+ LoopRep->getExitingBlocks(ExitingMBBs);
+ assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
+ DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
+ // We assume a single ExitBlk
+ MBBVector ExitBlks;
+ LoopRep->getExitBlocks(ExitBlks);
+ SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
+ for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
+ ExitBlkSet.insert(ExitBlks[i]);
+ assert(ExitBlkSet.size() == 1);
+ MachineBasicBlock *ExitBlk = *ExitBlks.begin();
+ assert(ExitBlk && "Loop has several exit block");
+ MBBVector LatchBlks;
+ typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
+ InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
+ PE = InvMBBTraits::child_end(LoopHeader);
+ for (; PI != PE; PI++) {
+ if (LoopRep->contains(*PI))
+ LatchBlks.push_back(*PI);
+ }
+
+ for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
+ mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
+ for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
+ settleLoopcontBlock(LatchBlks[i], LoopHeader);
+ int Match = 0;
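+ // Keep collapsing serial and if patterns at the loop header until nothing
+ // more matches, then fold the remaining header/exit structure into a
+ // loop-land block.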
+ do {
+ Match = 0;
+ Match += serialPatternMatch(LoopHeader);
+ Match += ifPatternMatch(LoopHeader);
+ } while (Match > 0);
+ mergeLooplandBlock(LoopHeader, ExitBlk);
+ MachineLoop *ParentLoop = LoopRep->getParentLoop();
+ if (ParentLoop)
+ MLI->changeLoopFor(LoopHeader, ParentLoop);
+ else
+ MLI->removeBlock(LoopHeader);
+ Visited[LoopRep] = true;
+ return 1;
+}
+
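+// For every predecessor of the loop header that is still inside the loop,
+// turn its back edge into a continue (settleLoopcontBlock) and then remove
+// the edge to the header.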
+int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep,
+ MachineBasicBlock *LoopHeader) {
+ int NumCont = 0;
+ SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> ContMBB;
+ typedef GraphTraits<Inverse<MachineBasicBlock *> > GTIM;
+ GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader),
+ E = GTIM::child_end(LoopHeader);
+ for (; It != E; ++It) {
+ MachineBasicBlock *MBB = *It;
+ if (LoopRep->contains(MBB)) {
+ handleLoopcontBlock(MBB, MLI->getLoopFor(MBB),
+ LoopHeader, LoopRep);
+ ContMBB.push_back(MBB);
+ ++NumCont;
+ }
+ }
+
+ for (SmallVectorImpl<MachineBasicBlock *>::iterator It = ContMBB.begin(),
+ E = ContMBB.end(); It != E; ++It) {
+ (*It)->removeSuccessor(LoopHeader);
+ }
+
+ numLoopcontPatternMatch += NumCont;
+
+ return NumCont;
+}
+
+
+bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
+ MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
+ if (Src1MBB->succ_size() == 0) {
+ MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
+ if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
+ MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
+ if (TheEntry) {
+ DEBUG(
+ dbgs() << "isLoopContBreakBlock yes src1 = BB"
+ << Src1MBB->getNumber()
+ << " src2 = BB" << Src2MBB->getNumber() << "\n";
+ );
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
+ MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
+ int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
+ if (Num == 0) {
+ DEBUG(
+ dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
+ );
+ Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
+ }
+ return Num;
+}
+
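+// Walk down from TrueMBB along single-successor blocks looking for a block
+// that FalseMBB also reaches on a single path; when found, clone the side
+// entries into it and re-run the serial/if pattern matchers on HeadMBB.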
+int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
+ MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
+ int Num = 0;
+ MachineBasicBlock *DownBlk;
+
+ //trueBlk could be the common post dominator
+ DownBlk = TrueMBB;
+
+ DEBUG(
+ dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
+ << " true = BB" << TrueMBB->getNumber()
+ << ", numSucc=" << TrueMBB->succ_size()
+ << " false = BB" << FalseMBB->getNumber() << "\n";
+ );
+
+ while (DownBlk) {
+ DEBUG(
+ dbgs() << "check down = BB" << DownBlk->getNumber();
+ );
+
+ if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
+ DEBUG(
+ dbgs() << " working\n";
+ );
+
+ Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
+ Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
+
+ numClonedBlock += Num;
+ Num += serialPatternMatch(*HeadMBB->succ_begin());
+ Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
+ Num += ifPatternMatch(HeadMBB);
+ assert(Num > 0);
+
+ break;
+ }
+ DEBUG(
+ dbgs() << " not working\n";
+ );
+ DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
+ } // walk down the postDomTree
+
+ return Num;
+}
+
+void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
+ MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
+ MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
+ dbgs() << "head = BB" << HeadMBB->getNumber()
+ << " size = " << HeadMBB->size();
+ if (Detail) {
+ dbgs() << "\n";
+ HeadMBB->print(dbgs());
+ dbgs() << "\n";
+ }
+
+ if (TrueMBB) {
+ dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
+ << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
+ if (Detail) {
+ dbgs() << "\n";
+ TrueMBB->print(dbgs());
+ dbgs() << "\n";
+ }
+ }
+ if (FalseMBB) {
+ dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
+ << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
+ if (Detail) {
+ dbgs() << "\n";
+ FalseMBB->print(dbgs());
+ dbgs() << "\n";
+ }
+ }
+ if (LandMBB) {
+ dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
+ << LandMBB->size() << " numPred = " << LandMBB->pred_size();
+ if (Detail) {
+ dbgs() << "\n";
+ LandMBB->print(dbgs());
+ dbgs() << "\n";
+ }
+ }
+
+ dbgs() << "\n";
+}
+
+int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
+ MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
+ MachineBasicBlock **LandMBBPtr) {
+ bool MigrateTrue = false;
+ bool MigrateFalse = false;
+
+ MachineBasicBlock *LandBlk = *LandMBBPtr;
+
+ assert((!TrueMBB || TrueMBB->succ_size() <= 1)
+ && (!FalseMBB || FalseMBB->succ_size() <= 1));
+
+ if (TrueMBB == FalseMBB)
+ return 0;
+
+ MigrateTrue = needMigrateBlock(TrueMBB);
+ MigrateFalse = needMigrateBlock(FalseMBB);
+
+ if (!MigrateTrue && !MigrateFalse)
+ return 0;
+
+ // If we need to migrate either trueBlk or falseBlk, also migrate whichever of
+ // the two has more than one predecessor. Without doing this, paths entering
+ // through a predecessor other than headBlk would see an undefined value in
+ // initReg.
+ if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
+ MigrateTrue = true;
+ if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
+ MigrateFalse = true;
+
+ DEBUG(
+ dbgs() << "before improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
+ );
+
+ // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
+ //
+ // new: headBlk => if () {initReg = 1; org trueBlk branch} else
+ // {initReg = 0; org falseBlk branch }
+ // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
+ // => org landBlk
+ // if landBlk->pred_size() > 2, put the above if-else inside
+ // if (initReg !=2) {...}
+ //
+ // add initReg = initVal to headBlk
+
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ if (!MigrateTrue || !MigrateFalse) {
+ // XXX: We have an opportunity here to optimize the "branch into if" case
+ // here. Branch into if looks like this:
+ // entry
+ // / |
+ // diamond_head branch_from
+ // / \ |
+ // diamond_false diamond_true
+ // \ /
+ // done
+ //
+ // The diamond_head block begins the "if" and the diamond_true block
+ // is the block being "branched into".
+ //
+ // If MigrateTrue is true, then TrueBB is the block being "branched into"
+ // and if MigrateFalse is true, then FalseBB is the block being
+ // "branched into"
+ //
+ // Here is the pseudo code for how I think the optimization should work:
+ // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
+ // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
+ // 3. Move the branch instruction from diamond_head into its own basic
+ // block (new_block).
+ // 4. Add an unconditional branch from diamond_head to new_block
+ // 5. Replace the branch instruction in branch_from with an unconditional
+ // branch to new_block. If branch_from has multiple predecessors, then
+ // we need to replace the True/False block in the branch
+ // instruction instead of replacing it.
+ // 6. Change the condition of the branch instruction in new_block from
+ // COND to (COND || GPR0)
+ //
+ // In order to insert these MOV instructions, we will need to use the
+ // RegisterScavenger. Usually liveness stops being tracked during
+ // the late machine optimization passes, however if we implement
+ // bool TargetRegisterInfo::requiresRegisterScavenging(
+ // const MachineFunction &MF)
+ // and have it return true, liveness will be tracked correctly
+ // by generic optimization passes. We will also need to make sure that
+ // all of our target-specific passes that run after regalloc and before
+ // the CFGStructurizer track liveness and we will need to modify this pass
+ // to correctly track liveness.
+ //
+ // After the above changes, the new CFG should look like this:
+ // entry
+ // / |
+ // diamond_head branch_from
+ // \ /
+ // new_block
+ // / |
+ // diamond_false diamond_true
+ // \ /
+ // done
+ //
+ // Without this optimization, we are forced to duplicate the diamond_true
+ // block and we will end up with a CFG like this:
+ //
+ // entry
+ // / |
+ // diamond_head branch_from
+ // / \ |
+ // diamond_false diamond_true diamond_true (duplicate)
+ // \ / |
+ // done --------------------|
+ //
+ // Duplicating diamond_true can be very costly especially if it has a
+ // lot of instructions.
+ return 0;
+ }
+
+ int NumNewBlk = 0;
+
+ bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
+
+ //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
+ MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
+
+ if (LandBlkHasOtherPred) {
+ llvm_unreachable("Extra register needed to handle CFG");
+ unsigned CmpResReg =
+ HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
+ llvm_unreachable("Extra compare instruction needed to handle CFG");
+ insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
+ CmpResReg, DebugLoc());
+ }
+
+ // XXX: We are running this after RA, so creating virtual registers will
+ // cause an assertion failure in the PostRA scheduling pass.
+ unsigned InitReg =
+ HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
+ insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
+ DebugLoc());
+
+ if (MigrateTrue) {
+ migrateInstruction(TrueMBB, LandBlk, I);
+ // need to unconditionally insert the assignment to ensure a path from its
+ // predecessor rather than headBlk has a valid value in initReg if
+ // (initVal != 1).
+ llvm_unreachable("Extra register needed to handle CFG");
+ }
+ insertInstrBefore(I, AMDGPU::ELSE);
+
+ if (MigrateFalse) {
+ migrateInstruction(FalseMBB, LandBlk, I);
+    // We need to unconditionally insert the assignment to ensure that a path
+    // from its predecessor, rather than from headBlk, has a valid value in
+    // initReg if (initVal != 0).
+ llvm_unreachable("Extra register needed to handle CFG");
+ }
+
+ if (LandBlkHasOtherPred) {
+    // Add ENDIF.
+ insertInstrBefore(I, AMDGPU::ENDIF);
+
+    // Set initReg = 2 in the other predecessors of landBlk.
+ for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
+ PE = LandBlk->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *MBB = *PI;
+ if (MBB != TrueMBB && MBB != FalseMBB)
+ llvm_unreachable("Extra register needed to handle CFG");
+ }
+ }
+ DEBUG(
+ dbgs() << "result from improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
+ );
+
+ // update landBlk
+ *LandMBBPtr = LandBlk;
+
+ return NumNewBlk;
+}
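
The long comment at the top of improveSimpleJumpintoIf above describes the intended (COND || GPR0) rewrite only in prose and CFG diagrams. Below is a minimal source-level sketch of steps 1, 2 and 6 of that plan, written as ordinary C++ purely for illustration; the names (cond, came_from_branch_from, gpr0) are hypothetical and the snippet is not part of the patch.

// Hypothetical source-level view of the rewrite: both predecessors assign
// gpr0 before jumping to new_block, and new_block branches on (cond || gpr0),
// so the diamond_true block no longer has to be duplicated.
int improve_jump_into_if_sketch(int cond, int came_from_branch_from) {
  int gpr0;
  if (came_from_branch_from)
    gpr0 = 1;                 // step 2: MOV GPR0, 1 in branch_from
  else
    gpr0 = 0;                 // step 1: MOV GPR0, 0 in diamond_head
  // new_block: step 6, the branch condition becomes (COND || GPR0).
  if (cond || gpr0)
    return 1;                 // diamond_true
  return 2;                   // diamond_false
}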
+
+void AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB,
+ MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
+ MachineLoop *ContLoop) {
+ DEBUG(dbgs() << "loopcontPattern cont = BB" << ContingMBB->getNumber()
+ << " header = BB" << ContMBB->getNumber() << "\n";
+ dbgs() << "Trying to continue loop-depth = "
+ << getLoopDepth(ContLoop)
+ << " from loop-depth = " << getLoopDepth(ContingLoop) << "\n";);
+ settleLoopcontBlock(ContingMBB, ContMBB);
+}
+
+void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
+ MachineBasicBlock *SrcMBB) {
+ DEBUG(
+ dbgs() << "serialPattern BB" << DstMBB->getNumber()
+ << " <= BB" << SrcMBB->getNumber() << "\n";
+ );
+ DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
+
+ DstMBB->removeSuccessor(SrcMBB);
+ cloneSuccessorList(DstMBB, SrcMBB);
+
+ removeSuccessor(SrcMBB);
+ MLI->removeBlock(SrcMBB);
+ retireBlock(SrcMBB);
+}
+
+void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
+ MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
+ MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
+ assert (TrueMBB);
+ DEBUG(
+ dbgs() << "ifPattern BB" << MBB->getNumber();
+ dbgs() << "{ ";
+ if (TrueMBB) {
+ dbgs() << "BB" << TrueMBB->getNumber();
+ }
+ dbgs() << " } else ";
+ dbgs() << "{ ";
+ if (FalseMBB) {
+ dbgs() << "BB" << FalseMBB->getNumber();
+ }
+ dbgs() << " }\n ";
+ dbgs() << "landBlock: ";
+ if (!LandMBB) {
+ dbgs() << "NULL";
+ } else {
+ dbgs() << "BB" << LandMBB->getNumber();
+ }
+ dbgs() << "\n";
+ );
+
+ int OldOpcode = BranchMI->getOpcode();
+ DebugLoc BranchDL = BranchMI->getDebugLoc();
+
+// transform to
+// if cond
+// trueBlk
+// else
+// falseBlk
+// endif
+// landBlk
+
+ MachineBasicBlock::iterator I = BranchMI;
+ insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
+ BranchDL);
+
+ if (TrueMBB) {
+ MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
+ MBB->removeSuccessor(TrueMBB);
+ if (LandMBB && TrueMBB->succ_size()!=0)
+ TrueMBB->removeSuccessor(LandMBB);
+ retireBlock(TrueMBB);
+ MLI->removeBlock(TrueMBB);
+ }
+
+ if (FalseMBB) {
+ insertInstrBefore(I, AMDGPU::ELSE);
+ MBB->splice(I, FalseMBB, FalseMBB->begin(),
+ FalseMBB->end());
+ MBB->removeSuccessor(FalseMBB);
+ if (LandMBB && FalseMBB->succ_size() != 0)
+ FalseMBB->removeSuccessor(LandMBB);
+ retireBlock(FalseMBB);
+ MLI->removeBlock(FalseMBB);
+ }
+ insertInstrBefore(I, AMDGPU::ENDIF);
+
+ BranchMI->eraseFromParent();
+
+ if (LandMBB && TrueMBB && FalseMBB)
+ MBB->addSuccessor(LandMBB);
+
+}
+
+void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
+ MachineBasicBlock *LandMBB) {
+ DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
+ << " land = BB" << LandMBB->getNumber() << "\n";);
+
+ insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
+ insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
+ DstBlk->addSuccessor(LandMBB);
+ DstBlk->removeSuccessor(DstBlk);
+}
+
+
+void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
+ MachineBasicBlock *LandMBB) {
+ DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
+ << " land = BB" << LandMBB->getNumber() << "\n";);
+ MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
+ assert(BranchMI && isCondBranch(BranchMI));
+ DebugLoc DL = BranchMI->getDebugLoc();
+ MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
+ MachineBasicBlock::iterator I = BranchMI;
+ if (TrueBranch != LandMBB)
+ reversePredicateSetter(I);
+ insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
+ insertInstrBefore(I, AMDGPU::BREAK);
+ insertInstrBefore(I, AMDGPU::ENDIF);
+  // Now the branch instruction can be erased safely.
+ BranchMI->eraseFromParent();
+  // Now take care of the successors and retire the block.
+ ExitingMBB->removeSuccessor(LandMBB);
+}
+
+void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
+ MachineBasicBlock *ContMBB) {
+ DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
+ << ContingMBB->getNumber()
+ << ", cont = BB" << ContMBB->getNumber() << "\n";);
+
+ MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
+ if (MI) {
+ assert(isCondBranch(MI));
+ MachineBasicBlock::iterator I = MI;
+ MachineBasicBlock *TrueBranch = getTrueBranch(MI);
+ int OldOpcode = MI->getOpcode();
+ DebugLoc DL = MI->getDebugLoc();
+
+ bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
+
+ if (!UseContinueLogical) {
+ int BranchOpcode =
+ TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
+ getBranchZeroOpcode(OldOpcode);
+ insertCondBranchBefore(I, BranchOpcode, DL);
+      // insertEnd to ensure phi-moves, if they exist, go before the continue-instr.
+ insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
+ insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
+ } else {
+ int BranchOpcode =
+ TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
+ getContinueZeroOpcode(OldOpcode);
+ insertCondBranchBefore(I, BranchOpcode, DL);
+ }
+
+ MI->eraseFromParent();
+ } else {
+    // If we've arrived here then we've already erased the branch instruction.
+    // Travel back up the basic block to find the last reference of our debug
+    // location; we've just inserted that reference here, so it should be
+    // representative.  insertEnd to ensure phi-moves, if they exist, go before
+    // the continue-instr.
+ insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
+ getLastDebugLocInBB(ContingMBB));
+ }
+}
+
+int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
+ MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
+ int Cloned = 0;
+ assert(PreMBB->isSuccessor(SrcMBB));
+ while (SrcMBB && SrcMBB != DstMBB) {
+ assert(SrcMBB->succ_size() == 1);
+ if (SrcMBB->pred_size() > 1) {
+ SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
+ ++Cloned;
+ }
+
+ PreMBB = SrcMBB;
+ SrcMBB = *SrcMBB->succ_begin();
+ }
+
+ return Cloned;
+}
+
+MachineBasicBlock *
+AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
+ MachineBasicBlock *PredMBB) {
+ assert(PredMBB->isSuccessor(MBB) &&
+         "succBlk is not a predecessor of curBlk");
+
+ MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions
+ replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
+ //srcBlk, oldBlk, newBlk
+
+ PredMBB->removeSuccessor(MBB);
+ PredMBB->addSuccessor(CloneMBB);
+
+  // Add all successors to cloneBlk.
+ cloneSuccessorList(CloneMBB, MBB);
+
+ numClonedInstr += MBB->size();
+
+ DEBUG(
+ dbgs() << "Cloned block: " << "BB"
+ << MBB->getNumber() << "size " << MBB->size() << "\n";
+ );
+
+ SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
+
+ return CloneMBB;
+}
+
+void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
+ MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator SpliceEnd;
+  // Look for the input branch instruction, not the AMDGPU branch instruction.
+ MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
+ if (!BranchMI) {
+ DEBUG(
+ dbgs() << "migrateInstruction don't see branch instr\n" ;
+ );
+ SpliceEnd = SrcMBB->end();
+ } else {
+ DEBUG(
+ dbgs() << "migrateInstruction see branch instr\n" ;
+ BranchMI->dump();
+ );
+ SpliceEnd = BranchMI;
+ }
+ DEBUG(
+ dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
+ << "srcSize = " << SrcMBB->size() << "\n";
+ );
+
+  // Splice: insert before the insertion position I.
+ DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
+
+ DEBUG(
+ dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
+ << "srcSize = " << SrcMBB->size() << "\n";
+ );
+}
+
+MachineBasicBlock *
+AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
+ MachineBasicBlock *LoopHeader = LoopRep->getHeader();
+ MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+
+ if (!LoopHeader || !LoopLatch)
+ return nullptr;
+ MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
+  // Is LoopRep an infinite loop?
+ if (!BranchMI || !isUncondBranch(BranchMI))
+ return nullptr;
+
+ MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
+  FuncRep->push_back(DummyExitBlk); // Insert into the function.
+ SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
+ DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
+ MachineBasicBlock::iterator I = BranchMI;
+ unsigned ImmReg = FuncRep->getRegInfo().createVirtualRegister(I32RC);
+ llvm_unreachable("Extra register needed to handle CFG");
+ MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32);
+ MachineInstrBuilder MIB(*FuncRep, NewMI);
+ MIB.addMBB(LoopHeader);
+ MIB.addReg(ImmReg, false);
+ SHOWNEWINSTR(NewMI);
+ BranchMI->eraseFromParent();
+ LoopLatch->addSuccessor(DummyExitBlk);
+
+ return DummyExitBlk;
+}
+
+void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
+ MachineInstr *BranchMI;
+
+  // I saw two unconditional branches in one basic block in the example
+  // test_fc_do_while_or.c; the upstream needs a fix for this so the loop
+  // here can be removed.
+ while ((BranchMI = getLoopendBlockBranchInstr(MBB))
+ && isUncondBranch(BranchMI)) {
+ DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump(););
+ BranchMI->eraseFromParent();
+ }
+}
+
+void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
+ MachineBasicBlock *MBB) {
+ if (MBB->succ_size() != 2)
+ return;
+ MachineBasicBlock *MBB1 = *MBB->succ_begin();
+ MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
+ if (MBB1 != MBB2)
+ return;
+
+ MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
+ assert(BranchMI && isCondBranch(BranchMI));
+ DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump(););
+ BranchMI->eraseFromParent();
+ SHOWNEWBLK(MBB1, "Removing redundant successor");
+ MBB->removeSuccessor(MBB1);
+}
+
+void AMDGPUCFGStructurizer::addDummyExitBlock(
+ SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
+ MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
+  FuncRep->push_back(DummyExitBlk); // Insert into the function.
+ insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
+
+ for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
+ E = RetMBB.end(); It != E; ++It) {
+ MachineBasicBlock *MBB = *It;
+ MachineInstr *MI = getReturnInstr(MBB);
+ if (MI)
+ MI->eraseFromParent();
+ MBB->addSuccessor(DummyExitBlk);
+ DEBUG(
+ dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
+ << " successors\n";
+ );
+ }
+ SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
+}
+
+void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
+ while (MBB->succ_size())
+ MBB->removeSuccessor(*MBB->succ_begin());
+}
+
+void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
+ int SccNum) {
+ BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
+ if (!srcBlkInfo)
+ srcBlkInfo = new BlockInformation();
+ srcBlkInfo->SccNum = SccNum;
+}
+
+void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
+ DEBUG(
+ dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
+ );
+
+ BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
+
+ if (!SrcBlkInfo)
+ SrcBlkInfo = new BlockInformation();
+
+ SrcBlkInfo->IsRetired = true;
+ assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
+ && "can't retire block yet");
+}
+
+void AMDGPUCFGStructurizer::setLoopLandBlock(MachineLoop *loopRep,
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock *&TheEntry = LLInfoMap[loopRep];
+ if (!MBB) {
+ MBB = FuncRep->CreateMachineBasicBlock();
+    FuncRep->push_back(MBB); // Insert into the function.
+ SHOWNEWBLK(MBB, "DummyLandingBlock for loop without break: ");
+ }
+ TheEntry = MBB;
+ DEBUG(
+ dbgs() << "setLoopLandBlock loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " landing-block = BB" << MBB->getNumber() << "\n";
+ );
+}
+
+MachineBasicBlock *
+AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2) {
+
+ if (PDT->dominates(MBB1, MBB2))
+ return MBB1;
+ if (PDT->dominates(MBB2, MBB1))
+ return MBB2;
+
+ MachineDomTreeNode *Node1 = PDT->getNode(MBB1);
+ MachineDomTreeNode *Node2 = PDT->getNode(MBB2);
+
+ // Handle newly cloned node.
+ if (!Node1 && MBB1->succ_size() == 1)
+ return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2);
+ if (!Node2 && MBB2->succ_size() == 1)
+ return findNearestCommonPostDom(MBB1, *MBB2->succ_begin());
+
+ if (!Node1 || !Node2)
+ return nullptr;
+
+ Node1 = Node1->getIDom();
+ while (Node1) {
+ if (PDT->dominates(Node1, Node2))
+ return Node1->getBlock();
+ Node1 = Node1->getIDom();
+ }
+
+ return nullptr;
+}
+
+MachineBasicBlock *
+AMDGPUCFGStructurizer::findNearestCommonPostDom(
+ std::set<MachineBasicBlock *> &MBBs) {
+ MachineBasicBlock *CommonDom;
+ std::set<MachineBasicBlock *>::const_iterator It = MBBs.begin();
+ std::set<MachineBasicBlock *>::const_iterator E = MBBs.end();
+ for (CommonDom = *It; It != E && CommonDom; ++It) {
+ MachineBasicBlock *MBB = *It;
+ if (MBB != CommonDom)
+ CommonDom = findNearestCommonPostDom(MBB, CommonDom);
+ }
+
+ DEBUG(
+ dbgs() << "Common post dominator for exit blocks is ";
+ if (CommonDom)
+ dbgs() << "BB" << CommonDom->getNumber() << "\n";
+ else
+ dbgs() << "NULL\n";
+ );
+
+ return CommonDom;
+}
+
+char AMDGPUCFGStructurizer::ID = 0;
+
+} // end anonymous namespace
+
+
+INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
+ "AMDGPU CFG Structurizer", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
+ "AMDGPU CFG Structurizer", false, false)
+
+FunctionPass *llvm::createAMDGPUCFGStructurizerPass() {
+ return new AMDGPUCFGStructurizer();
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h b/contrib/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h
new file mode 100644
index 0000000..eaffb85
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h
@@ -0,0 +1,704 @@
+//===-- AMDKernelCodeT.h - AMD Kernel Code struct definitions ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file AMDKernelCodeT.h
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDKERNELCODET_H
+#define AMDKERNELCODET_H
+
+#include <cstddef>
+#include <cstdint>
+
+//---------------------------------------------------------------------------//
+// AMD Kernel Code, and its dependencies //
+//---------------------------------------------------------------------------//
+
+typedef uint8_t hsa_powertwo8_t;
+typedef uint32_t hsa_ext_code_kind_t;
+typedef uint8_t hsa_ext_brig_profile8_t;
+typedef uint8_t hsa_ext_brig_machine_model8_t;
+typedef uint64_t hsa_ext_control_directive_present64_t;
+typedef uint16_t hsa_ext_exception_kind16_t;
+typedef uint32_t hsa_ext_code_kind32_t;
+
+typedef struct hsa_dim3_s {
+ uint32_t x;
+ uint32_t y;
+ uint32_t z;
+} hsa_dim3_t;
+
+/// The version of the amd_*_code_t struct. Minor versions must be
+/// backward compatible.
+typedef uint32_t amd_code_version32_t;
+enum amd_code_version_t {
+ AMD_CODE_VERSION_MAJOR = 0,
+ AMD_CODE_VERSION_MINOR = 1
+};
+
+/// The values used to define the number of bytes to use for the
+/// swizzle element size.
+enum amd_element_byte_size_t {
+ AMD_ELEMENT_2_BYTES = 0,
+ AMD_ELEMENT_4_BYTES = 1,
+ AMD_ELEMENT_8_BYTES = 2,
+ AMD_ELEMENT_16_BYTES = 3
+};
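
As a quick aid to reading the enumerators above: each value encodes an element size of 2, 4, 8 or 16 bytes. A minimal decoding helper, hypothetical and not part of the header:

/* amd_element_byte_size_t stores log2(bytes) - 1, so shifting 2 left by the
   enumerator value yields the size in bytes: 0 -> 2, 1 -> 4, 2 -> 8, 3 -> 16. */
static inline uint32_t amd_element_size_in_bytes(enum amd_element_byte_size_t E) {
  return (uint32_t)2 << E;
}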
+
+/// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
+/// COMPUTE_PGM_RSRC2 registers.
+typedef uint64_t amd_compute_pgm_resource_register64_t;
+
+/// Every amd_*_code_t has the following properties, which are composed of
+/// a number of bit fields. Every bit field has a mask (AMD_CODE_PROPERTY_*),
+/// bit width (AMD_CODE_PROPERTY_*_WIDTH), and bit shift amount
+/// (AMD_CODE_PROPERTY_*_SHIFT) for convenient access. Unused bits must be 0.
+///
+/// (Note that bit fields cannot be used as their layout is
+/// implementation defined in the C standard and so cannot be used to
+/// specify an ABI)
+typedef uint32_t amd_code_property32_t;
+enum amd_code_property_mask_t {
+
+ /// Enable the setup of the SGPR user data registers
+ /// (AMD_CODE_PROPERTY_ENABLE_SGPR_*), see documentation of amd_kernel_code_t
+ /// for initial register state.
+ ///
+  /// The total number of SGPR user data registers requested must not
+ /// exceed 16. Any requests beyond 16 will be ignored.
+ ///
+ /// Used to set COMPUTE_PGM_RSRC2.USER_SGPR (set to total count of
+ /// SGPR user data registers enabled up to 16).
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT = 0,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT = 2,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT = 3,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT = 4,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT = 5,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT = 6,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT = 7,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT = 8,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT = 9,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT,
+
+ /// Control wave ID base counter for GDS ordered-append. Used to set
+ /// COMPUTE_DISPATCH_INITIATOR.ORDERED_APPEND_ENBL. (Not sure if
+ /// ORDERED_APPEND_MODE also needs to be settable)
+ AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT = 10,
+ AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS = ((1 << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT,
+
+ /// The interleave (swizzle) element size in bytes required by the
+ /// code for private memory. This must be 2, 4, 8 or 16. This value
+ /// is provided to the finalizer when it is invoked and is recorded
+ /// here. The hardware will interleave the memory requests of each
+ /// lane of a wavefront by this element size to ensure each
+  /// work-item gets a distinct memory location. Therefore, the
+ /// finalizer ensures that all load and store operations done to
+ /// private memory do not exceed this size. For example, if the
+ /// element size is 4 (32-bits or dword) and a 64-bit value must be
+ /// loaded, the finalizer will generate two 32-bit loads. This
+ /// ensures that the interleaving will get the work-item
+ /// specific dword for both halves of the 64-bit value. If it just
+ /// did a 64-bit load then it would get one dword which belonged to
+ /// its own work-item, but the second dword would belong to the
+ /// adjacent lane work-item since the interleaving is in dwords.
+ ///
+ /// The value used must match the value that the runtime configures
+ /// the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This
+ /// is generally DWORD.
+ ///
+ /// Use values from the amd_element_byte_size_t enum.
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 11,
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2,
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE = ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,
+
+ /// Are global memory addresses 64 bits. Must match
+ /// amd_kernel_code_t.hsail_machine_model ==
+ /// HSA_MACHINE_LARGE. Must also match
+ /// SH_MEM_CONFIG.PTR32 (GFX6 (SI)/GFX7 (CI)),
+ /// SH_MEM_CONFIG.ADDRESS_MODE (GFX8 (VI)+).
+ AMD_CODE_PROPERTY_IS_PTR64_SHIFT = 13,
+ AMD_CODE_PROPERTY_IS_PTR64_WIDTH = 1,
+ AMD_CODE_PROPERTY_IS_PTR64 = ((1 << AMD_CODE_PROPERTY_IS_PTR64_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_PTR64_SHIFT,
+
+ /// Indicate if the generated ISA is using a dynamically sized call
+ /// stack. This can happen if calls are implemented using a call
+ /// stack and recursion, alloca or calls to indirect functions are
+ /// present. In these cases the Finalizer cannot compute the total
+ /// private segment size at compile time. In this case the
+ /// workitem_private_segment_byte_size only specifies the statically
+  /// known private segment size, and additional space must be added
+ /// for the call stack.
+ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT = 14,
+ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH = 1,
+ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK = ((1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT,
+
+ /// Indicate if code generated has support for debugging.
+ AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 15,
+ AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1,
+ AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT
+};
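
Each property above is described by a mask, a width and a shift. As a sketch of how the three are meant to be combined when packing and unpacking a code_properties word, here is a hypothetical getter/setter pair for the PRIVATE_ELEMENT_SIZE field; the helper names are not part of the header.

static inline uint32_t
amd_get_private_element_size(amd_code_property32_t Props) {
  /* Mask off the field, then shift it down to its raw 2-bit value. */
  return (Props & AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >>
         AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT;
}

static inline amd_code_property32_t
amd_set_private_element_size(amd_code_property32_t Props, uint32_t Value) {
  /* Clear the old field, then insert the new value at the field's shift,
     re-masking so out-of-range values cannot spill into neighbouring bits. */
  Props &= ~(amd_code_property32_t)AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE;
  Props |= (Value << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT) &
           AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE;
  return Props;
}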
+
+/// @brief The hsa_ext_control_directives_t specifies the values for the HSAIL
+/// control directives. These control how the finalizer generates code. This
+/// struct is used both as an argument to hsaFinalizeKernel to specify values for
+/// the control directives, and is used in HsaKernelCode to record the values of
+/// the control directives that the finalizer used when generating the code, which
+/// either came from the finalizer argument or explicit HSAIL control
+/// directives. See the definition of the control directives in HSA Programmer's
+/// Reference Manual which also defines how the values specified as finalizer
+/// arguments have to agree with the control directives in the HSAIL code.
+typedef struct hsa_ext_control_directives_s {
+ /// This is a bit set indicating which control directives have been
+ /// specified. If the value is 0 then there are no control directives specified
+ /// and the rest of the fields can be ignored. The bits are accessed using the
+ /// hsa_ext_control_directives_present_mask_t. Any control directive that is not
+ /// enabled in this bit set must have the value of all 0s.
+ hsa_ext_control_directive_present64_t enabled_control_directives;
+
+ /// If enableBreakExceptions is not enabled then must be 0, otherwise must be
+ /// non-0 and specifies the set of HSAIL exceptions that must have the BREAK
+ /// policy enabled. If this set is not empty then the generated code may have
+ /// lower performance than if the set is empty. If the kernel being finalized
+ /// has any enablebreakexceptions control directives, then the values specified
+ /// by this argument are unioned with the values in these control
+ /// directives. If any of the functions the kernel calls have an
+  /// enablebreakexceptions control directive, then they must be equal to,
+  /// or a subset of, this union.
+ hsa_ext_exception_kind16_t enable_break_exceptions;
+
+ /// If enableDetectExceptions is not enabled then must be 0, otherwise must be
+ /// non-0 and specifies the set of HSAIL exceptions that must have the DETECT
+ /// policy enabled. If this set is not empty then the generated code may have
+ /// lower performance than if the set is empty. However, an implementation
+ /// should endeavour to make the performance impact small. If the kernel being
+ /// finalized has any enabledetectexceptions control directives, then the
+ /// values specified by this argument are unioned with the values in these
+ /// control directives. If any of the functions the kernel calls have an
+  /// enabledetectexceptions control directive, then they must be equal to,
+  /// or a subset of, this union.
+ hsa_ext_exception_kind16_t enable_detect_exceptions;
+
+ /// If maxDynamicGroupSize is not enabled then must be 0, and any amount of
+ /// dynamic group segment can be allocated for a dispatch, otherwise the value
+ /// specifies the maximum number of bytes of dynamic group segment that can be
+ /// allocated for a dispatch. If the kernel being finalized has any
+ /// maxdynamicsize control directives, then the values must be the same, and
+ /// must be the same as this argument if it is enabled. This value can be used
+ /// by the finalizer to determine the maximum number of bytes of group memory
+ /// used by each work-group by adding this value to the group memory required
+ /// for all group segment variables used by the kernel and all functions it
+ /// calls, and group memory used to implement other HSAIL features such as
+ /// fbarriers and the detect exception operations. This can allow the finalizer
+ /// to determine the expected number of work-groups that can be executed by a
+ /// compute unit and allow more resources to be allocated to the work-items if
+ /// it is known that fewer work-groups can be executed due to group memory
+ /// limitations.
+ uint32_t max_dynamic_group_size;
+
+ /// If maxFlatGridSize is not enabled then must be 0, otherwise must be greater
+ /// than 0. See HSA Programmer's Reference Manual description of
+ /// maxflatgridsize control directive.
+ uint32_t max_flat_grid_size;
+
+ /// If maxFlatWorkgroupSize is not enabled then must be 0, otherwise must be
+ /// greater than 0. See HSA Programmer's Reference Manual description of
+ /// maxflatworkgroupsize control directive.
+ uint32_t max_flat_workgroup_size;
+
+ /// If requestedWorkgroupsPerCu is not enabled then must be 0, and the
+ /// finalizer is free to generate ISA that may result in any number of
+ /// work-groups executing on a single compute unit. Otherwise, the finalizer
+ /// should attempt to generate ISA that will allow the specified number of
+ /// work-groups to execute on a single compute unit. This is only a hint and
+ /// can be ignored by the finalizer. If the kernel being finalized, or any of
+ /// the functions it calls, has a requested control directive, then the values
+ /// must be the same. This can be used to determine the number of resources
+ /// that should be allocated to a single work-group and work-item. For example,
+ /// a low value may allow more resources to be allocated, resulting in higher
+ /// per work-item performance, as it is known there will never be more than the
+ /// specified number of work-groups actually executing on the compute
+ /// unit. Conversely, a high value may allocate fewer resources, resulting in
+ /// lower per work-item performance, which is offset by the fact it allows more
+ /// work-groups to actually execute on the compute unit.
+ uint32_t requested_workgroups_per_cu;
+
+ /// If not enabled then all elements for Dim3 must be 0, otherwise every
+ /// element must be greater than 0. See HSA Programmer's Reference Manual
+ /// description of requiredgridsize control directive.
+ hsa_dim3_t required_grid_size;
+
+ /// If requiredWorkgroupSize is not enabled then all elements for Dim3 must be
+ /// 0, and the produced code can be dispatched with any legal work-group range
+ /// consistent with the dispatch dimensions. Otherwise, the code produced must
+ /// always be dispatched with the specified work-group range. No element of the
+  /// specified range may be 0. It must be consistent with required_dimensions
+ /// and max_flat_workgroup_size. If the kernel being finalized, or any of the
+ /// functions it calls, has a requiredworkgroupsize control directive, then the
+ /// values must be the same. Specifying a value can allow the finalizer to
+ /// optimize work-group id operations, and if the number of work-items in the
+ /// work-group is less than the WAVESIZE then barrier operations can be
+ /// optimized to just a memory fence.
+ hsa_dim3_t required_workgroup_size;
+
+ /// If requiredDim is not enabled then must be 0 and the produced kernel code
+ /// can be dispatched with 1, 2 or 3 dimensions. If enabled then the value is
+ /// 1..3 and the code produced must only be dispatched with a dimension that
+ /// matches. Other values are illegal. If the kernel being finalized, or any of
+ /// the functions it calls, has a requireddimsize control directive, then the
+ /// values must be the same. This can be used to optimize the code generated to
+ /// compute the absolute and flat work-group and work-item id, and the dim
+ /// HSAIL operations.
+ uint8_t required_dim;
+
+ /// Reserved. Must be 0.
+ uint8_t reserved[75];
+} hsa_ext_control_directives_t;
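
As the field comments above state, an all-zero struct means no control directives are specified. Below is a small sketch of how a caller might populate one; the exact present bit for a directive comes from hsa_ext_control_directives_present_mask_t, which is not defined in this header, so a placeholder bit position is used and the function name is hypothetical.

static inline hsa_ext_control_directives_t make_directives_sketch(void) {
  hsa_ext_control_directives_t D = {0};  /* all zero: nothing enabled */
  /* Enabling a directive means setting both its field and its bit in
     enabled_control_directives (placeholder bit position used here). */
  D.max_flat_workgroup_size = 256;
  D.enabled_control_directives |= (hsa_ext_control_directive_present64_t)1 << 5;
  return D;
}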
+
+/// AMD Kernel Code Object (amd_kernel_code_t). GPU CP uses the AMD Kernel
+/// Code Object to set up the hardware to execute the kernel dispatch.
+///
+/// Initial Kernel Register State.
+///
+/// Initial kernel register state will be set up by CP/SPI prior to the start
+/// of execution of every wavefront. This is limited by the constraints of the
+/// current hardware.
+///
+/// The order of the SGPR registers is defined, but the Finalizer can specify
+/// which ones are actually setup in the amd_kernel_code_t object using the
+/// enable_sgpr_* bit fields. The register numbers used for enabled registers
+/// are dense starting at SGPR0: the first enabled register is SGPR0, the next
+/// enabled register is SGPR1 etc.; disabled registers do not have an SGPR
+/// number.
+///
+/// The initial SGPRs comprise up to 16 User SGPRs that are set up by CP and
+/// apply to all waves of the grid. It is possible to specify more than 16 User
+/// SGPRs using the enable_sgpr_* bit fields, in which case only the first 16
+/// are actually initialized. These are then immediately followed by the System
+/// SGPRs that are set up by ADC/SPI and can have different values for each wave
+/// of the grid dispatch.
+///
+/// SGPR register initial state is defined as follows:
+///
+/// Private Segment Buffer (enable_sgpr_private_segment_buffer):
+/// Number of User SGPR registers: 4. V# that can be used, together with
+/// Scratch Wave Offset as an offset, to access the Private/Spill/Arg
+/// segments using a segment address. It must be set as follows:
+/// - Base address: of the scratch memory area used by the dispatch. It
+/// does not include the scratch wave offset. It will be the per process
+/// SH_HIDDEN_PRIVATE_BASE_VMID plus any offset from this dispatch (for
+/// example there may be a per pipe offset, or per AQL Queue offset).
+/// - Stride + data_format: Element Size * Index Stride (???)
+/// - Cache swizzle: ???
+/// - Swizzle enable: SH_STATIC_MEM_CONFIG.SWIZZLE_ENABLE (must be 1 for
+/// scratch)
+/// - Num records: Flat Scratch Work Item Size / Element Size (???)
+/// - Dst_sel_*: ???
+/// - Num_format: ???
+/// - Element_size: SH_STATIC_MEM_CONFIG.ELEMENT_SIZE (will be DWORD, must
+/// agree with amd_kernel_code_t.privateElementSize)
+/// - Index_stride: SH_STATIC_MEM_CONFIG.INDEX_STRIDE (will be 64 as must
+/// be number of wavefront lanes for scratch, must agree with
+/// amd_kernel_code_t.wavefrontSize)
+/// - Add tid enable: 1
+/// - ATC: from SH_MEM_CONFIG.PRIVATE_ATC,
+/// - Hash_enable: ???
+/// - Heap: ???
+/// - Mtype: from SH_STATIC_MEM_CONFIG.PRIVATE_MTYPE
+/// - Type: 0 (a buffer) (???)
+///
+/// Dispatch Ptr (enable_sgpr_dispatch_ptr):
+/// Number of User SGPR registers: 2. 64 bit address of AQL dispatch packet
+/// for kernel actually executing.
+///
+/// Queue Ptr (enable_sgpr_queue_ptr):
+/// Number of User SGPR registers: 2. 64 bit address of AmdQueue object for
+/// AQL queue on which the dispatch packet was queued.
+///
+/// Kernarg Segment Ptr (enable_sgpr_kernarg_segment_ptr):
+/// Number of User SGPR registers: 2. 64 bit address of Kernarg segment. This
+/// is directly copied from the kernargPtr in the dispatch packet. Having CP
+/// load it once avoids loading it at the beginning of every wavefront.
+///
+/// Dispatch Id (enable_sgpr_dispatch_id):
+/// Number of User SGPR registers: 2. 64 bit Dispatch ID of the dispatch
+/// packet being executed.
+///
+/// Flat Scratch Init (enable_sgpr_flat_scratch_init):
+/// Number of User SGPR registers: 2. This is 2 SGPRs.
+///
+/// For CI/VI:
+/// The first SGPR is a 32 bit byte offset from SH_MEM_HIDDEN_PRIVATE_BASE
+/// to base of memory for scratch for this dispatch. This is the same offset
+/// used in computing the Scratch Segment Buffer base address. The value of
+/// Scratch Wave Offset must be added by the kernel code and moved to
+/// SGPRn-4 for use as the FLAT SCRATCH BASE in flat memory instructions.
+///
+/// The second SGPR is 32 bit byte size of a single work-item’s scratch
+/// memory usage. This is directly loaded from the dispatch packet Private
+/// Segment Byte Size and rounded up to a multiple of DWORD.
+///
+/// \todo [Does CP need to round this to >4 byte alignment?]
+///
+/// The kernel code must move to SGPRn-3 for use as the FLAT SCRATCH SIZE in
+/// flat memory instructions. Having CP load it once avoids loading it at
+/// the beginning of every wavefront.
+///
+/// For PI:
+///     This is the 64 bit base address of the scratch backing memory
+///     allocated by CP for this dispatch.
+///
+/// Private Segment Size (enable_sgpr_private_segment_size):
+/// Number of User SGPR registers: 1. The 32 bit byte size of a single
+/// work-item’s scratch memory allocation. This is the value from the dispatch
+/// packet. Private Segment Byte Size rounded up by CP to a multiple of DWORD.
+///
+/// \todo [Does CP need to round this to >4 byte alignment?]
+///
+/// Having CP load it once avoids loading it at the beginning of every
+/// wavefront.
+///
+/// \todo [This will not be used for CI/VI since it is the same value as
+/// the second SGPR of Flat Scratch Init. However, it is needed for PI, which
+/// changes the meaning of Flat Scratch Init.]
+///
+/// Grid Work-Group Count X (enable_sgpr_grid_workgroup_count_x):
+/// Number of User SGPR registers: 1. 32 bit count of the number of
+/// work-groups in the X dimension for the grid being executed. Computed from
+/// the fields in the HsaDispatchPacket as
+/// ((gridSize.x+workgroupSize.x-1)/workgroupSize.x).
+///
+/// Grid Work-Group Count Y (enable_sgpr_grid_workgroup_count_y):
+/// Number of User SGPR registers: 1. 32 bit count of the number of
+/// work-groups in the Y dimension for the grid being executed. Computed from
+/// the fields in the HsaDispatchPacket as
+/// ((gridSize.y+workgroupSize.y-1)/workgroupSize.y).
+///
+/// Only initialized if <16 previous SGPRs initialized.
+///
+/// Grid Work-Group Count Z (enable_sgpr_grid_workgroup_count_z):
+/// Number of User SGPR registers: 1. 32 bit count of the number of
+/// work-groups in the Z dimension for the grid being executed. Computed
+/// from the fields in the HsaDispatchPacket as
+/// ((gridSize.z+workgroupSize.z-1)/workgroupSize.z).
+///
+/// Only initialized if <16 previous SGPRs initialized.
+///
+/// Work-Group Id X (enable_sgpr_workgroup_id_x):
+/// Number of System SGPR registers: 1. 32 bit work group id in X dimension
+/// of grid for wavefront. Always present.
+///
+/// Work-Group Id Y (enable_sgpr_workgroup_id_y):
+/// Number of System SGPR registers: 1. 32 bit work group id in Y dimension
+/// of grid for wavefront.
+///
+/// Work-Group Id Z (enable_sgpr_workgroup_id_z):
+/// Number of System SGPR registers: 1. 32 bit work group id in Z dimension
+/// of grid for wavefront. If present then Work-group Id Y will also be
+/// present
+///
+/// Work-Group Info (enable_sgpr_workgroup_info):
+/// Number of System SGPR registers: 1. {first_wave, 14'b0000,
+/// ordered_append_term[10:0], threadgroup_size_in_waves[5:0]}
+///
+/// Private Segment Wave Byte Offset
+/// (enable_sgpr_private_segment_wave_byte_offset):
+/// Number of System SGPR registers: 1. 32 bit byte offset from base of
+/// dispatch scratch base. Must be used as an offset with Private/Spill/Arg
+/// segment address when using Scratch Segment Buffer. It must be added to
+/// Flat Scratch Offset if setting up FLAT SCRATCH for flat addressing.
+///
+///
+/// The order of the VGPR registers is defined, but the Finalizer can specify
+/// which ones are actually setup in the amd_kernel_code_t object using the
+/// enableVgpr* bit fields. The register numbers used for enabled registers
+/// are dense starting at VGPR0: the first enabled register is VGPR0, the next
+/// enabled register is VGPR1 etc.; disabled registers do not have a VGPR
+/// number.
+///
+/// VGPR register initial state is defined as follows:
+///
+/// Work-Item Id X (always initialized):
+/// Number of registers: 1. 32 bit work item id in X dimension of work-group
+/// for wavefront lane.
+///
+/// Work-Item Id Y (enable_vgpr_workitem_id > 0):
+/// Number of registers: 1. 32 bit work item id in Y dimension of work-group
+/// for wavefront lane.
+///
+/// Work-Item Id Z (enable_vgpr_workitem_id > 1):
+/// Number of registers: 1. 32 bit work item id in Z dimension of work-group
+/// for wavefront lane.
+///
+///
+/// The setting of registers is being done by existing GPU hardware as follows:
+/// 1) SGPRs before the Work-Group Ids are set by CP using the 16 User Data
+/// registers.
+/// 2) Work-group Id registers X, Y, Z are set by SPI which supports any
+/// combination including none.
+/// 3) Scratch Wave Offset is also set by SPI which is why its value cannot
+/// be added into the value Flat Scratch Offset which would avoid the
+/// Finalizer generated prolog having to do the add.
+/// 4) The VGPRs are set by SPI which only supports specifying either (X),
+/// (X, Y) or (X, Y, Z).
+///
+/// Flat Scratch Dispatch Offset and Flat Scratch Size are adjacent SGPRs so
+/// they can be moved as a 64 bit value to the hardware required SGPRn-3 and
+/// SGPRn-4 respectively using the Finalizer ?FLAT_SCRATCH? Register.
+///
+/// The global segment can be accessed either using flat operations or buffer
+/// operations. If buffer operations are used then the Global Buffer used to
+/// access HSAIL Global/Readonly/Kernarg (which are combined) segments using a
+/// segment address is not passed into the kernel code by CP since its base
+/// address is always 0. Instead the Finalizer generates prolog code to
+/// initialize 4 SGPRs with a V# that has the following properties, and then
+/// uses that in the buffer instructions:
+/// - base address of 0
+/// - no swizzle
+/// - ATC=1
+/// - MTYPE set to support memory coherence specified in
+/// amd_kernel_code_t.globalMemoryCoherence
+///
+/// When the Global Buffer is used to access the Kernarg segment, the dispatch
+/// packet kernArgPtr must be added to a kernarg segment address before using
+/// this V#.
+/// Alternatively scalar loads can be used if the kernarg offset is uniform, as
+/// the kernarg segment is constant for the duration of the kernel execution.
+///
+typedef struct amd_kernel_code_s {
+ /// The AMD major version of the Code Object. Must be the value
+ /// AMD_CODE_VERSION_MAJOR.
+ amd_code_version32_t amd_code_version_major;
+
+ /// The AMD minor version of the Code Object. Minor versions must be
+ /// backward compatible. Must be the value
+ /// AMD_CODE_VERSION_MINOR.
+ amd_code_version32_t amd_code_version_minor;
+
+ /// The byte size of this struct. Must be set to
+ /// sizeof(amd_kernel_code_t). Used for backward
+ /// compatibility.
+ uint32_t struct_byte_size;
+
+ /// The target chip instruction set for which code has been
+ /// generated. Values are from the E_SC_INSTRUCTION_SET enumeration
+ /// in sc/Interface/SCCommon.h.
+ uint32_t target_chip;
+
+ /// Byte offset (possibly negative) from start of amd_kernel_code_t
+ /// object to kernel's entry point instruction. The actual code for
+ /// the kernel is required to be 256 byte aligned to match hardware
+ /// requirements (SQ cache line is 16). The code must be position
+ /// independent code (PIC) for AMD devices to give runtime the
+ /// option of copying code to discrete GPU memory or APU L2
+ /// cache. The Finalizer should endeavour to allocate all kernel
+ /// machine code in contiguous memory pages so that a device
+ /// pre-fetcher will tend to only pre-fetch Kernel Code objects,
+ /// improving cache performance.
+ int64_t kernel_code_entry_byte_offset;
+
+ /// Range of bytes to consider prefetching expressed as an offset
+ /// and size. The offset is from the start (possibly negative) of
+ /// amd_kernel_code_t object. Set both to 0 if no prefetch
+ /// information is available.
+ ///
+ /// \todo ttye 11/15/2013 Is the prefetch definition we want? Did
+ /// not make the size a uint64_t as prefetching more than 4GiB seems
+ /// excessive.
+ int64_t kernel_code_prefetch_byte_offset;
+ uint64_t kernel_code_prefetch_byte_size;
+
+ /// Number of bytes of scratch backing memory required for full
+ /// occupancy of target chip. This takes into account the number of
+ /// bytes of scratch per work-item, the wavefront size, the maximum
+ /// number of wavefronts per CU, and the number of CUs. This is an
+ /// upper limit on scratch. If the grid being dispatched is small it
+ /// may only need less than this. If the kernel uses no scratch, or
+ /// the Finalizer has not computed this value, it must be 0.
+ uint64_t max_scratch_backing_memory_byte_size;
+
+ /// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
+ /// COMPUTE_PGM_RSRC2 registers.
+ amd_compute_pgm_resource_register64_t compute_pgm_resource_registers;
+
+ /// Code properties. See amd_code_property_mask_t for a full list of
+ /// properties.
+ amd_code_property32_t code_properties;
+
+ /// The amount of memory required for the combined private, spill
+ /// and arg segments for a work-item in bytes. If
+ /// is_dynamic_callstack is 1 then additional space must be added to
+ /// this value for the call stack.
+ uint32_t workitem_private_segment_byte_size;
+
+ /// The amount of group segment memory required by a work-group in
+ /// bytes. This does not include any dynamically allocated group
+ /// segment memory that may be added when the kernel is
+ /// dispatched.
+ uint32_t workgroup_group_segment_byte_size;
+
+  /// Number of bytes of GDS required by the kernel dispatch. Must be 0 if
+ /// not using GDS.
+ uint32_t gds_segment_byte_size;
+
+ /// The size in bytes of the kernarg segment that holds the values
+ /// of the arguments to the kernel. This could be used by CP to
+ /// prefetch the kernarg segment pointed to by the dispatch packet.
+ uint64_t kernarg_segment_byte_size;
+
+ /// Number of fbarrier's used in the kernel and all functions it
+ /// calls. If the implementation uses group memory to allocate the
+ /// fbarriers then that amount must already be included in the
+ /// workgroup_group_segment_byte_size total.
+ uint32_t workgroup_fbarrier_count;
+
+ /// Number of scalar registers used by a wavefront. This includes
+ /// the special SGPRs for VCC, Flat Scratch Base, Flat Scratch Size
+  /// and XNACK (for GFX8 (VI)). It does not include the 16 SGPRs added if a
+ /// trap handler is enabled. Used to set COMPUTE_PGM_RSRC1.SGPRS.
+ uint16_t wavefront_sgpr_count;
+
+ /// Number of vector registers used by each work-item. Used to set
+ /// COMPUTE_PGM_RSRC1.VGPRS.
+ uint16_t workitem_vgpr_count;
+
+ /// If reserved_vgpr_count is 0 then must be 0. Otherwise, this is the
+ /// first fixed VGPR number reserved.
+ uint16_t reserved_vgpr_first;
+
+ /// The number of consecutive VGPRs reserved by the client. If
+ /// is_debug_supported then this count includes VGPRs reserved
+ /// for debugger use.
+ uint16_t reserved_vgpr_count;
+
+ /// If reserved_sgpr_count is 0 then must be 0. Otherwise, this is the
+ /// first fixed SGPR number reserved.
+ uint16_t reserved_sgpr_first;
+
+ /// The number of consecutive SGPRs reserved by the client. If
+ /// is_debug_supported then this count includes SGPRs reserved
+ /// for debugger use.
+ uint16_t reserved_sgpr_count;
+
+ /// If is_debug_supported is 0 then must be 0. Otherwise, this is the
+ /// fixed SGPR number used to hold the wave scratch offset for the
+ /// entire kernel execution, or uint16_t(-1) if the register is not
+ /// used or not known.
+ uint16_t debug_wavefront_private_segment_offset_sgpr;
+
+ /// If is_debug_supported is 0 then must be 0. Otherwise, this is the
+ /// fixed SGPR number of the first of 4 SGPRs used to hold the
+ /// scratch V# used for the entire kernel execution, or uint16_t(-1)
+ /// if the registers are not used or not known.
+ uint16_t debug_private_segment_buffer_sgpr;
+
+ /// The maximum byte alignment of variables used by the kernel in
+ /// the specified memory segment. Expressed as a power of two. Must
+ /// be at least HSA_POWERTWO_16.
+ hsa_powertwo8_t kernarg_segment_alignment;
+ hsa_powertwo8_t group_segment_alignment;
+ hsa_powertwo8_t private_segment_alignment;
+
+ uint8_t reserved3;
+
+ /// Type of code object.
+ hsa_ext_code_kind32_t code_type;
+
+ /// Reserved for code properties if any are defined in the future.
+ /// There are currently no code properties so this field must be 0.
+ uint32_t reserved4;
+
+ /// Wavefront size expressed as a power of two. Must be a power of 2
+ /// in range 1..64 inclusive. Used to support runtime query that
+ /// obtains wavefront size, which may be used by application to
+  /// obtains the wavefront size, which may be used by the application to
+  /// allocate dynamic group memory and set the dispatch work-group
+ hsa_powertwo8_t wavefront_size;
+
+ /// The optimization level specified when the kernel was
+ /// finalized.
+ uint8_t optimization_level;
+
+ /// The HSAIL profile defines which features are used. This
+ /// information is from the HSAIL version directive. If this
+ /// amd_kernel_code_t is not generated from an HSAIL compilation
+ /// unit then must be 0.
+ hsa_ext_brig_profile8_t hsail_profile;
+
+ /// The HSAIL machine model gives the address sizes used by the
+ /// code. This information is from the HSAIL version directive. If
+ /// not generated from an HSAIL compilation unit then must still
+ /// indicate for what machine mode the code is generated.
+ hsa_ext_brig_machine_model8_t hsail_machine_model;
+
+ /// The HSAIL major version. This information is from the HSAIL
+ /// version directive. If this amd_kernel_code_t is not
+ /// generated from an HSAIL compilation unit then must be 0.
+ uint32_t hsail_version_major;
+
+ /// The HSAIL minor version. This information is from the HSAIL
+ /// version directive. If this amd_kernel_code_t is not
+ /// generated from an HSAIL compilation unit then must be 0.
+ uint32_t hsail_version_minor;
+
+ /// Reserved for HSAIL target options if any are defined in the
+ /// future. There are currently no target options so this field
+ /// must be 0.
+ uint16_t reserved5;
+
+ /// Reserved. Must be 0.
+ uint16_t reserved6;
+
+  /// The values should be the actual values used by the finalizer
+ /// in generating the code. This may be the union of values
+ /// specified as finalizer arguments and explicit HSAIL control
+ /// directives. If the finalizer chooses to ignore a control
+ /// directive, and not generate constrained code, then the control
+ /// directive should not be marked as enabled even though it was
+ /// present in the HSAIL or finalizer argument. The values are
+ /// intended to reflect the constraints that the code actually
+ /// requires to correctly execute, not the values that were
+ /// actually specified at finalize time.
+ hsa_ext_control_directives_t control_directive;
+
+ /// The code can immediately follow the amd_kernel_code_t, or can
+ /// come after subsequent amd_kernel_code_t structs when there are
+ /// multiple kernels in the compilation unit.
+
+} amd_kernel_code_t;
+
+#endif // AMDKERNELCODET_H
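
Two small sketches based on the comments in this header, with hypothetical function names: the version/size check a loader might perform before trusting an amd_kernel_code_t, and the work-group count expression quoted for the grid_workgroup_count SGPRs.

/* A loader built against an older minor version may see a larger
   struct_byte_size, so check the major version and a minimum size. */
static inline int amd_kernel_code_header_ok(const amd_kernel_code_t *Code) {
  return Code->amd_code_version_major == AMD_CODE_VERSION_MAJOR &&
         Code->struct_byte_size >= sizeof(amd_kernel_code_t);
}

/* The X work-group count documented above, i.e. the integer ceiling of
   gridSize.x / workgroupSize.x. */
static inline uint32_t grid_workgroup_count_x(hsa_dim3_t GridSize,
                                              hsa_dim3_t WorkgroupSize) {
  return (GridSize.x + WorkgroupSize.x - 1) / WorkgroupSize.x;
}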
diff --git a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
new file mode 100644
index 0000000..80081d4
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -0,0 +1,1380 @@
+//===-- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace {
+
+struct OptionalOperand;
+
+class AMDGPUOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ Token,
+ Immediate,
+ Register,
+ Expression
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+public:
+ AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+
+ MCContext *Ctx;
+
+ enum ImmTy {
+ ImmTyNone,
+ ImmTyDSOffset0,
+ ImmTyDSOffset1,
+ ImmTyGDS,
+ ImmTyOffset,
+ ImmTyGLC,
+ ImmTySLC,
+ ImmTyTFE,
+ ImmTyClamp,
+ ImmTyOMod
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct ImmOp {
+ bool IsFPImm;
+ ImmTy Type;
+ int64_t Val;
+ };
+
+ struct RegOp {
+ unsigned RegNo;
+ int Modifiers;
+ const MCRegisterInfo *TRI;
+ bool IsForcedVOP3;
+ };
+
+ union {
+ TokOp Tok;
+ ImmOp Imm;
+ RegOp Reg;
+ const MCExpr *Expr;
+ };
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::createImm(getImm()));
+ }
+
+ StringRef getToken() const {
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::createReg(getReg()));
+ }
+
+ void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
+ if (isReg())
+ addRegOperands(Inst, N);
+ else
+ addImmOperands(Inst, N);
+ }
+
+ void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::createImm(
+ Reg.Modifiers == -1 ? 0 : Reg.Modifiers));
+ addRegOperands(Inst, N);
+ }
+
+ void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
+ if (isImm())
+ addImmOperands(Inst, N);
+ else {
+ assert(isExpr());
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ }
+ }
+
+ bool defaultTokenHasSuffix() const {
+ StringRef Token(Tok.Data, Tok.Length);
+
+ return Token.endswith("_e32") || Token.endswith("_e64");
+ }
+
+ bool isToken() const override {
+ return Kind == Token;
+ }
+
+ bool isImm() const override {
+ return Kind == Immediate;
+ }
+
+ bool isInlineImm() const {
+ float F = BitsToFloat(Imm.Val);
+ // TODO: Add 0.5pi for VI
+ return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) ||
+ (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
+ F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0));
+ }
+
+ bool isDSOffset0() const {
+ assert(isImm());
+ return Imm.Type == ImmTyDSOffset0;
+ }
+
+ bool isDSOffset1() const {
+ assert(isImm());
+ return Imm.Type == ImmTyDSOffset1;
+ }
+
+ int64_t getImm() const {
+ return Imm.Val;
+ }
+
+ enum ImmTy getImmTy() const {
+ assert(isImm());
+ return Imm.Type;
+ }
+
+ bool isRegKind() const {
+ return Kind == Register;
+ }
+
+ bool isReg() const override {
+ return Kind == Register && Reg.Modifiers == -1;
+ }
+
+ bool isRegWithInputMods() const {
+ return Kind == Register && (Reg.IsForcedVOP3 || Reg.Modifiers != -1);
+ }
+
+ void setModifiers(unsigned Mods) {
+ assert(isReg());
+ Reg.Modifiers = Mods;
+ }
+
+ bool hasModifiers() const {
+ assert(isRegKind());
+ return Reg.Modifiers != -1;
+ }
+
+ unsigned getReg() const override {
+ return Reg.RegNo;
+ }
+
+ bool isRegOrImm() const {
+ return isReg() || isImm();
+ }
+
+ bool isRegClass(unsigned RCID) const {
+ return Reg.TRI->getRegClass(RCID).contains(getReg());
+ }
+
+ bool isSCSrc32() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
+ }
+
+ bool isSSrc32() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
+ }
+
+ bool isSSrc64() const {
+ return isImm() || isInlineImm() ||
+ (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
+ }
+
+ bool isVCSrc32() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+ }
+
+ bool isVCSrc64() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
+ }
+
+ bool isVSrc32() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+ }
+
+ bool isVSrc64() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
+ }
+
+ bool isMem() const override {
+ return false;
+ }
+
+ bool isExpr() const {
+ return Kind == Expression;
+ }
+
+ bool isSoppBrTarget() const {
+ return isExpr() || isImm();
+ }
+
+ SMLoc getStartLoc() const override {
+ return StartLoc;
+ }
+
+ SMLoc getEndLoc() const override {
+ return EndLoc;
+ }
+
+ void print(raw_ostream &OS) const override { }
+
+ static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc,
+ enum ImmTy Type = ImmTyNone,
+ bool IsFPImm = false) {
+ auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
+ Op->Imm.Val = Val;
+ Op->Imm.IsFPImm = IsFPImm;
+ Op->Imm.Type = Type;
+ Op->StartLoc = Loc;
+ Op->EndLoc = Loc;
+ return Op;
+ }
+
+ static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc,
+ bool HasExplicitEncodingSize = true) {
+ auto Res = llvm::make_unique<AMDGPUOperand>(Token);
+ Res->Tok.Data = Str.data();
+ Res->Tok.Length = Str.size();
+ Res->StartLoc = Loc;
+ Res->EndLoc = Loc;
+ return Res;
+ }
+
+ static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
+ SMLoc E,
+ const MCRegisterInfo *TRI,
+ bool ForceVOP3) {
+ auto Op = llvm::make_unique<AMDGPUOperand>(Register);
+ Op->Reg.RegNo = RegNo;
+ Op->Reg.TRI = TRI;
+ Op->Reg.Modifiers = -1;
+ Op->Reg.IsForcedVOP3 = ForceVOP3;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<AMDGPUOperand> CreateExpr(const class MCExpr *Expr, SMLoc S) {
+ auto Op = llvm::make_unique<AMDGPUOperand>(Expression);
+ Op->Expr = Expr;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ bool isDSOffset() const;
+ bool isDSOffset01() const;
+ bool isSWaitCnt() const;
+ bool isMubufOffset() const;
+};
+
+class AMDGPUAsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ const MCInstrInfo &MII;
+ MCAsmParser &Parser;
+
+ unsigned ForcedEncodingSize;
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "AMDGPUGenAsmMatcher.inc"
+
+ /// }
+
+public:
+ AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
+ const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI), MII(MII), Parser(_Parser),
+ ForcedEncodingSize(0){
+
+ if (STI.getFeatureBits().none()) {
+ // Set default features.
+ STI.ToggleFeature("SOUTHERN_ISLANDS");
+ }
+
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ unsigned getForcedEncodingSize() const {
+ return ForcedEncodingSize;
+ }
+
+ void setForcedEncodingSize(unsigned Size) {
+ ForcedEncodingSize = Size;
+ }
+
+ bool isForcedVOP3() const {
+ return ForcedEncodingSize == 64;
+ }
+
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ unsigned checkTargetMatchPredicate(MCInst &Inst) override;
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+ bool ParseDirective(AsmToken DirectiveID) override;
+ OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+
+ OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int,
+ int64_t Default = 0);
+ OperandMatchResultTy parseIntWithPrefix(const char *Prefix,
+ OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy =
+ AMDGPUOperand::ImmTyNone);
+ OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy =
+ AMDGPUOperand::ImmTyNone);
+ OperandMatchResultTy parseOptionalOps(
+ const ArrayRef<OptionalOperand> &OptionalOps,
+ OperandVector &Operands);
+
+
+ void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
+ void cvtDS(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseDSOptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseDSOff01OptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseDSOffsetOptional(OperandVector &Operands);
+
+ bool parseCnt(int64_t &IntVal);
+ OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
+ OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
+
+ OperandMatchResultTy parseFlatOptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseFlatAtomicOptionalOps(OperandVector &Operands);
+ void cvtFlat(MCInst &Inst, const OperandVector &Operands);
+
+ void cvtMubuf(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseOffset(OperandVector &Operands);
+ OperandMatchResultTy parseMubufOptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseGLC(OperandVector &Operands);
+ OperandMatchResultTy parseSLC(OperandVector &Operands);
+ OperandMatchResultTy parseTFE(OperandVector &Operands);
+
+ OperandMatchResultTy parseDMask(OperandVector &Operands);
+ OperandMatchResultTy parseUNorm(OperandVector &Operands);
+ OperandMatchResultTy parseR128(OperandVector &Operands);
+
+ void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands);
+};
+
+struct OptionalOperand {
+ const char *Name;
+ AMDGPUOperand::ImmTy Type;
+ bool IsBit;
+ int64_t Default;
+ bool (*ConvertResult)(int64_t&);
+};
+
+} // namespace
+
+static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
+ if (IsVgpr) {
+ switch (RegWidth) {
+ default: llvm_unreachable("Unknown register width");
+ case 1: return AMDGPU::VGPR_32RegClassID;
+ case 2: return AMDGPU::VReg_64RegClassID;
+ case 3: return AMDGPU::VReg_96RegClassID;
+ case 4: return AMDGPU::VReg_128RegClassID;
+ case 8: return AMDGPU::VReg_256RegClassID;
+ case 16: return AMDGPU::VReg_512RegClassID;
+ }
+ }
+
+ switch (RegWidth) {
+ default: llvm_unreachable("Unknown register width");
+ case 1: return AMDGPU::SGPR_32RegClassID;
+ case 2: return AMDGPU::SGPR_64RegClassID;
+ case 4: return AMDGPU::SReg_128RegClassID;
+ case 8: return AMDGPU::SReg_256RegClassID;
+ case 16: return AMDGPU::SReg_512RegClassID;
+ }
+}
+
+static unsigned getRegForName(const StringRef &RegName) {
+
+ return StringSwitch<unsigned>(RegName)
+ .Case("exec", AMDGPU::EXEC)
+ .Case("vcc", AMDGPU::VCC)
+ .Case("flat_scr", AMDGPU::FLAT_SCR)
+ .Case("m0", AMDGPU::M0)
+ .Case("scc", AMDGPU::SCC)
+ .Case("flat_scr_lo", AMDGPU::FLAT_SCR_LO)
+ .Case("flat_scr_hi", AMDGPU::FLAT_SCR_HI)
+ .Case("vcc_lo", AMDGPU::VCC_LO)
+ .Case("vcc_hi", AMDGPU::VCC_HI)
+ .Case("exec_lo", AMDGPU::EXEC_LO)
+ .Case("exec_hi", AMDGPU::EXEC_HI)
+ .Default(0);
+}
+
+bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ const AsmToken Tok = Parser.getTok();
+ StartLoc = Tok.getLoc();
+ EndLoc = Tok.getEndLoc();
+ const StringRef &RegName = Tok.getString();
+ RegNo = getRegForName(RegName);
+
+ if (RegNo) {
+ Parser.Lex();
+ return false;
+ }
+
+ // Match vgprs and sgprs
+ if (RegName[0] != 's' && RegName[0] != 'v')
+ return true;
+
+ bool IsVgpr = RegName[0] == 'v';
+ unsigned RegWidth;
+ unsigned RegIndexInClass;
+ if (RegName.size() > 1) {
+ // We have a 32-bit register
+ RegWidth = 1;
+ if (RegName.substr(1).getAsInteger(10, RegIndexInClass))
+ return true;
+ Parser.Lex();
+ } else {
+    // We have a register wider than 32 bits.
+
+ int64_t RegLo, RegHi;
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::LBrac))
+ return true;
+
+ Parser.Lex();
+ if (getParser().parseAbsoluteExpression(RegLo))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Colon))
+ return true;
+
+ Parser.Lex();
+ if (getParser().parseAbsoluteExpression(RegHi))
+ return true;
+
+ if (getLexer().isNot(AsmToken::RBrac))
+ return true;
+
+ Parser.Lex();
+ RegWidth = (RegHi - RegLo) + 1;
+ if (IsVgpr) {
+ // VGPR registers aren't aligned.
+ RegIndexInClass = RegLo;
+ } else {
+ // SGPR registers are aligned. Max alignment is 4 dwords.
+ RegIndexInClass = RegLo / std::min(RegWidth, 4u);
+ }
+ }
+
+ const MCRegisterInfo *TRC = getContext().getRegisterInfo();
+ unsigned RC = getRegClass(IsVgpr, RegWidth);
+ if (RegIndexInClass > TRC->getRegClass(RC).getNumRegs())
+ return true;
+ RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass);
+ return false;
+}
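
To make the range handling above concrete, here is a small standalone sketch of the index computation, using the same alignment rule (SGPR tuples aligned to at most four dwords, VGPR tuples unaligned). The helper name regIndexInClass is hypothetical and only illustrates the arithmetic: for s[8:11] it yields index 2 into SReg_128, while v[8:11] keeps the raw index 8 into VReg_128.

#include <algorithm>
#include <cstdio>

// Sketch of the register-index computation performed by ParseRegister()
// above for ranges like s[lo:hi] / v[lo:hi].
static unsigned regIndexInClass(bool IsVgpr, unsigned RegLo, unsigned RegHi) {
  unsigned RegWidth = (RegHi - RegLo) + 1;
  // SGPR tuples are aligned to at most 4 dwords; VGPR tuples are not aligned.
  return IsVgpr ? RegLo : RegLo / std::min(RegWidth, 4u);
}

int main() {
  std::printf("s[8:11] -> SReg_128 index %u\n", regIndexInClass(false, 8, 11)); // 2
  std::printf("v[8:11] -> VReg_128 index %u\n", regIndexInClass(true, 8, 11));  // 8
  return 0;
}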
+
+unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+
+ if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
+ (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)))
+ return Match_InvalidOperand;
+
+ return Match_Success;
+}
+
+
+bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+
+ switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
+ default: break;
+ case Match_Success:
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst, STI);
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "instruction not supported on this GPU");
+
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0ULL) {
+ if (ErrorInfo >= Operands.size()) {
+ if (isForcedVOP3()) {
+ // If 64-bit encoding has been forced we can end up with no
+ // clamp or omod operands if none of the registers have modifiers,
+ // so we need to add these to the operand list.
+ AMDGPUOperand &LastOp =
+ ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
+ if (LastOp.isRegKind() ||
+ (LastOp.isImm() &&
+ LastOp.getImmTy() != AMDGPUOperand::ImmTyNone)) {
+ SMLoc S = Parser.getTok().getLoc();
+ Operands.push_back(AMDGPUOperand::CreateImm(0, S,
+ AMDGPUOperand::ImmTyClamp));
+ Operands.push_back(AMDGPUOperand::CreateImm(0, S,
+ AMDGPUOperand::ImmTyOMod));
+ bool Res = MatchAndEmitInstruction(IDLoc, Opcode, Operands,
+ Out, ErrorInfo,
+ MatchingInlineAsm);
+ if (!Res)
+ return Res;
+ }
+
+ }
+ return Error(IDLoc, "too few operands for instruction");
+ }
+
+ ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ }
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ }
+ llvm_unreachable("Implement any new match types added!");
+}
+
+bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
+ return true;
+}
+
+static bool operandsHaveModifiers(const OperandVector &Operands) {
+
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ const AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
+ if (Op.isRegKind() && Op.hasModifiers())
+ return true;
+ if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod ||
+ Op.getImmTy() == AMDGPUOperand::ImmTyClamp))
+ return true;
+ }
+ return false;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
+
+ // Try to parse with a custom parser
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+
+  // If we successfully parsed the operand or if there was an error parsing,
+ // we are done.
+ //
+ // If we are parsing after we reach EndOfStatement then this means we
+ // are appending default values to the Operands list. This is only done
+ // by custom parser, so we shouldn't continue on to the generic parsing.
+ if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
+ getLexer().is(AsmToken::EndOfStatement))
+ return ResTy;
+
+ bool Negate = false, Abs = false;
+  if (getLexer().getKind() == AsmToken::Minus) {
+ Parser.Lex();
+ Negate = true;
+ }
+
+ if (getLexer().getKind() == AsmToken::Pipe) {
+ Parser.Lex();
+ Abs = true;
+ }
+
+ switch(getLexer().getKind()) {
+ case AsmToken::Integer: {
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t IntVal;
+ if (getParser().parseAbsoluteExpression(IntVal))
+ return MatchOperand_ParseFail;
+ APInt IntVal32(32, IntVal);
+ if (IntVal32.getSExtValue() != IntVal) {
+ Error(S, "invalid immediate: only 32-bit values are legal");
+ return MatchOperand_ParseFail;
+ }
+
+ IntVal = IntVal32.getSExtValue();
+ if (Negate)
+ IntVal *= -1;
+ Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
+ return MatchOperand_Success;
+ }
+ case AsmToken::Real: {
+    // FIXME: We should emit an error if a double-precision floating-point
+    // value is used. I'm not sure of the best way to detect this.
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t IntVal;
+ if (getParser().parseAbsoluteExpression(IntVal))
+ return MatchOperand_ParseFail;
+
+ APFloat F((float)BitsToDouble(IntVal));
+ if (Negate)
+ F.changeSign();
+ Operands.push_back(
+ AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S));
+ return MatchOperand_Success;
+ }
+ case AsmToken::Identifier: {
+ SMLoc S, E;
+ unsigned RegNo;
+ if (!ParseRegister(RegNo, S, E)) {
+
+ bool HasModifiers = operandsHaveModifiers(Operands);
+ unsigned Modifiers = 0;
+
+ if (Negate)
+ Modifiers |= 0x1;
+
+ if (Abs) {
+ if (getLexer().getKind() != AsmToken::Pipe)
+ return MatchOperand_ParseFail;
+ Parser.Lex();
+ Modifiers |= 0x2;
+ }
+
+ if (Modifiers && !HasModifiers) {
+ // We are adding a modifier to src1 or src2 and previous sources
+        // don't have modifiers, so we need to go back and set empty
+        // modifiers for each previous source.
+ for (unsigned PrevRegIdx = Operands.size() - 1; PrevRegIdx > 1;
+ --PrevRegIdx) {
+
+ AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[PrevRegIdx]);
+ RegOp.setModifiers(0);
+ }
+ }
+
+
+ Operands.push_back(AMDGPUOperand::CreateReg(
+ RegNo, S, E, getContext().getRegisterInfo(),
+ isForcedVOP3()));
+
+ if (HasModifiers || Modifiers) {
+ AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[Operands.size() - 1]);
+ RegOp.setModifiers(Modifiers);
+
+ }
+ } else {
+ Operands.push_back(AMDGPUOperand::CreateToken(Parser.getTok().getString(),
+ S));
+ Parser.Lex();
+ }
+ return MatchOperand_Success;
+ }
+ default:
+ return MatchOperand_NoMatch;
+ }
+}
+
+bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) {
+
+ // Clear any forced encodings from the previous instruction.
+ setForcedEncodingSize(0);
+
+ if (Name.endswith("_e64"))
+ setForcedEncodingSize(64);
+ else if (Name.endswith("_e32"))
+ setForcedEncodingSize(32);
+
+ // Add the instruction mnemonic
+ Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
+
+ while (!getLexer().is(AsmToken::EndOfStatement)) {
+ AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
+
+ // Eat the comma or space if there is one.
+ if (getLexer().is(AsmToken::Comma))
+ Parser.Lex();
+
+ switch (Res) {
+ case MatchOperand_Success: break;
+ case MatchOperand_ParseFail: return Error(getLexer().getLoc(),
+ "failed parsing operand.");
+ case MatchOperand_NoMatch: return Error(getLexer().getLoc(),
+ "not a valid operand.");
+ }
+ }
+
+ // Once we reach end of statement, continue parsing so we can add default
+ // values for optional arguments.
+ AMDGPUAsmParser::OperandMatchResultTy Res;
+ while ((Res = parseOperand(Operands, Name)) != MatchOperand_NoMatch) {
+ if (Res != MatchOperand_Success)
+ return Error(getLexer().getLoc(), "failed parsing operand.");
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Utility functions
+//===----------------------------------------------------------------------===//
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int,
+ int64_t Default) {
+
+ // We are at the end of the statement, and this is a default argument, so
+ // use a default value.
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ Int = Default;
+ return MatchOperand_Success;
+ }
+
+ switch(getLexer().getKind()) {
+ default: return MatchOperand_NoMatch;
+ case AsmToken::Identifier: {
+ StringRef OffsetName = Parser.getTok().getString();
+ if (!OffsetName.equals(Prefix))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Colon))
+ return MatchOperand_ParseFail;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Integer))
+ return MatchOperand_ParseFail;
+
+ if (getParser().parseAbsoluteExpression(Int))
+ return MatchOperand_ParseFail;
+ break;
+ }
+ }
+ return MatchOperand_Success;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy) {
+
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t Offset = 0;
+
+ AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Offset);
+ if (Res != MatchOperand_Success)
+ return Res;
+
+ Operands.push_back(AMDGPUOperand::CreateImm(Offset, S, ImmTy));
+ return MatchOperand_Success;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy) {
+ int64_t Bit = 0;
+ SMLoc S = Parser.getTok().getLoc();
+
+  // If we are at the end of the statement, this is a default argument, so
+  // keep the default value (0) and skip parsing.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ switch(getLexer().getKind()) {
+ case AsmToken::Identifier: {
+ StringRef Tok = Parser.getTok().getString();
+ if (Tok == Name) {
+ Bit = 1;
+ Parser.Lex();
+ } else if (Tok.startswith("no") && Tok.endswith(Name)) {
+ Bit = 0;
+ Parser.Lex();
+ } else {
+ return MatchOperand_NoMatch;
+ }
+ break;
+ }
+ default:
+ return MatchOperand_NoMatch;
+ }
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy));
+ return MatchOperand_Success;
+}
+
+static bool operandsHasOptionalOp(const OperandVector &Operands,
+ const OptionalOperand &OOp) {
+ for (unsigned i = 0; i < Operands.size(); i++) {
+ const AMDGPUOperand &ParsedOp = ((const AMDGPUOperand &)*Operands[i]);
+ if ((ParsedOp.isImm() && ParsedOp.getImmTy() == OOp.Type) ||
+ (ParsedOp.isToken() && ParsedOp.getToken() == OOp.Name))
+ return true;
+
+ }
+ return false;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps,
+ OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ for (const OptionalOperand &Op : OptionalOps) {
+ if (operandsHasOptionalOp(Operands, Op))
+ continue;
+ AMDGPUAsmParser::OperandMatchResultTy Res;
+ int64_t Value;
+ if (Op.IsBit) {
+ Res = parseNamedBit(Op.Name, Operands, Op.Type);
+ if (Res == MatchOperand_NoMatch)
+ continue;
+ return Res;
+ }
+
+ Res = parseIntWithPrefix(Op.Name, Value, Op.Default);
+
+ if (Res == MatchOperand_NoMatch)
+ continue;
+
+ if (Res != MatchOperand_Success)
+ return Res;
+
+ if (Op.ConvertResult && !Op.ConvertResult(Value)) {
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateImm(Value, S, Op.Type));
+ return MatchOperand_Success;
+ }
+ return MatchOperand_NoMatch;
+}
+
+//===----------------------------------------------------------------------===//
+// ds
+//===----------------------------------------------------------------------===//
+
+static const OptionalOperand DSOptionalOps [] = {
+ {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
+ {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
+};
+
+static const OptionalOperand DSOptionalOpsOff01 [] = {
+ {"offset0", AMDGPUOperand::ImmTyDSOffset0, false, 0, nullptr},
+ {"offset1", AMDGPUOperand::ImmTyDSOffset1, false, 0, nullptr},
+ {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
+};
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDSOptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(DSOptionalOps, Operands);
+}
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDSOff01OptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(DSOptionalOpsOff01, Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDSOffsetOptional(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ AMDGPUAsmParser::OperandMatchResultTy Res =
+ parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
+ if (Res == MatchOperand_NoMatch) {
+ Operands.push_back(AMDGPUOperand::CreateImm(0, S,
+ AMDGPUOperand::ImmTyOffset));
+ Res = MatchOperand_Success;
+ }
+ return Res;
+}
+
+bool AMDGPUOperand::isDSOffset() const {
+ return isImm() && isUInt<16>(getImm());
+}
+
+bool AMDGPUOperand::isDSOffset01() const {
+ return isImm() && isUInt<8>(getImm());
+}
+
+void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
+ const OperandVector &Operands) {
+
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ unsigned Offset0Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset0];
+ unsigned Offset1Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset1];
+ unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
+
+ ((AMDGPUOperand &)*Operands[Offset0Idx]).addImmOperands(Inst, 1); // offset0
+ ((AMDGPUOperand &)*Operands[Offset1Idx]).addImmOperands(Inst, 1); // offset1
+ ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
+ Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
+}
+
+void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
+
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+ bool GDSOnly = false;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ if (Op.isToken() && Op.getToken() == "gds") {
+ GDSOnly = true;
+ continue;
+ }
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
+ ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); // offset
+
+ if (!GDSOnly) {
+ unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
+ ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
+ }
+ Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
+}
+
+
+//===----------------------------------------------------------------------===//
+// s_waitcnt
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
+ StringRef CntName = Parser.getTok().getString();
+ int64_t CntVal;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::LParen))
+ return true;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Integer))
+ return true;
+
+ if (getParser().parseAbsoluteExpression(CntVal))
+ return true;
+
+ if (getLexer().isNot(AsmToken::RParen))
+ return true;
+
+ Parser.Lex();
+ if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
+ Parser.Lex();
+
+ int CntShift;
+ int CntMask;
+
+ if (CntName == "vmcnt") {
+ CntMask = 0xf;
+ CntShift = 0;
+ } else if (CntName == "expcnt") {
+ CntMask = 0x7;
+ CntShift = 4;
+ } else if (CntName == "lgkmcnt") {
+ CntMask = 0x7;
+ CntShift = 8;
+ } else {
+ return true;
+ }
+
+ IntVal &= ~(CntMask << CntShift);
+ IntVal |= (CntVal << CntShift);
+ return false;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
+ // Disable all counters by default.
+ // vmcnt [3:0]
+ // expcnt [6:4]
+ // lgkmcnt [10:8]
+ int64_t CntVal = 0x77f;
+ SMLoc S = Parser.getTok().getLoc();
+
+ switch(getLexer().getKind()) {
+ default: return MatchOperand_ParseFail;
+ case AsmToken::Integer:
+ // The operand can be an integer value.
+ if (getParser().parseAbsoluteExpression(CntVal))
+ return MatchOperand_ParseFail;
+ break;
+
+ case AsmToken::Identifier:
+ do {
+ if (parseCnt(CntVal))
+ return MatchOperand_ParseFail;
+ } while(getLexer().isNot(AsmToken::EndOfStatement));
+ break;
+ }
+ Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S));
+ return MatchOperand_Success;
+}
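
For reference, parseCnt() packs the named counters into one immediate: vmcnt in bits [3:0], expcnt in [6:4], lgkmcnt in [10:8], starting from 0x77f ("wait on nothing"). A small standalone sketch of that packing, with a hypothetical setField helper that uses the same masks and shifts as the parser:

#include <cstdint>
#include <cstdio>

// Clears the counter field and stores the new value, as parseCnt() does.
static int64_t setField(int64_t Enc, int64_t Val, int Mask, int Shift) {
  Enc &= ~(static_cast<int64_t>(Mask) << Shift);
  return Enc | (Val << Shift);
}

int main() {
  int64_t Enc = 0x77f;             // default: wait on nothing
  Enc = setField(Enc, 0, 0xf, 0);  // vmcnt(0)
  Enc = setField(Enc, 0, 0x7, 8);  // lgkmcnt(0)
  // s_waitcnt vmcnt(0) lgkmcnt(0) encodes as 0x70.
  std::printf("0x%llx\n", static_cast<unsigned long long>(Enc));
  return 0;
}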
+
+bool AMDGPUOperand::isSWaitCnt() const {
+ return isImm();
+}
+
+//===----------------------------------------------------------------------===//
+// sopp branch targets
+//===----------------------------------------------------------------------===//
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+
+ switch (getLexer().getKind()) {
+ default: return MatchOperand_ParseFail;
+ case AsmToken::Integer: {
+ int64_t Imm;
+ if (getParser().parseAbsoluteExpression(Imm))
+ return MatchOperand_ParseFail;
+ Operands.push_back(AMDGPUOperand::CreateImm(Imm, S));
+ return MatchOperand_Success;
+ }
+
+ case AsmToken::Identifier:
+ Operands.push_back(AMDGPUOperand::CreateExpr(
+ MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
+ Parser.getTok().getString()), getContext()), S));
+ Parser.Lex();
+ return MatchOperand_Success;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// flat
+//===----------------------------------------------------------------------===//
+
+static const OptionalOperand FlatOptionalOps [] = {
+ {"glc", AMDGPUOperand::ImmTyGLC, true, 0, nullptr},
+ {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr},
+ {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr}
+};
+
+static const OptionalOperand FlatAtomicOptionalOps [] = {
+ {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr},
+ {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr}
+};
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseFlatOptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(FlatOptionalOps, Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseFlatAtomicOptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(FlatAtomicOptionalOps, Operands);
+}
+
+void AMDGPUAsmParser::cvtFlat(MCInst &Inst,
+ const OperandVector &Operands) {
+ std::map<AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle 'glc' token which is sometimes hard-coded into the
+ // asm string. There are no MCInst operands for these.
+ if (Op.isToken())
+ continue;
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+
+ }
+
+ // flat atomic instructions don't have a glc argument.
+ if (OptionalIdx.count(AMDGPUOperand::ImmTyGLC)) {
+ unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC];
+ ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1);
+ }
+
+ unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC];
+ unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE];
+
+ ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1);
+}
+
+//===----------------------------------------------------------------------===//
+// mubuf
+//===----------------------------------------------------------------------===//
+
+static const OptionalOperand MubufOptionalOps [] = {
+ {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
+ {"glc", AMDGPUOperand::ImmTyGLC, true, 0, nullptr},
+ {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr},
+ {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr}
+};
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseMubufOptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(MubufOptionalOps, Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseOffset(OperandVector &Operands) {
+ return parseIntWithPrefix("offset", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseGLC(OperandVector &Operands) {
+ return parseNamedBit("glc", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSLC(OperandVector &Operands) {
+ return parseNamedBit("slc", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseTFE(OperandVector &Operands) {
+ return parseNamedBit("tfe", Operands);
+}
+
+bool AMDGPUOperand::isMubufOffset() const {
+ return isImm() && isUInt<12>(getImm());
+}
+
+void AMDGPUAsmParser::cvtMubuf(MCInst &Inst,
+ const OperandVector &Operands) {
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle the case where soffset is an immediate
+ if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
+ Op.addImmOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle tokens like 'offen' which are sometimes hard-coded into the
+ // asm string. There are no MCInst operands for these.
+ if (Op.isToken()) {
+ continue;
+ }
+ assert(Op.isImm());
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ assert(OptionalIdx.size() == 4);
+
+ unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
+ unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC];
+ unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC];
+ unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE];
+
+ ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1);
+}
+
+//===----------------------------------------------------------------------===//
+// mimg
+//===----------------------------------------------------------------------===//
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDMask(OperandVector &Operands) {
+ return parseIntWithPrefix("dmask", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseUNorm(OperandVector &Operands) {
+ return parseNamedBit("unorm", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseR128(OperandVector &Operands) {
+ return parseNamedBit("r128", Operands);
+}
+
+//===----------------------------------------------------------------------===//
+// vop3
+//===----------------------------------------------------------------------===//
+
+static bool ConvertOmodMul(int64_t &Mul) {
+ if (Mul != 1 && Mul != 2 && Mul != 4)
+ return false;
+
+ Mul >>= 1;
+ return true;
+}
+
+static bool ConvertOmodDiv(int64_t &Div) {
+ if (Div == 1) {
+ Div = 0;
+ return true;
+ }
+
+ if (Div == 2) {
+ Div = 3;
+ return true;
+ }
+
+ return false;
+}
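
Taken together, these converters map the assembly spellings of the output modifier onto the 2-bit omod field: mul:1/div:1 -> 0, mul:2 -> 1, mul:4 -> 2, div:2 -> 3. A quick standalone check of that mapping (hypothetical helper names, same logic as the functions above):

#include <cassert>
#include <cstdint>

// Mirrors ConvertOmodMul / ConvertOmodDiv for a standalone sanity check.
static bool omodFromMul(int64_t &V) {
  if (V != 1 && V != 2 && V != 4)
    return false;
  V >>= 1;
  return true;
}
static bool omodFromDiv(int64_t &V) {
  if (V == 1) { V = 0; return true; }
  if (V == 2) { V = 3; return true; }
  return false;
}

int main() {
  int64_t V = 4;
  assert(omodFromMul(V) && V == 2); // mul:4 -> omod 2
  V = 2;
  assert(omodFromDiv(V) && V == 3); // div:2 -> omod 3
  return 0;
}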
+
+static const OptionalOperand VOP3OptionalOps [] = {
+ {"clamp", AMDGPUOperand::ImmTyClamp, true, 0, nullptr},
+ {"mul", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodMul},
+ {"div", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodDiv},
+};
+
+static bool isVOP3(OperandVector &Operands) {
+ if (operandsHaveModifiers(Operands))
+ return true;
+
+ AMDGPUOperand &DstOp = ((AMDGPUOperand&)*Operands[1]);
+
+ if (DstOp.isReg() && DstOp.isRegClass(AMDGPU::SGPR_64RegClassID))
+ return true;
+
+ if (Operands.size() >= 5)
+ return true;
+
+ if (Operands.size() > 3) {
+ AMDGPUOperand &Src1Op = ((AMDGPUOperand&)*Operands[3]);
+ if (Src1Op.getReg() && (Src1Op.isRegClass(AMDGPU::SReg_32RegClassID) ||
+ Src1Op.isRegClass(AMDGPU::SReg_64RegClassID)))
+ return true;
+ }
+ return false;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) {
+
+ // The value returned by this function may change after parsing
+ // an operand so store the original value here.
+ bool HasModifiers = operandsHaveModifiers(Operands);
+
+ bool IsVOP3 = isVOP3(Operands);
+ if (HasModifiers || IsVOP3 ||
+ getLexer().isNot(AsmToken::EndOfStatement) ||
+ getForcedEncodingSize() == 64) {
+
+ AMDGPUAsmParser::OperandMatchResultTy Res =
+ parseOptionalOps(VOP3OptionalOps, Operands);
+
+ if (!HasModifiers && Res == MatchOperand_Success) {
+ // We have added a modifier operation, so we need to make sure all
+ // previous register operands have modifiers
+ for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
+ if (Op.isReg())
+ Op.setModifiers(0);
+ }
+ }
+ return Res;
+ }
+ return MatchOperand_NoMatch;
+}
+
+void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
+ ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
+ unsigned i = 2;
+
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+
+ if (operandsHaveModifiers(Operands)) {
+ for (unsigned e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ if (Op.isRegWithInputMods()) {
+ ((AMDGPUOperand &)*Operands[i]).addRegWithInputModsOperands(Inst, 2);
+ continue;
+ }
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ unsigned ClampIdx = OptionalIdx[AMDGPUOperand::ImmTyClamp];
+ unsigned OModIdx = OptionalIdx[AMDGPUOperand::ImmTyOMod];
+
+ ((AMDGPUOperand &)*Operands[ClampIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[OModIdx]).addImmOperands(Inst, 1);
+ } else {
+ for (unsigned e = Operands.size(); i != e; ++i)
+ ((AMDGPUOperand &)*Operands[i]).addRegOrImmOperands(Inst, 1);
+ }
+}
+
+/// Force static initialization.
+extern "C" void LLVMInitializeAMDGPUAsmParser() {
+ RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget);
+ RegisterMCAsmParser<AMDGPUAsmParser> B(TheGCNTarget);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "AMDGPUGenAsmMatcher.inc"
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td
new file mode 100644
index 0000000..2f5fdbe
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td
@@ -0,0 +1,149 @@
+//===-- CIInstructions.td - CI Instruction Definitions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Instruction definitions for CI and newer.
+//===----------------------------------------------------------------------===//
+
+
+def isCIVI : Predicate <
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
+ "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
+>, AssemblerPredicate<"FeatureCIInsts">;
+
+def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
+
+//===----------------------------------------------------------------------===//
+// VOP1 Instructions
+//===----------------------------------------------------------------------===//
+
+let SubtargetPredicate = isCIVI in {
+
+defm V_TRUNC_F64 : VOP1Inst <vop1<0x17>, "v_trunc_f64",
+ VOP_F64_F64, ftrunc
+>;
+defm V_CEIL_F64 : VOP1Inst <vop1<0x18>, "v_ceil_f64",
+ VOP_F64_F64, fceil
+>;
+defm V_FLOOR_F64 : VOP1Inst <vop1<0x1A>, "v_floor_f64",
+ VOP_F64_F64, ffloor
+>;
+defm V_RNDNE_F64 : VOP1Inst <vop1<0x19>, "v_rndne_f64",
+ VOP_F64_F64, frint
+>;
+defm V_LOG_LEGACY_F32 : VOP1Inst <vop1<0x45, 0x4c>, "v_log_legacy_f32",
+ VOP_F32_F32
+>;
+defm V_EXP_LEGACY_F32 : VOP1Inst <vop1<0x46, 0x4b>, "v_exp_legacy_f32",
+ VOP_F32_F32
+>;
+
+//===----------------------------------------------------------------------===//
+// Flat Instructions
+//===----------------------------------------------------------------------===//
+
+def FLAT_LOAD_UBYTE : FLAT_Load_Helper <0x8, "flat_load_ubyte", VGPR_32>;
+def FLAT_LOAD_SBYTE : FLAT_Load_Helper <0x9, "flat_load_sbyte", VGPR_32>;
+def FLAT_LOAD_USHORT : FLAT_Load_Helper <0xa, "flat_load_ushort", VGPR_32>;
+def FLAT_LOAD_SSHORT : FLAT_Load_Helper <0xb, "flat_load_sshort", VGPR_32>;
+def FLAT_LOAD_DWORD : FLAT_Load_Helper <0xc, "flat_load_dword", VGPR_32>;
+def FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <0xd, "flat_load_dwordx2", VReg_64>;
+def FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <0xe, "flat_load_dwordx4", VReg_128>;
+def FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <0xf, "flat_load_dwordx3", VReg_96>;
+def FLAT_STORE_BYTE : FLAT_Store_Helper <0x18, "flat_store_byte", VGPR_32>;
+def FLAT_STORE_SHORT : FLAT_Store_Helper <0x1a, "flat_store_short", VGPR_32>;
+def FLAT_STORE_DWORD : FLAT_Store_Helper <0x1c, "flat_store_dword", VGPR_32>;
+def FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
+ 0x1d, "flat_store_dwordx2", VReg_64
+>;
+def FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
+ 0x1e, "flat_store_dwordx4", VReg_128
+>;
+def FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
+ 0x1f, "flat_store_dwordx3", VReg_96
+>;
+defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <0x30, "flat_atomic_swap", VGPR_32>;
+defm FLAT_ATOMIC_CMPSWAP : FLAT_ATOMIC <
+ 0x31, "flat_atomic_cmpswap", VGPR_32, VReg_64
+>;
+defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <0x32, "flat_atomic_add", VGPR_32>;
+defm FLAT_ATOMIC_SUB : FLAT_ATOMIC <0x33, "flat_atomic_sub", VGPR_32>;
+defm FLAT_ATOMIC_RSUB : FLAT_ATOMIC <0x34, "flat_atomic_rsub", VGPR_32>;
+defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC <0x35, "flat_atomic_smin", VGPR_32>;
+defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC <0x36, "flat_atomic_umin", VGPR_32>;
+defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC <0x37, "flat_atomic_smax", VGPR_32>;
+defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC <0x38, "flat_atomic_umax", VGPR_32>;
+defm FLAT_ATOMIC_AND : FLAT_ATOMIC <0x39, "flat_atomic_and", VGPR_32>;
+defm FLAT_ATOMIC_OR : FLAT_ATOMIC <0x3a, "flat_atomic_or", VGPR_32>;
+defm FLAT_ATOMIC_XOR : FLAT_ATOMIC <0x3b, "flat_atomic_xor", VGPR_32>;
+defm FLAT_ATOMIC_INC : FLAT_ATOMIC <0x3c, "flat_atomic_inc", VGPR_32>;
+defm FLAT_ATOMIC_DEC : FLAT_ATOMIC <0x3d, "flat_atomic_dec", VGPR_32>;
+defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC <
+ 0x3e, "flat_atomic_fcmpswap", VGPR_32, VReg_64
+>;
+defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC <0x3f, "flat_atomic_fmin", VGPR_32>;
+defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC <0x40, "flat_atomic_fmax", VGPR_32>;
+defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC <0x50, "flat_atomic_swap_x2", VReg_64>;
+defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_ATOMIC <
+ 0x51, "flat_atomic_cmpswap_x2", VReg_64, VReg_128
+>;
+defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC <0x52, "flat_atomic_add_x2", VReg_64>;
+defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC <0x53, "flat_atomic_sub_x2", VReg_64>;
+defm FLAT_ATOMIC_RSUB_X2 : FLAT_ATOMIC <0x54, "flat_atomic_rsub_x2", VReg_64>;
+defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC <0x55, "flat_atomic_smin_x2", VReg_64>;
+defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC <0x56, "flat_atomic_umin_x2", VReg_64>;
+defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC <0x57, "flat_atomic_smax_x2", VReg_64>;
+defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC <0x58, "flat_atomic_umax_x2", VReg_64>;
+defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC <0x59, "flat_atomic_and_x2", VReg_64>;
+defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC <0x5a, "flat_atomic_or_x2", VReg_64>;
+defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC <0x5b, "flat_atomic_xor_x2", VReg_64>;
+defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC <0x5c, "flat_atomic_inc_x2", VReg_64>;
+defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC <0x5d, "flat_atomic_dec_x2", VReg_64>;
+defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC <
+ 0x5e, "flat_atomic_fcmpswap_x2", VReg_64, VReg_128
+>;
+defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC <0x5f, "flat_atomic_fmin_x2", VReg_64>;
+defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <0x60, "flat_atomic_fmax_x2", VReg_64>;
+
+} // End SubtargetPredicate = isCIVI
+
+//===----------------------------------------------------------------------===//
+// Flat Patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasFlatAddressSpace] in {
+
+class FLATLoad_Pattern <FLAT Instr_ADDR64, ValueType vt,
+ PatFrag flat_ld> :
+ Pat <(vt (flat_ld i64:$ptr)),
+ (Instr_ADDR64 $ptr, 0, 0, 0)
+>;
+
+def : FLATLoad_Pattern <FLAT_LOAD_SBYTE, i32, sextloadi8_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_UBYTE, i32, az_extloadi8_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_SSHORT, i32, sextloadi16_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_USHORT, i32, az_extloadi16_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORD, i32, flat_load>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, flat_load>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, az_extloadi32_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, v2i32, flat_load>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX4, v4i32, flat_load>;
+
+class FLATStore_Pattern <FLAT Instr, ValueType vt, PatFrag st> :
+ Pat <(st vt:$value, i64:$ptr),
+ (Instr $value, $ptr, 0, 0, 0)
+ >;
+
+def : FLATStore_Pattern <FLAT_STORE_BYTE, i32, truncstorei8_flat>;
+def : FLATStore_Pattern <FLAT_STORE_SHORT, i32, truncstorei16_flat>;
+def : FLATStore_Pattern <FLAT_STORE_DWORD, i32, flat_store>;
+def : FLATStore_Pattern <FLAT_STORE_DWORDX2, i64, flat_store>;
+def : FLATStore_Pattern <FLAT_STORE_DWORDX2, v2i32, flat_store>;
+def : FLATStore_Pattern <FLAT_STORE_DWORDX4, v4i32, flat_store>;
+
+} // End HasFlatAddressSpace predicate
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td
new file mode 100644
index 0000000..ba4df82
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td
@@ -0,0 +1,226 @@
+//===-- CaymanInstructions.td - CM Instruction defs -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TableGen definitions for instructions which are available only on Cayman
+// family GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+def isCayman : Predicate<"Subtarget->hasCaymanISA()">;
+
+//===----------------------------------------------------------------------===//
+// Cayman Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isCayman] in {
+
+def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24",
+ [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))], VecALU
+>;
+def MUL_INT24_cm : R600_2OP <0x5B, "MUL_INT24",
+ [(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))], VecALU
+>;
+
+def : IMad24Pat<MULADD_INT24_cm>;
+
+let isVector = 1 in {
+
+def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
+
+def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
+def MULHI_INT_cm : MULHI_INT_Common<0x90>;
+def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
+def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
+def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
+def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
+def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
+def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
+def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
+def SIN_cm : SIN_Common<0x8D>;
+def COS_cm : COS_Common<0x8E>;
+} // End isVector = 1
+
+def : RsqPat<RECIPSQRT_IEEE_cm, f32>;
+
+def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
+
+defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
+defm : Expand24UBitOps<MULLO_UINT_cm, ADD_INT>;
+
+// RECIP_UINT emulation for Cayman
+// The multiplication scales from [0,1] to the unsigned integer range
+def : Pat <
+ (AMDGPUurecip i32:$src0),
+ (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
+ (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
+>;
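
A host-side sketch of the emulation above, assuming CONST.FP_UINT_MAX_PLUS_1 is the float constant 2^32 and ignoring the clamping the hardware applies on the final conversion (so Src is taken to be greater than 1):

#include <cstdint>
#include <cstdio>

// Emulates AMDGPUurecip as lowered for Cayman: 1/x is computed in float and
// rescaled from [0,1] to the full unsigned 32-bit range.
static uint32_t urecipEmulated(uint32_t Src) {
  float Recip  = 1.0f / static_cast<float>(Src); // RECIP_IEEE(UINT_TO_FLT(Src))
  float Scaled = Recip * 4294967296.0f;          // MUL_IEEE by 2^32
  return static_cast<uint32_t>(Scaled);          // FLT_TO_UINT
}

int main() {
  // Prints roughly 0x55555580 (the ideal value of 2^32 / 3 is 0x55555555).
  std::printf("urecip(3) ~= 0x%08x\n", static_cast<unsigned>(urecipEmulated(3)));
  return 0;
}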
+
+def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
+  let ADDR = 0;
+  let POP_COUNT = 0;
+  let COUNT = 0;
+}
+
+
+def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
+
+class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
+ CF_MEM_RAT_CACHELESS <0x14, 0, mask,
+ (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr),
+ "STORE_DWORD $rw_gpr, $index_gpr",
+ [(global_store vt:$rw_gpr, i32:$index_gpr)]> {
+ let eop = 0; // This bit is not used on Cayman.
+}
+
+def RAT_STORE_DWORD32 : RAT_STORE_DWORD <R600_TReg32_X, i32, 0x1>;
+def RAT_STORE_DWORD64 : RAT_STORE_DWORD <R600_Reg64, v2i32, 0x3>;
+def RAT_STORE_DWORD128 : RAT_STORE_DWORD <R600_Reg128, v4i32, 0xf>;
+
+class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
+ : VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> {
+
+ // Static fields
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let BUFFER_ID = buffer_id;
+ let SRC_REL = 0;
+ // XXX: We can infer this field based on the SRC_GPR. This would allow us
+ // to store vertex addresses in any channel, not just X.
+ let SRC_SEL_X = 0;
+ let SRC_SEL_Y = 0;
+ let STRUCTURED_READ = 0;
+ let LDS_REQ = 0;
+ let COALESCED_READ = 0;
+
+ let Inst{31-0} = Word0;
+}
+
+class VTX_READ_8_cm <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_cm <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
+ (outs R600_TReg32_X:$dst_gpr), pattern> {
+
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 1; // FMT_8
+}
+
+class VTX_READ_16_cm <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_cm <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
+ (outs R600_TReg32_X:$dst_gpr), pattern> {
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 5; // FMT_16
+
+}
+
+class VTX_READ_32_cm <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_cm <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
+ (outs R600_TReg32_X:$dst_gpr), pattern> {
+
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 0xD; // COLOR_32
+
+ // This is not really necessary, but there were some GPU hangs that appeared
+ // to be caused by ALU instructions in the next instruction group that wrote
+ // to the $src_gpr registers of the VTX_READ.
+ // e.g.
+ // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
+ // %T2_X<def> = MOV %ZERO
+  // Adding this constraint prevents this from happening.
+ let Constraints = "$src_gpr.ptr = $dst_gpr";
+}
+
+class VTX_READ_64_cm <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_cm <"VTX_READ_64 $dst_gpr, $src_gpr", buffer_id,
+ (outs R600_Reg64:$dst_gpr), pattern> {
+
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 7;
+ let DST_SEL_W = 7;
+ let DATA_FORMAT = 0x1D; // COLOR_32_32
+}
+
+class VTX_READ_128_cm <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_cm <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
+ (outs R600_Reg128:$dst_gpr), pattern> {
+
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
+
+ // XXX: Need to force VTX_READ_128 instructions to write to the same register
+ // that holds its buffer address to avoid potential hangs. We can't use
+ // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst
+ // registers are different sizes.
+}
+
+//===----------------------------------------------------------------------===//
+// VTX Read from parameter memory space
+//===----------------------------------------------------------------------===//
+def VTX_READ_PARAM_8_cm : VTX_READ_8_cm <0,
+ [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
+>;
+
+def VTX_READ_PARAM_16_cm : VTX_READ_16_cm <0,
+ [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
+>;
+
+def VTX_READ_PARAM_32_cm : VTX_READ_32_cm <0,
+ [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
+>;
+
+def VTX_READ_PARAM_64_cm : VTX_READ_64_cm <0,
+ [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
+>;
+
+def VTX_READ_PARAM_128_cm : VTX_READ_128_cm <0,
+ [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
+>;
+
+//===----------------------------------------------------------------------===//
+// VTX Read from global memory space
+//===----------------------------------------------------------------------===//
+
+// 8-bit reads
+def VTX_READ_GLOBAL_8_cm : VTX_READ_8_cm <1,
+ [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))]
+>;
+
+def VTX_READ_GLOBAL_16_cm : VTX_READ_16_cm <1,
+ [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))]
+>;
+
+// 32-bit reads
+def VTX_READ_GLOBAL_32_cm : VTX_READ_32_cm <1,
+ [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
+>;
+
+// 64-bit reads
+def VTX_READ_GLOBAL_64_cm : VTX_READ_64_cm <1,
+ [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
+>;
+
+// 128-bit reads
+def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1,
+ [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
+>;
+
+} // End isCayman
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
new file mode 100644
index 0000000..7adcd46
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -0,0 +1,670 @@
+//===-- EvergreenInstructions.td - EG Instruction defs ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TableGen definitions for instructions which are:
+// - Available to Evergreen and newer VLIW4/VLIW5 GPUs
+// - Available only on Evergreen family GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+def isEG : Predicate<
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
+ "Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && "
+ "!Subtarget->hasCaymanISA()"
+>;
+
+def isEGorCayman : Predicate<
+ "Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN ||"
+ "Subtarget->getGeneration() ==AMDGPUSubtarget::NORTHERN_ISLANDS"
+>;
+
+//===----------------------------------------------------------------------===//
+// Evergreen / Cayman store instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEGorCayman] in {
+
+class CF_MEM_RAT_CACHELESS <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag ins,
+ string name, list<dag> pattern>
+ : EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins,
+ "MEM_RAT_CACHELESS "#name, pattern>;
+
+class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name,
+ list<dag> pattern>
+ : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins,
+ "MEM_RAT "#name, pattern>;
+
+def RAT_MSKOR : CF_MEM_RAT <0x11, 0,
+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
+ "MSKOR $rw_gpr.XW, $index_gpr",
+ [(mskor_global v4i32:$rw_gpr, i32:$index_gpr)]
+> {
+ let eop = 0;
+}
+
+} // End let Predicates = [isEGorCayman]
+
+//===----------------------------------------------------------------------===//
+// Evergreen Only instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEG] in {
+
+def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
+defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;
+
+def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
+def MULHI_INT_eg : MULHI_INT_Common<0x90>;
+def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
+def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
+def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
+def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
+def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
+def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
+def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
+def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
+def : RsqPat<RECIPSQRT_IEEE_eg, f32>;
+def SIN_eg : SIN_Common<0x8D>;
+def COS_eg : COS_Common<0x8E>;
+
+def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
+def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
+
+defm : Expand24IBitOps<MULLO_INT_eg, ADD_INT>;
+
+//===----------------------------------------------------------------------===//
+// Memory read/write instructions
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1 in {
+
+// 32-bit store
+def RAT_WRITE_CACHELESS_32_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x1,
+ (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
+ "STORE_RAW $rw_gpr, $index_gpr, $eop",
+ [(global_store i32:$rw_gpr, i32:$index_gpr)]
+>;
+
+// 64-bit store
+def RAT_WRITE_CACHELESS_64_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x3,
+ (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
+ "STORE_RAW $rw_gpr.XY, $index_gpr, $eop",
+ [(global_store v2i32:$rw_gpr, i32:$index_gpr)]
+>;
+
+//128-bit store
+def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf,
+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
+ "STORE_RAW $rw_gpr.XYZW, $index_gpr, $eop",
+ [(global_store v4i32:$rw_gpr, i32:$index_gpr)]
+>;
+
+} // End usesCustomInserter = 1
+
+class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
+ : VTX_WORD0_eg, VTX_READ<name, buffer_id, outs, pattern> {
+
+ // Static fields
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let BUFFER_ID = buffer_id;
+ let SRC_REL = 0;
+ // XXX: We can infer this field based on the SRC_GPR. This would allow us
+ // to store vertex addresses in any channel, not just X.
+ let SRC_SEL_X = 0;
+
+ let Inst{31-0} = Word0;
+}
+
+class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
+ (outs R600_TReg32_X:$dst_gpr), pattern> {
+
+ let MEGA_FETCH_COUNT = 1;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 1; // FMT_8
+}
+
+class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
+ (outs R600_TReg32_X:$dst_gpr), pattern> {
+ let MEGA_FETCH_COUNT = 2;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 5; // FMT_16
+
+}
+
+class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
+ (outs R600_TReg32_X:$dst_gpr), pattern> {
+
+ let MEGA_FETCH_COUNT = 4;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 0xD; // COLOR_32
+
+ // This is not really necessary, but there were some GPU hangs that appeared
+ // to be caused by ALU instructions in the next instruction group that wrote
+ // to the $src_gpr registers of the VTX_READ.
+ // e.g.
+ // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
+ // %T2_X<def> = MOV %ZERO
+  // Adding this constraint prevents this from happening.
+ let Constraints = "$src_gpr.ptr = $dst_gpr";
+}
+
+class VTX_READ_64_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_64 $dst_gpr.XY, $src_gpr", buffer_id,
+ (outs R600_Reg64:$dst_gpr), pattern> {
+
+ let MEGA_FETCH_COUNT = 8;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 7;
+ let DST_SEL_W = 7;
+ let DATA_FORMAT = 0x1D; // COLOR_32_32
+}
+
+class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
+ (outs R600_Reg128:$dst_gpr), pattern> {
+
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
+
+ // XXX: Need to force VTX_READ_128 instructions to write to the same register
+ // that holds its buffer address to avoid potential hangs. We can't use
+ // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst
+ // registers are different sizes.
+}
+
+//===----------------------------------------------------------------------===//
+// VTX Read from parameter memory space
+//===----------------------------------------------------------------------===//
+
+def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
+ [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
+>;
+
+def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
+ [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
+>;
+
+def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
+ [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
+>;
+
+def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0,
+ [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
+>;
+
+def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
+ [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
+>;
+
+//===----------------------------------------------------------------------===//
+// VTX Read from global memory space
+//===----------------------------------------------------------------------===//
+
+// 8-bit reads
+def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
+ [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))]
+>;
+
+def VTX_READ_GLOBAL_16_eg : VTX_READ_16_eg <1,
+ [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))]
+>;
+
+// 32-bit reads
+def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
+ [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
+>;
+
+// 64-bit reads
+def VTX_READ_GLOBAL_64_eg : VTX_READ_64_eg <1,
+ [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
+>;
+
+// 128-bit reads
+def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
+ [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
+>;
+
+} // End Predicates = [isEG]
+
+//===----------------------------------------------------------------------===//
+// Evergreen / Cayman Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEGorCayman] in {
+
+// Should be predicated on FeatureFP64
+// def FMA_64 : R600_3OP <
+// 0xA, "FMA_64",
+// [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
+// >;
+
+// BFE_UINT - bit_extract, an optimization for mask and shift
+// Src0 = Input
+// Src1 = Offset
+// Src2 = Width
+//
+// bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
+//
+// Example Usage:
+// (Offset, Width)
+//
+// (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
+// (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
+// (16, 8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
+// (24, 8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
+def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
+ [(set i32:$dst, (AMDGPUbfe_u32 i32:$src0, i32:$src1, i32:$src2))],
+ VecALU
+>;
+
+def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
+ [(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))],
+ VecALU
+>;
+
+def : BFEPattern <BFE_UINT_eg, MOV_IMM_I32>;
+
+def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
+ [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
+ VecALU
+>;
+
+def : Pat<(i32 (sext_inreg i32:$src, i1)),
+ (BFE_INT_eg i32:$src, (i32 ZERO), (i32 ONE_INT))>;
+def : Pat<(i32 (sext_inreg i32:$src, i8)),
+ (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 8))>;
+def : Pat<(i32 (sext_inreg i32:$src, i16)),
+ (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>;
+
+defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32, R600_Reg64>;
+
+def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
+ [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))],
+ VecALU
+>;
+
+def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24",
+ [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))], VecALU
+>;
+
+def : UMad24Pat<MULADD_UINT24_eg>;
+
+def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
+def : ROTRPattern <BIT_ALIGN_INT_eg>;
+def MULADD_eg : MULADD_Common<0x14>;
+def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
+def FMA_eg : FMA_Common<0x7>;
+def ASHR_eg : ASHR_Common<0x15>;
+def LSHR_eg : LSHR_Common<0x16>;
+def LSHL_eg : LSHL_Common<0x17>;
+def CNDE_eg : CNDE_Common<0x19>;
+def CNDGT_eg : CNDGT_Common<0x1A>;
+def CNDGE_eg : CNDGE_Common<0x1B>;
+def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
+def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
+def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24",
+ [(set i32:$dst, (AMDGPUmul_u24 i32:$src0, i32:$src1))], VecALU
+>;
+def DOT4_eg : DOT4_Common<0xBE>;
+defm CUBE_eg : CUBE_Common<0xC0>;
+
+def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
+
+def ADDC_UINT : R600_2OP_Helper <0x52, "ADDC_UINT", AMDGPUcarry>;
+def SUBB_UINT : R600_2OP_Helper <0x53, "SUBB_UINT", AMDGPUborrow>;
+
+def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", ctlz_zero_undef, VecALU>;
+def FFBL_INT : R600_1OP_Helper <0xAC, "FFBL_INT", cttz_zero_undef, VecALU>;
+
+let hasSideEffects = 1 in {
+ def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", [], VecALU>;
+}
+
+def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
+
+def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
+ let Pattern = [];
+ let Itinerary = AnyALU;
+}
+
+def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
+
+def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
+ let Pattern = [];
+}
+
+def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
+
+def GROUP_BARRIER : InstR600 <
+ (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local), (int_AMDGPU_barrier_global)], AnyALU>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP2 <0x54> {
+
+ let dst = 0;
+ let dst_rel = 0;
+ let src0 = 0;
+ let src0_rel = 0;
+ let src0_neg = 0;
+ let src0_abs = 0;
+ let src1 = 0;
+ let src1_rel = 0;
+ let src1_neg = 0;
+ let src1_abs = 0;
+ let write = 0;
+ let omod = 0;
+ let clamp = 0;
+ let last = 1;
+ let bank_swizzle = 0;
+ let pred_sel = 0;
+ let update_exec_mask = 0;
+ let update_pred = 0;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+ let ALUInst = 1;
+}
+
+def : Pat <
+ (int_AMDGPU_barrier_global),
+ (GROUP_BARRIER)
+>;
+
+//===----------------------------------------------------------------------===//
+// LDS Instructions
+//===----------------------------------------------------------------------===//
+class R600_LDS <bits<6> op, dag outs, dag ins, string asm,
+ list<dag> pattern = []> :
+
+ InstR600 <outs, ins, asm, pattern, XALU>,
+ R600_ALU_LDS_Word0,
+ R600LDS_Word1 {
+
+ bits<6> offset = 0;
+ let lds_op = op;
+
+ let Word1{27} = offset{0};
+ let Word1{12} = offset{1};
+ let Word1{28} = offset{2};
+ let Word1{31} = offset{3};
+ let Word0{12} = offset{4};
+ let Word0{25} = offset{5};
+
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+ let ALUInst = 1;
+ let HasNativeOperands = 1;
+ let UseNamedOperandTable = 1;
+}
+
+class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS <
+ lds_op,
+ (outs R600_Reg32:$dst),
+ (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
+ LAST:$last, R600_Pred:$pred_sel,
+ BANK_SWIZZLE:$bank_swizzle),
+ " "#name#" $last OQAP, $src0$src0_rel $pred_sel",
+ pattern
+ > {
+
+ let src1 = 0;
+ let src1_rel = 0;
+ let src2 = 0;
+ let src2_rel = 0;
+
+ let usesCustomInserter = 1;
+ let LDS_1A = 1;
+ let DisableEncoding = "$dst";
+}
+
+class R600_LDS_1A1D <bits<6> lds_op, dag outs, string name, list<dag> pattern,
+ string dst =""> :
+ R600_LDS <
+ lds_op, outs,
+ (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
+ R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
+ LAST:$last, R600_Pred:$pred_sel,
+ BANK_SWIZZLE:$bank_swizzle),
+ " "#name#" $last "#dst#"$src0$src0_rel, $src1$src1_rel, $pred_sel",
+ pattern
+ > {
+
+ field string BaseOp;
+
+ let src2 = 0;
+ let src2_rel = 0;
+ let LDS_1A1D = 1;
+}
+
+class R600_LDS_1A1D_NORET <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS_1A1D <lds_op, (outs), name, pattern> {
+ let BaseOp = name;
+}
+
+class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS_1A1D <lds_op, (outs R600_Reg32:$dst), name##"_RET", pattern, "OQAP, "> {
+
+ let BaseOp = name;
+ let usesCustomInserter = 1;
+ let DisableEncoding = "$dst";
+}
+
+class R600_LDS_1A2D <bits<6> lds_op, dag outs, string name, list<dag> pattern,
+ string dst =""> :
+ R600_LDS <
+ lds_op, outs,
+ (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
+ R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
+ R600_Reg32:$src2, REL:$src2_rel, SEL:$src2_sel,
+ LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle),
+ " "#name# "$last "#dst#"$src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel",
+ pattern> {
+
+ field string BaseOp;
+
+ let LDS_1A1D = 0;
+ let LDS_1A2D = 1;
+}
+
+class R600_LDS_1A2D_NORET <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS_1A2D <lds_op, (outs), name, pattern> {
+ let BaseOp = name;
+}
+
+class R600_LDS_1A2D_RET <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS_1A2D <lds_op, (outs R600_Reg32:$dst), name, pattern> {
+
+ let BaseOp = name;
+ let usesCustomInserter = 1;
+ let DisableEncoding = "$dst";
+}
+
+def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
+def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >;
+def LDS_AND : R600_LDS_1A1D_NORET <0x9, "LDS_AND", [] >;
+def LDS_OR : R600_LDS_1A1D_NORET <0xa, "LDS_OR", [] >;
+def LDS_XOR : R600_LDS_1A1D_NORET <0xb, "LDS_XOR", [] >;
+def LDS_WRXCHG: R600_LDS_1A1D_NORET <0xd, "LDS_WRXCHG", [] >;
+def LDS_CMPST: R600_LDS_1A2D_NORET <0x10, "LDS_CMPST", [] >;
+def LDS_MIN_INT : R600_LDS_1A1D_NORET <0x5, "LDS_MIN_INT", [] >;
+def LDS_MAX_INT : R600_LDS_1A1D_NORET <0x6, "LDS_MAX_INT", [] >;
+def LDS_MIN_UINT : R600_LDS_1A1D_NORET <0x7, "LDS_MIN_UINT", [] >;
+def LDS_MAX_UINT : R600_LDS_1A1D_NORET <0x8, "LDS_MAX_UINT", [] >;
+def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
+ [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
+>;
+def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE",
+ [(truncstorei8_local i32:$src1, i32:$src0)]
+>;
+def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE",
+ [(truncstorei16_local i32:$src1, i32:$src0)]
+>;
+def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
+ [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))]
+>;
+def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB",
+ [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))]
+>;
+def LDS_AND_RET : R600_LDS_1A1D_RET <0x29, "LDS_AND",
+ [(set i32:$dst, (atomic_load_and_local i32:$src0, i32:$src1))]
+>;
+def LDS_OR_RET : R600_LDS_1A1D_RET <0x2a, "LDS_OR",
+ [(set i32:$dst, (atomic_load_or_local i32:$src0, i32:$src1))]
+>;
+def LDS_XOR_RET : R600_LDS_1A1D_RET <0x2b, "LDS_XOR",
+ [(set i32:$dst, (atomic_load_xor_local i32:$src0, i32:$src1))]
+>;
+def LDS_MIN_INT_RET : R600_LDS_1A1D_RET <0x25, "LDS_MIN_INT",
+ [(set i32:$dst, (atomic_load_min_local i32:$src0, i32:$src1))]
+>;
+def LDS_MAX_INT_RET : R600_LDS_1A1D_RET <0x26, "LDS_MAX_INT",
+ [(set i32:$dst, (atomic_load_max_local i32:$src0, i32:$src1))]
+>;
+def LDS_MIN_UINT_RET : R600_LDS_1A1D_RET <0x27, "LDS_MIN_UINT",
+ [(set i32:$dst, (atomic_load_umin_local i32:$src0, i32:$src1))]
+>;
+def LDS_MAX_UINT_RET : R600_LDS_1A1D_RET <0x28, "LDS_MAX_UINT",
+ [(set i32:$dst, (atomic_load_umax_local i32:$src0, i32:$src1))]
+>;
+def LDS_WRXCHG_RET : R600_LDS_1A1D_RET <0x2d, "LDS_WRXCHG",
+ [(set i32:$dst, (atomic_swap_local i32:$src0, i32:$src1))]
+>;
+def LDS_CMPST_RET : R600_LDS_1A2D_RET <0x30, "LDS_CMPST",
+ [(set i32:$dst, (atomic_cmp_swap_32_local i32:$src0, i32:$src1, i32:$src2))]
+>;
+def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
+ [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
+>;
+def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET",
+ [(set i32:$dst, (sextloadi8_local i32:$src0))]
+>;
+def LDS_UBYTE_READ_RET : R600_LDS_1A <0x37, "LDS_UBYTE_READ_RET",
+ [(set i32:$dst, (az_extloadi8_local i32:$src0))]
+>;
+def LDS_SHORT_READ_RET : R600_LDS_1A <0x38, "LDS_SHORT_READ_RET",
+ [(set i32:$dst, (sextloadi16_local i32:$src0))]
+>;
+def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET",
+ [(set i32:$dst, (az_extloadi16_local i32:$src0))]
+>;
+
+// TRUNC is used for the FLT_TO_INT instructions to work around a
+// perceived problem where the rounding modes are applied differently
+// depending on the instruction and the slot they are in.
+// See:
+// https://bugs.freedesktop.org/show_bug.cgi?id=50232
+// Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
+//
+// XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
+// which do not need to be truncated since the fp values are 0.0f or 1.0f.
+// We should look into handling these cases separately.
+def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>;
+
+def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;
+
+// SHA-256 Patterns
+def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
+
+def EG_ExportSwz : ExportSwzInst {
+ let Word1{19-16} = 0; // BURST_COUNT
+ let Word1{20} = 0; // VALID_PIXEL_MODE
+ let Word1{21} = eop;
+ let Word1{29-22} = inst;
+ let Word1{30} = 0; // MARK
+ let Word1{31} = 1; // BARRIER
+}
+defm : ExportPattern<EG_ExportSwz, 83>;
+
+def EG_ExportBuf : ExportBufInst {
+ let Word1{19-16} = 0; // BURST_COUNT
+ let Word1{20} = 0; // VALID_PIXEL_MODE
+ let Word1{21} = eop;
+ let Word1{29-22} = inst;
+ let Word1{30} = 0; // MARK
+ let Word1{31} = 1; // BARRIER
+}
+defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
+
+def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
+ "TEX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+}
+def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
+ "VTX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+}
+def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR),
+ "LOOP_START_DX10 @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR),
+ "LOOP_BREAK @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR),
+ "CONTINUE @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "JUMP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+def CF_PUSH_EG : CF_CLAUSE_EG<11, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "PUSH @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "ELSE @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
+ let ADDR = 0;
+ let COUNT = 0;
+ let POP_COUNT = 0;
+}
+def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "POP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+def CF_END_EG : CF_CLAUSE_EG<0, (ins), "CF_END"> {
+ let COUNT = 0;
+ let POP_COUNT = 0;
+ let ADDR = 0;
+ let END_OF_PROGRAM = 1;
+}
+
+} // End Predicates = [isEGorCayman]
diff --git a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
new file mode 100644
index 0000000..e811d5c
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -0,0 +1,642 @@
+//===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// \file
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot, const MCSubtargetInfo &STI) {
+ OS.flush();
+ printInstruction(MI, OS);
+
+ printAnnotation(OS, Annot);
+}
+
+void AMDGPUInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << formatHex(MI->getOperand(OpNo).getImm() & 0xff);
+}
+
+void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << formatHex(MI->getOperand(OpNo).getImm() & 0xffff);
+}
+
+void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
+}
+
+void AMDGPUInstPrinter::printU8ImmDecOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << formatDec(MI->getOperand(OpNo).getImm() & 0xff);
+}
+
+void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
+}
+
+void AMDGPUInstPrinter::printOffen(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " offen";
+}
+
+void AMDGPUInstPrinter::printIdxen(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " idxen";
+}
+
+void AMDGPUInstPrinter::printAddr64(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " addr64";
+}
+
+void AMDGPUInstPrinter::printMBUFOffset(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " offset:";
+ printU16ImmDecOperand(MI, OpNo, O);
+ }
+}
+
+void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ uint16_t Imm = MI->getOperand(OpNo).getImm();
+ if (Imm != 0) {
+ O << " offset:";
+ printU16ImmDecOperand(MI, OpNo, O);
+ }
+}
+
+void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " offset0:";
+ printU8ImmDecOperand(MI, OpNo, O);
+ }
+}
+
+void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " offset1:";
+ printU8ImmDecOperand(MI, OpNo, O);
+ }
+}
+
+void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " gds";
+}
+
+void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " glc";
+}
+
+void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " slc";
+}
+
+void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " tfe";
+}
+
+void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
+ const MCRegisterInfo &MRI) {
+ switch (reg) {
+ case AMDGPU::VCC:
+ O << "vcc";
+ return;
+ case AMDGPU::SCC:
+ O << "scc";
+ return;
+ case AMDGPU::EXEC:
+ O << "exec";
+ return;
+ case AMDGPU::M0:
+ O << "m0";
+ return;
+ case AMDGPU::FLAT_SCR:
+ O << "flat_scratch";
+ return;
+ case AMDGPU::VCC_LO:
+ O << "vcc_lo";
+ return;
+ case AMDGPU::VCC_HI:
+ O << "vcc_hi";
+ return;
+ case AMDGPU::EXEC_LO:
+ O << "exec_lo";
+ return;
+ case AMDGPU::EXEC_HI:
+ O << "exec_hi";
+ return;
+ case AMDGPU::FLAT_SCR_LO:
+ O << "flat_scratch_lo";
+ return;
+ case AMDGPU::FLAT_SCR_HI:
+ O << "flat_scratch_hi";
+ return;
+ default:
+ break;
+ }
+
+ char Type;
+ unsigned NumRegs;
+
+ if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(reg)) {
+ Type = 'v';
+ NumRegs = 1;
+ } else if (MRI.getRegClass(AMDGPU::SGPR_32RegClassID).contains(reg)) {
+ Type = 's';
+ NumRegs = 1;
+ } else if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(reg)) {
+ Type = 'v';
+ NumRegs = 2;
+ } else if (MRI.getRegClass(AMDGPU::SReg_64RegClassID).contains(reg)) {
+ Type = 's';
+ NumRegs = 2;
+ } else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(reg)) {
+ Type = 'v';
+ NumRegs = 4;
+ } else if (MRI.getRegClass(AMDGPU::SReg_128RegClassID).contains(reg)) {
+ Type = 's';
+ NumRegs = 4;
+ } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(reg)) {
+ Type = 'v';
+ NumRegs = 3;
+ } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(reg)) {
+ Type = 'v';
+ NumRegs = 8;
+ } else if (MRI.getRegClass(AMDGPU::SReg_256RegClassID).contains(reg)) {
+ Type = 's';
+ NumRegs = 8;
+ } else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(reg)) {
+ Type = 'v';
+ NumRegs = 16;
+ } else if (MRI.getRegClass(AMDGPU::SReg_512RegClassID).contains(reg)) {
+ Type = 's';
+ NumRegs = 16;
+ } else {
+ O << getRegisterName(reg);
+ return;
+ }
+
+  // The low 8 bits of the encoding value are the register index, for both
+  // VGPRs and SGPRs.
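+  // For example, with the formatting below a 4-register VGPR tuple whose
+  // encoding index is 8 prints as "v[8:11]", and a single SGPR at index 3
+  // prints as "s3".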
+ unsigned RegIdx = MRI.getEncodingValue(reg) & ((1 << 8) - 1);
+ if (NumRegs == 1) {
+ O << Type << RegIdx;
+ return;
+ }
+
+ O << Type << '[' << RegIdx << ':' << (RegIdx + NumRegs - 1) << ']';
+}
+
+void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3)
+ O << "_e64 ";
+ else
+ O << "_e32 ";
+
+ printOperand(MI, OpNo, O);
+}
+
+void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) {
+ int32_t SImm = static_cast<int32_t>(Imm);
+ if (SImm >= -16 && SImm <= 64) {
+ O << SImm;
+ return;
+ }
+
+ if (Imm == FloatToBits(0.0f))
+ O << "0.0";
+ else if (Imm == FloatToBits(1.0f))
+ O << "1.0";
+ else if (Imm == FloatToBits(-1.0f))
+ O << "-1.0";
+ else if (Imm == FloatToBits(0.5f))
+ O << "0.5";
+ else if (Imm == FloatToBits(-0.5f))
+ O << "-0.5";
+ else if (Imm == FloatToBits(2.0f))
+ O << "2.0";
+ else if (Imm == FloatToBits(-2.0f))
+ O << "-2.0";
+ else if (Imm == FloatToBits(4.0f))
+ O << "4.0";
+ else if (Imm == FloatToBits(-4.0f))
+ O << "-4.0";
+ else
+ O << formatHex(static_cast<uint64_t>(Imm));
+}
+
+void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) {
+ int64_t SImm = static_cast<int64_t>(Imm);
+ if (SImm >= -16 && SImm <= 64) {
+ O << SImm;
+ return;
+ }
+
+ if (Imm == DoubleToBits(0.0))
+ O << "0.0";
+ else if (Imm == DoubleToBits(1.0))
+ O << "1.0";
+ else if (Imm == DoubleToBits(-1.0))
+ O << "-1.0";
+ else if (Imm == DoubleToBits(0.5))
+ O << "0.5";
+ else if (Imm == DoubleToBits(-0.5))
+ O << "-0.5";
+ else if (Imm == DoubleToBits(2.0))
+ O << "2.0";
+ else if (Imm == DoubleToBits(-2.0))
+ O << "-2.0";
+ else if (Imm == DoubleToBits(4.0))
+ O << "4.0";
+ else if (Imm == DoubleToBits(-4.0))
+ O << "-4.0";
+ else
+ llvm_unreachable("64-bit literal constants not supported");
+}
+
+void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ switch (Op.getReg()) {
+ // This is the default predicate state, so we don't need to print it.
+ case AMDGPU::PRED_SEL_OFF:
+ break;
+
+ default:
+ printRegOperand(Op.getReg(), O, MRI);
+ break;
+ }
+ } else if (Op.isImm()) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ int RCID = Desc.OpInfo[OpNo].RegClass;
+ if (RCID != -1) {
+ const MCRegisterClass &ImmRC = MRI.getRegClass(RCID);
+ if (ImmRC.getSize() == 4)
+ printImmediate32(Op.getImm(), O);
+ else if (ImmRC.getSize() == 8)
+ printImmediate64(Op.getImm(), O);
+ else
+ llvm_unreachable("Invalid register class size");
+ } else if (Desc.OpInfo[OpNo].OperandType == MCOI::OPERAND_IMMEDIATE) {
+ printImmediate32(Op.getImm(), O);
+ } else {
+ // We hit this for the immediate instruction bits that don't yet have a
+ // custom printer.
+ // TODO: Eventually this should be unnecessary.
+ O << formatDec(Op.getImm());
+ }
+ } else if (Op.isFPImm()) {
+ // We special case 0.0 because otherwise it will be printed as an integer.
+ if (Op.getFPImm() == 0.0)
+ O << "0.0";
+ else {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ const MCRegisterClass &ImmRC = MRI.getRegClass(Desc.OpInfo[OpNo].RegClass);
+
+ if (ImmRC.getSize() == 4)
+ printImmediate32(FloatToBits(Op.getFPImm()), O);
+ else if (ImmRC.getSize() == 8)
+ printImmediate64(DoubleToBits(Op.getFPImm()), O);
+ else
+ llvm_unreachable("Invalid register class size");
+ }
+ } else if (Op.isExpr()) {
+ const MCExpr *Exp = Op.getExpr();
+ Exp->print(O, &MAI);
+ } else {
+ llvm_unreachable("unknown operand type in printOperand");
+ }
+}
+
+void AMDGPUInstPrinter::printOperandAndMods(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned InputModifiers = MI->getOperand(OpNo).getImm();
+ if (InputModifiers & SISrcMods::NEG)
+ O << '-';
+ if (InputModifiers & SISrcMods::ABS)
+ O << '|';
+ printOperand(MI, OpNo + 1, O);
+ if (InputModifiers & SISrcMods::ABS)
+ O << '|';
+}
+
+void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+
+ if (Imm == 2) {
+ O << "P0";
+ } else if (Imm == 1) {
+ O << "P20";
+ } else if (Imm == 0) {
+ O << "P10";
+ } else {
+ llvm_unreachable("Invalid interpolation parameter slot");
+ }
+}
+
+void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printOperand(MI, OpNo, O);
+ O << ", ";
+ printOperand(MI, OpNo + 1, O);
+}
+
+void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, StringRef Asm,
+ StringRef Default) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ assert(Op.isImm());
+ if (Op.getImm() == 1) {
+ O << Asm;
+ } else {
+ O << Default;
+ }
+}
+
+void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "|");
+}
+
+void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "_SAT");
+}
+
+void AMDGPUInstPrinter::printClampSI(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " clamp";
+}
+
+void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int Imm = MI->getOperand(OpNo).getImm();
+ if (Imm == SIOutMods::MUL2)
+ O << " mul:2";
+ else if (Imm == SIOutMods::MUL4)
+ O << " mul:4";
+ else if (Imm == SIOutMods::DIV2)
+ O << " div:2";
+}
+
+void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int32_t Imm = MI->getOperand(OpNo).getImm();
+ O << Imm << '(' << BitsToFloat(Imm) << ')';
+}
+
+void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "*", " ");
+}
+
+void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "-");
+}
+
+void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ switch (MI->getOperand(OpNo).getImm()) {
+ default: break;
+ case 1:
+ O << " * 2.0";
+ break;
+ case 2:
+ O << " * 4.0";
+ break;
+ case 3:
+ O << " / 2.0";
+ break;
+ }
+}
+
+void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "+");
+}
+
+void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "ExecMask,");
+}
+
+void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "Pred,");
+}
+
+void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.getImm() == 0) {
+ O << " (MASKED)";
+ }
+}
+
+void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const char * chans = "XYZW";
+ int sel = MI->getOperand(OpNo).getImm();
+
+ int chan = sel & 3;
+ sel >>= 2;
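+  // For example, an immediate of 9 gives chan = 1 and sel = 2, which the code
+  // below prints as "2.Y".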
+
+ if (sel >= 512) {
+ sel -= 512;
+ int cb = sel >> 12;
+ sel &= 4095;
+ O << cb << '[' << sel << ']';
+ } else if (sel >= 448) {
+ sel -= 448;
+ O << sel;
+  } else if (sel >= 0) {
+ O << sel;
+ }
+
+ if (sel >= 0)
+ O << '.' << chans[chan];
+}
+
+void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int BankSwizzle = MI->getOperand(OpNo).getImm();
+ switch (BankSwizzle) {
+ case 1:
+ O << "BS:VEC_021/SCL_122";
+ break;
+ case 2:
+ O << "BS:VEC_120/SCL_212";
+ break;
+ case 3:
+ O << "BS:VEC_102/SCL_221";
+ break;
+ case 4:
+ O << "BS:VEC_201";
+ break;
+ case 5:
+ O << "BS:VEC_210";
+ break;
+ default:
+ break;
+ }
+ return;
+}
+
+void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned Sel = MI->getOperand(OpNo).getImm();
+ switch (Sel) {
+ case 0:
+ O << 'X';
+ break;
+ case 1:
+ O << 'Y';
+ break;
+ case 2:
+ O << 'Z';
+ break;
+ case 3:
+ O << 'W';
+ break;
+ case 4:
+ O << '0';
+ break;
+ case 5:
+ O << '1';
+ break;
+ case 7:
+ O << '_';
+ break;
+ default:
+ break;
+ }
+}
+
+void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned CT = MI->getOperand(OpNo).getImm();
+ switch (CT) {
+ case 0:
+ O << 'U';
+ break;
+ case 1:
+ O << 'N';
+ break;
+ default:
+ break;
+ }
+}
+
+void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int KCacheMode = MI->getOperand(OpNo).getImm();
+ if (KCacheMode > 0) {
+ int KCacheBank = MI->getOperand(OpNo - 2).getImm();
+ O << "CB" << KCacheBank << ':';
+ int KCacheAddr = MI->getOperand(OpNo + 2).getImm();
+ int LineSize = (KCacheMode == 1) ? 16 : 32;
+ O << KCacheAddr * 16 << '-' << KCacheAddr * 16 + LineSize;
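+    // For example, bank 0 with KCacheMode == 1 and KCacheAddr == 2 prints
+    // "CB0:32-48".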
+ }
+}
+
+void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned SImm16 = MI->getOperand(OpNo).getImm();
+ unsigned Msg = SImm16 & 0xF;
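+  // For example, SImm16 == 0x22 decodes to Msg = 2, Op = 2, Stream = 0 and is
+  // printed as "Gs(emit stream 0), [m0] ".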
+ if (Msg == 2 || Msg == 3) {
+ unsigned Op = (SImm16 >> 4) & 0xF;
+ if (Msg == 3)
+ O << "Gs_done(";
+ else
+ O << "Gs(";
+ if (Op == 0) {
+ O << "nop";
+ } else {
+ unsigned Stream = (SImm16 >> 8) & 0x3;
+ if (Op == 1)
+ O << "cut";
+ else if (Op == 2)
+ O << "emit";
+ else if (Op == 3)
+ O << "emit-cut";
+ O << " stream " << Stream;
+ }
+ O << "), [m0] ";
+ } else if (Msg == 1)
+ O << "interrupt ";
+ else if (Msg == 15)
+ O << "system ";
+ else
+ O << "unknown(" << Msg << ") ";
+}
+
+void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+  // Note: Mask values are taken from SIInsertWaits.cpp rather than from the
+  // ISA docs. The bit usage in SIInsertWaits.cpp does not match the ISA
+  // description, but it works, so the docs may contain a misprint.
+ unsigned SImm16 = MI->getOperand(OpNo).getImm();
+ unsigned Vmcnt = SImm16 & 0xF;
+ unsigned Expcnt = (SImm16 >> 4) & 0xF;
+ unsigned Lgkmcnt = (SImm16 >> 8) & 0xF;
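+  // For example, SImm16 == 0x0070 gives Vmcnt = 0, Expcnt = 0x7 (the default,
+  // so it is omitted) and Lgkmcnt = 0, printing "vmcnt(0) lgkmcnt(0)".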
+
+ bool NeedSpace = false;
+
+ if (Vmcnt != 0xF) {
+ O << "vmcnt(" << Vmcnt << ')';
+ NeedSpace = true;
+ }
+
+ if (Expcnt != 0x7) {
+ if (NeedSpace)
+ O << ' ';
+ O << "expcnt(" << Expcnt << ')';
+ NeedSpace = true;
+ }
+
+ if (Lgkmcnt != 0x7) {
+ if (NeedSpace)
+ O << ' ';
+ O << "lgkmcnt(" << Lgkmcnt << ')';
+ }
+}
+
+#include "AMDGPUGenAsmWriter.inc"
diff --git a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
new file mode 100644
index 0000000..14fb511
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -0,0 +1,88 @@
+//===-- AMDGPUInstPrinter.h - AMDGPU MC Inst -> ASM interface ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
+#define LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class AMDGPUInstPrinter : public MCInstPrinter {
+public:
+ AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+  // Autogenerated by tblgen
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+ static void printRegOperand(unsigned RegNo, raw_ostream &O,
+ const MCRegisterInfo &MRI);
+
+private:
+ void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU32ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOffen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printDSOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printGDS(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printRegOperand(unsigned RegNo, raw_ostream &O);
+ void printVOPDst(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printImmediate32(uint32_t I, raw_ostream &O);
+ void printImmediate64(uint64_t I, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOperandAndMods(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ StringRef Asm, StringRef Default = "");
+ static void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printClampSI(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printOModSI(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printUpdateExecMask(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O);
+ static void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printSendMsg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printWaitFlag(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+
+} // End namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
new file mode 100644
index 0000000..8bed2de
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -0,0 +1,144 @@
+//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/AMDGPUFixupKinds.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUMCObjectWriter : public MCObjectWriter {
+public:
+ AMDGPUMCObjectWriter(raw_pwrite_stream &OS) : MCObjectWriter(OS, true) {}
+ void executePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) override {
+ //XXX: Implement if necessary.
+ }
+ void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, bool &IsPCRel,
+ uint64_t &FixedValue) override {
+ assert(!"Not implemented");
+ }
+
+ void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+
+};
+
+class AMDGPUAsmBackend : public MCAsmBackend {
+public:
+ AMDGPUAsmBackend(const Target &T)
+ : MCAsmBackend() {}
+
+  unsigned getNumFixupKinds() const override { return AMDGPU::NumTargetFixupKinds; }
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value, bool IsPCRel) const override;
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ return false;
+ }
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {
+ assert(!"Not implemented");
+ }
+ bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
+};
+
+} //End anonymous namespace
+
+void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
+ Asm.writeSectionData(&*I, Layout);
+ }
+}
+
+void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value,
+ bool IsPCRel) const {
+
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("Unknown fixup kind");
+ case AMDGPU::fixup_si_sopp_br: {
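+    // The branch offset appears to be encoded in dwords relative to the
+    // instruction that follows the 4-byte branch, hence (Value - 4) / 4.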
+ uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
+ *Dst = (Value - 4) / 4;
+ break;
+ }
+
+ case AMDGPU::fixup_si_rodata: {
+ uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
+ *Dst = Value;
+ break;
+ }
+
+ case AMDGPU::fixup_si_end_of_text: {
+ uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
+ // The value points to the last instruction in the text section, so we
+ // need to add 4 bytes to get to the start of the constants.
+ *Dst = Value + 4;
+ break;
+ }
+ }
+}
+
+const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
+ MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = {
+ // name offset bits flags
+ { "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_si_rodata", 0, 32, 0 },
+ { "fixup_si_end_of_text", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ return Infos[Kind - FirstTargetFixupKind];
+}
+
+bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ OW->WriteZeros(Count);
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// ELFAMDGPUAsmBackend class
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
+public:
+ ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { }
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ return createAMDGPUELFObjectWriter(OS);
+ }
+};
+
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU) {
+ return new ELFAMDGPUAsmBackend(T);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
new file mode 100644
index 0000000..59f45ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -0,0 +1,39 @@
+//===-- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ AMDGPUELFObjectWriter();
+protected:
+ unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel) const override {
+ return Fixup.getKind();
+ }
+
+};
+
+
+} // End anonymous namespace
+
+AMDGPUELFObjectWriter::AMDGPUELFObjectWriter()
+ : MCELFObjectTargetWriter(false, 0, 0, false) { }
+
+MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_pwrite_stream &OS) {
+ MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter();
+ return createELFObjectWriter(MOTW, OS, true);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
new file mode 100644
index 0000000..fa3b3c3
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
@@ -0,0 +1,34 @@
+//===-- AMDGPUFixupKinds.h - AMDGPU Specific Fixup Entries ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUFIXUPKINDS_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace AMDGPU {
+enum Fixups {
+ /// 16-bit PC relative fixup for SOPP branch instructions.
+ fixup_si_sopp_br = FirstTargetFixupKind,
+
+ /// fixup for global addresses with constant initializers
+ fixup_si_rodata,
+
+ /// fixup for offset from instruction to end of text section
+ fixup_si_end_of_text,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+} // namespace AMDGPU
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
new file mode 100644
index 0000000..028a86d
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -0,0 +1,43 @@
+//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCAsmInfo.h"
+
+using namespace llvm;
+AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
+ HasSingleParameterDotFile = false;
+ //===------------------------------------------------------------------===//
+ MaxInstLength = 16;
+ SeparatorString = "\n";
+ CommentString = ";";
+ PrivateLabelPrefix = "";
+ InlineAsmStart = ";#ASMSTART";
+ InlineAsmEnd = ";#ASMEND";
+
+ //===--- Data Emission Directives -------------------------------------===//
+ ZeroDirective = ".zero";
+ AsciiDirective = ".ascii\t";
+ AscizDirective = ".asciz\t";
+ Data8bitsDirective = ".byte\t";
+ Data16bitsDirective = ".short\t";
+ Data32bitsDirective = ".long\t";
+ Data64bitsDirective = ".quad\t";
+ SunStyleELFSectionSwitchSyntax = true;
+ UsesELFSectionDirectiveForBSS = true;
+
+ //===--- Global Variable Emission Directives --------------------------===//
+ HasAggressiveSymbolFolding = true;
+ COMMDirectiveAlignmentIsInBytes = false;
+ HasDotTypeDotSizeDirective = false;
+ HasNoDeadStrip = true;
+ WeakRefDirective = ".weakref\t";
+ //===--- Dwarf Emission Directives -----------------------------------===//
+ SupportsDebugInformation = true;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
new file mode 100644
index 0000000..a5bac51
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -0,0 +1,32 @@
+//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCASMINFO_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfoELF.h"
+namespace llvm {
+
+class Triple;
+
+// If you need to create another MCAsmInfo class, which inherits from MCAsmInfo,
+// you will need to make sure your new class sets PrivateGlobalPrefix to
+// a prefix that won't appear in a function name. The default value
+// for PrivateGlobalPrefix is 'L', so any function starting with 'L' would be
+// treated as a local symbol.
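+//
+// A hypothetical subclass (sketched here only as an illustration) would do:
+//
+//   class FooMCAsmInfo : public MCAsmInfoELF {
+//   public:
+//     explicit FooMCAsmInfo(const Triple &TT) { PrivateGlobalPrefix = ".L"; }
+//   };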
+class AMDGPUMCAsmInfo : public MCAsmInfoELF {
+public:
+ explicit AMDGPUMCAsmInfo(const Triple &TT);
+};
+} // namespace llvm
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
new file mode 100644
index 0000000..521b3b3
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -0,0 +1,21 @@
+//===-- AMDGPUCodeEmitter.cpp - AMDGPU Code Emitter interface -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief CodeEmitter interface for R600 and SI codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCCodeEmitter.h"
+
+using namespace llvm;
+
+// pin vtable to this file
+void AMDGPUMCCodeEmitter::anchor() {}
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
new file mode 100644
index 0000000..c957427
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -0,0 +1,50 @@
+//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief CodeEmitter interface for R600 and SI codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCCODEEMITTER_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCCODEEMITTER_H
+
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class MCInst;
+class MCOperand;
+class MCSubtargetInfo;
+
+class AMDGPUMCCodeEmitter : public MCCodeEmitter {
+ virtual void anchor();
+public:
+
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return 0;
+ }
+
+ virtual unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return 0;
+ }
+};
+
+} // End namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
new file mode 100644
index 0000000..a7d3dd1
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -0,0 +1,90 @@
+//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This file provides AMDGPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCTargetDesc.h"
+#include "AMDGPUMCAsmInfo.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "SIDefines.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define GET_INSTRINFO_MC_DESC
+#include "AMDGPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "AMDGPUGenRegisterInfo.inc"
+
+static MCInstrInfo *createAMDGPUMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAMDGPUMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAMDGPUMCRegisterInfo(X, 0);
+ return X;
+}
+
+static MCSubtargetInfo *
+createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
+ MCSubtargetInfo * X = new MCSubtargetInfo();
+ InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->initMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
+ return new AMDGPUInstPrinter(MAI, MII, MRI);
+}
+
+extern "C" void LLVMInitializeAMDGPUTargetMC() {
+ for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) {
+ RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
+
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createAMDGPUMCCodeGenInfo);
+ TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo);
+ TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);
+ TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter);
+ TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend);
+ }
+
+ TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget,
+ createR600MCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createSIMCCodeEmitter);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
new file mode 100644
index 0000000..ac611b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -0,0 +1,61 @@
+//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Provides AMDGPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+class Triple;
+class raw_pwrite_stream;
+class raw_ostream;
+
+extern Target TheAMDGPUTarget;
+extern Target TheGCNTarget;
+
+MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+
+MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+
+MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU);
+
+MCObjectWriter *createAMDGPUELFObjectWriter(raw_pwrite_stream &OS);
+} // namespace llvm
+
+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc"
+
+#define GET_INSTRINFO_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
new file mode 100644
index 0000000..e683498
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -0,0 +1,181 @@
+//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// \brief The R600 code emitter produces machine code that can be executed
+/// directly on the GPU device.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600Defines.h"
+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
+ R600MCCodeEmitter(const R600MCCodeEmitter &) = delete;
+ void operator=(const R600MCCodeEmitter &) = delete;
+ const MCInstrInfo &MCII;
+ const MCRegisterInfo &MRI;
+
+public:
+
+ R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri)
+ : MCII(mcii), MRI(mri) { }
+
+ /// \brief Encode the instruction and write it to the OS.
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ /// \returns the encoding for an MCOperand.
+ uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+private:
+
+ void EmitByte(unsigned int byte, raw_ostream &OS) const;
+
+ void Emit(uint32_t value, raw_ostream &OS) const;
+ void Emit(uint64_t value, raw_ostream &OS) const;
+
+ unsigned getHWRegChan(unsigned reg) const;
+ unsigned getHWReg(unsigned regNo) const;
+
+};
+
+} // End anonymous namespace
+
+enum RegElement {
+ ELEMENT_X = 0,
+ ELEMENT_Y,
+ ELEMENT_Z,
+ ELEMENT_W
+};
+
+enum FCInstr {
+ FC_IF_PREDICATE = 0,
+ FC_ELSE,
+ FC_ENDIF,
+ FC_BGNLOOP,
+ FC_ENDLOOP,
+ FC_BREAK_PREDICATE,
+ FC_CONTINUE
+};
+
+MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new R600MCCodeEmitter(MCII, MRI);
+}
+
+void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ if (MI.getOpcode() == AMDGPU::RETURN ||
+ MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
+ MI.getOpcode() == AMDGPU::ALU_CLAUSE ||
+ MI.getOpcode() == AMDGPU::BUNDLE ||
+ MI.getOpcode() == AMDGPU::KILL) {
+ return;
+ } else if (IS_VTX(Desc)) {
+ uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups, STI);
+ uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
+ if (!(STI.getFeatureBits()[AMDGPU::FeatureCaymanISA])) {
+ InstWord2 |= 1 << 19; // Mega-Fetch bit
+ }
+
+ Emit(InstWord01, OS);
+ Emit(InstWord2, OS);
+ Emit((uint32_t) 0, OS);
+ } else if (IS_TEX(Desc)) {
+ int64_t Sampler = MI.getOperand(14).getImm();
+
+ int64_t SrcSelect[4] = {
+ MI.getOperand(2).getImm(),
+ MI.getOperand(3).getImm(),
+ MI.getOperand(4).getImm(),
+ MI.getOperand(5).getImm()
+ };
+ int64_t Offsets[3] = {
+ MI.getOperand(6).getImm() & 0x1F,
+ MI.getOperand(7).getImm() & 0x1F,
+ MI.getOperand(8).getImm() & 0x1F
+ };
+
+ uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups, STI);
+ uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
+ SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
+ SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
+ Offsets[2] << 10;
+
+ Emit(Word01, OS);
+ Emit(Word2, OS);
+ Emit((uint32_t) 0, OS);
+ } else {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups, STI);
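+    // On R600-family ALU encodings the 10-bit opcode field appears to sit one
+    // bit higher, so the code below moves it from bits 48:39 up to 49:40.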
+ if ((STI.getFeatureBits()[AMDGPU::FeatureR600ALUInst]) &&
+ ((Desc.TSFlags & R600_InstFlag::OP1) ||
+ Desc.TSFlags & R600_InstFlag::OP2)) {
+ uint64_t ISAOpCode = Inst & (0x3FFULL << 39);
+ Inst &= ~(0x3FFULL << 39);
+ Inst |= ISAOpCode << 1;
+ }
+ Emit(Inst, OS);
+ }
+}
+
+void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
+ OS.write((uint8_t) Byte & 0xff);
+}
+
+void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
+ support::endian::Writer<support::little>(OS).write(Value);
+}
+
+void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
+ support::endian::Writer<support::little>(OS).write(Value);
+}
+
+unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
+ return MRI.getEncodingValue(reg) >> HW_CHAN_SHIFT;
+}
+
+unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
+ return MRI.getEncodingValue(RegNo) & HW_REG_MASK;
+}
+
+uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixup,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg()) {
+ if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags))
+ return MRI.getEncodingValue(MO.getReg());
+ return getHWReg(MO.getReg());
+ }
+
+ assert(MO.isImm());
+ return MO.getImm();
+}
+
+#include "AMDGPUGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
new file mode 100644
index 0000000..65a0eeb
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -0,0 +1,289 @@
+//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief The SI code emitter produces machine code that can be executed
+/// directly on the GPU device.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "MCTargetDesc/AMDGPUFixupKinds.h"
+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
+ SIMCCodeEmitter(const SIMCCodeEmitter &) = delete;
+ void operator=(const SIMCCodeEmitter &) = delete;
+ const MCInstrInfo &MCII;
+ const MCRegisterInfo &MRI;
+ MCContext &Ctx;
+
+ /// \brief Can this operand also contain immediate values?
+ bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
+
+ /// \brief Encode an fp or int literal
+ uint32_t getLitEncoding(const MCOperand &MO, unsigned OpSize) const;
+
+public:
+ SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+ MCContext &ctx)
+ : MCII(mcii), MRI(mri), Ctx(ctx) { }
+
+ ~SIMCCodeEmitter() override {}
+
+ /// \brief Encode the instruction and write it to the OS.
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ /// \returns the encoding for an MCOperand.
+ uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ /// \brief Use a fixup to encode the simm16 field for SOPP branch
+ /// instructions.
+ unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+};
+
+} // End anonymous namespace
+
+MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new SIMCCodeEmitter(MCII, MRI, Ctx);
+}
+
+bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
+ unsigned OpNo) const {
+ unsigned OpType = Desc.OpInfo[OpNo].OperandType;
+
+ return OpType == AMDGPU::OPERAND_REG_IMM32 ||
+ OpType == AMDGPU::OPERAND_REG_INLINE_C;
+}
+
+// Returns the encoding value to use if the given integer is an integer inline
+// immediate value, or 0 if it is not.
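+// For example, 5 encodes as 133 (128 + 5) and -7 encodes as 199 (192 + 7);
+// a value such as 100 is not an inline immediate and yields 0.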
+template <typename IntTy>
+static uint32_t getIntInlineImmEncoding(IntTy Imm) {
+ if (Imm >= 0 && Imm <= 64)
+ return 128 + Imm;
+
+ if (Imm >= -16 && Imm <= -1)
+ return 192 + std::abs(Imm);
+
+ return 0;
+}
+
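+// Map a 32-bit value onto its source operand encoding: an integer inline
+// immediate, one of the eight hardware float constants (encodings 240-247),
+// or 255, meaning a 32-bit literal constant follows the instruction.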
+static uint32_t getLit32Encoding(uint32_t Val) {
+ uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val));
+ if (IntImm != 0)
+ return IntImm;
+
+ if (Val == FloatToBits(0.5f))
+ return 240;
+
+ if (Val == FloatToBits(-0.5f))
+ return 241;
+
+ if (Val == FloatToBits(1.0f))
+ return 242;
+
+ if (Val == FloatToBits(-1.0f))
+ return 243;
+
+ if (Val == FloatToBits(2.0f))
+ return 244;
+
+ if (Val == FloatToBits(-2.0f))
+ return 245;
+
+ if (Val == FloatToBits(4.0f))
+ return 246;
+
+ if (Val == FloatToBits(-4.0f))
+ return 247;
+
+ return 255;
+}
+
+static uint32_t getLit64Encoding(uint64_t Val) {
+ uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val));
+ if (IntImm != 0)
+ return IntImm;
+
+ if (Val == DoubleToBits(0.5))
+ return 240;
+
+ if (Val == DoubleToBits(-0.5))
+ return 241;
+
+ if (Val == DoubleToBits(1.0))
+ return 242;
+
+ if (Val == DoubleToBits(-1.0))
+ return 243;
+
+ if (Val == DoubleToBits(2.0))
+ return 244;
+
+ if (Val == DoubleToBits(-2.0))
+ return 245;
+
+ if (Val == DoubleToBits(4.0))
+ return 246;
+
+ if (Val == DoubleToBits(-4.0))
+ return 247;
+
+ return 255;
+}
+
+uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
+ unsigned OpSize) const {
+ if (MO.isExpr())
+ return 255;
+
+ assert(!MO.isFPImm());
+
+ if (!MO.isImm())
+ return ~0;
+
+ if (OpSize == 4)
+ return getLit32Encoding(static_cast<uint32_t>(MO.getImm()));
+
+ assert(OpSize == 8);
+
+ return getLit64Encoding(static_cast<uint64_t>(MO.getImm()));
+}
+
+void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups, STI);
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ unsigned bytes = Desc.getSize();
+
+ for (unsigned i = 0; i < bytes; i++) {
+ OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
+ }
+
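+  // Literals are only appended after 4-byte encodings; nothing more to emit
+  // for longer encodings.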
+ if (bytes > 4)
+ return;
+
+ // Check for additional literals in SRC0/1/2 (Op 1/2/3)
+ for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
+
+ // Check if this operand should be encoded as [SV]Src
+ if (!isSrcOperand(Desc, i))
+ continue;
+
+ int RCID = Desc.OpInfo[i].RegClass;
+ const MCRegisterClass &RC = MRI.getRegClass(RCID);
+
+ // Is this operand a literal immediate?
+ const MCOperand &Op = MI.getOperand(i);
+ if (getLitEncoding(Op, RC.getSize()) != 255)
+ continue;
+
+ // Yes! Encode it
+ int64_t Imm = 0;
+
+ if (Op.isImm())
+ Imm = Op.getImm();
+ else if (!Op.isExpr()) // Exprs will be replaced with a fixup value.
+ llvm_unreachable("Must be immediate or expr");
+
+ for (unsigned j = 0; j < 4; j++) {
+ OS.write((uint8_t) ((Imm >> (8 * j)) & 0xff));
+ }
+
+ // Only one literal value allowed
+ break;
+ }
+}
+
+unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_sopp_br;
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
+ return 0;
+ }
+
+ return getMachineOpValue(MI, MO, Fixups, STI);
+}
+
+uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg())
+ return MRI.getEncodingValue(MO.getReg());
+
+ if (MO.isExpr()) {
+ const MCSymbolRefExpr *Expr = cast<MCSymbolRefExpr>(MO.getExpr());
+ MCFixupKind Kind;
+ const MCSymbol *Sym =
+ Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+
+ if (&Expr->getSymbol() == Sym) {
+ // Add the offset to the beginning of the constant values.
+ Kind = (MCFixupKind)AMDGPU::fixup_si_end_of_text;
+ } else {
+ // This is used for constant data stored in .rodata.
+ Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
+ }
+ Fixups.push_back(MCFixup::create(4, Expr, Kind, MI.getLoc()));
+ }
+
+ // Figure out the operand number, needed for isSrcOperand check
+ unsigned OpNo = 0;
+ for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) {
+ if (&MO == &MI.getOperand(OpNo))
+ break;
+ }
+
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ if (isSrcOperand(Desc, OpNo)) {
+ int RCID = Desc.OpInfo[OpNo].RegClass;
+ const MCRegisterClass &RC = MRI.getRegClass(RCID);
+
+ uint32_t Enc = getLitEncoding(MO, RC.getSize());
+ if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
+ return Enc;
+
+ } else if (MO.isImm())
+ return MO.getImm();
+
+ llvm_unreachable("Encoding of this operand type is not supported yet.");
+ return 0;
+}
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/Processors.td b/contrib/llvm/lib/Target/AMDGPU/Processors.td
new file mode 100644
index 0000000..c0ffede
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/Processors.td
@@ -0,0 +1,137 @@
+//===-- Processors.td - R600 Processor definitions ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
+: Processor<Name, itin, Features>;
+
+//===----------------------------------------------------------------------===//
+// R600
+//===----------------------------------------------------------------------===//
+def : Proc<"", R600_VLIW5_Itin,
+ [FeatureR600, FeatureVertexCache]>;
+
+def : Proc<"r600", R600_VLIW5_Itin,
+  [FeatureR600, FeatureVertexCache, FeatureWavefrontSize64]>;
+
+def : Proc<"r630", R600_VLIW5_Itin,
+ [FeatureR600, FeatureVertexCache, FeatureWavefrontSize32]>;
+
+def : Proc<"rs880", R600_VLIW5_Itin,
+ [FeatureR600, FeatureWavefrontSize16]>;
+
+def : Proc<"rv670", R600_VLIW5_Itin,
+ [FeatureR600, FeatureFP64, FeatureVertexCache, FeatureWavefrontSize64]>;
+
+//===----------------------------------------------------------------------===//
+// R700
+//===----------------------------------------------------------------------===//
+
+def : Proc<"rv710", R600_VLIW5_Itin,
+ [FeatureR700, FeatureVertexCache, FeatureWavefrontSize32]>;
+
+def : Proc<"rv730", R600_VLIW5_Itin,
+ [FeatureR700, FeatureVertexCache, FeatureWavefrontSize32]>;
+
+def : Proc<"rv770", R600_VLIW5_Itin,
+ [FeatureR700, FeatureFP64, FeatureVertexCache, FeatureWavefrontSize64]>;
+
+//===----------------------------------------------------------------------===//
+// Evergreen
+//===----------------------------------------------------------------------===//
+
+def : Proc<"cedar", R600_VLIW5_Itin,
+ [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32,
+ FeatureCFALUBug]>;
+
+def : Proc<"redwood", R600_VLIW5_Itin,
+ [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64,
+ FeatureCFALUBug]>;
+
+def : Proc<"sumo", R600_VLIW5_Itin,
+ [FeatureEvergreen, FeatureWavefrontSize64, FeatureCFALUBug]>;
+
+def : Proc<"juniper", R600_VLIW5_Itin,
+ [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>;
+
+def : Proc<"cypress", R600_VLIW5_Itin,
+ [FeatureEvergreen, FeatureFP64, FeatureVertexCache,
+ FeatureWavefrontSize64]>;
+
+//===----------------------------------------------------------------------===//
+// Northern Islands
+//===----------------------------------------------------------------------===//
+
+def : Proc<"barts", R600_VLIW5_Itin,
+ [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
+
+def : Proc<"turks", R600_VLIW5_Itin,
+ [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
+
+def : Proc<"caicos", R600_VLIW5_Itin,
+ [FeatureNorthernIslands, FeatureCFALUBug]>;
+
+def : Proc<"cayman", R600_VLIW4_Itin,
+ [FeatureNorthernIslands, FeatureFP64, FeatureCaymanISA]>;
+
+//===----------------------------------------------------------------------===//
+// Southern Islands
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"SI", SIFullSpeedModel,
+ [FeatureSouthernIslands, FeatureFastFMAF32]
+>;
+
+def : ProcessorModel<"tahiti", SIFullSpeedModel,
+ [FeatureSouthernIslands, FeatureFastFMAF32]
+>;
+
+def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+
+def : ProcessorModel<"verde", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+
+def : ProcessorModel<"oland", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+
+def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+
+//===----------------------------------------------------------------------===//
+// Sea Islands
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
+ [FeatureSeaIslands, FeatureLDSBankCount32]
+>;
+
+def : ProcessorModel<"kabini", SIQuarterSpeedModel,
+ [FeatureSeaIslands, FeatureLDSBankCount16]
+>;
+
+def : ProcessorModel<"kaveri", SIQuarterSpeedModel,
+ [FeatureSeaIslands, FeatureLDSBankCount32]
+>;
+
+def : ProcessorModel<"hawaii", SIFullSpeedModel,
+ [FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32]
+>;
+
+def : ProcessorModel<"mullins", SIQuarterSpeedModel,
+ [FeatureSeaIslands, FeatureLDSBankCount16]>;
+
+//===----------------------------------------------------------------------===//
+// Volcanic Islands
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"tonga", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureSGPRInitBug]
+>;
+
+def : ProcessorModel<"iceland", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureSGPRInitBug]
+>;
+
+def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
new file mode 100644
index 0000000..3cb9021
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
@@ -0,0 +1,206 @@
+//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// The R600EmitClauseMarkers pass emits CFAlu instructions in a conservative
+/// manner. This pass merges consecutive CFAlu instructions where applicable.
+/// It needs to be called after IfCvt for best results.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "r600mergeclause"
+
+namespace {
+
+static bool isCFAlu(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case AMDGPU::CF_ALU:
+ case AMDGPU::CF_ALU_PUSH_BEFORE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+class R600ClauseMergePass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+
+ unsigned getCFAluSize(const MachineInstr *MI) const;
+ bool isCFAluEnabled(const MachineInstr *MI) const;
+
+  /// The IfCvt pass can generate "disabled" ALU clause markers that need to be
+  /// removed, with their contents folded into the previous ALU clause.
+  /// This function parses the instructions following CFAlu, merging the
+  /// contents of every disabled CFAlu it finds, and stops at the first
+  /// enabled CFAlu.
+ void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
+
+  /// Check whether LatrCFAlu can be merged into RootCFAlu and, if so,
+  /// perform the merge.
+ bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
+ const;
+
+public:
+ R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override;
+};
+
+char R600ClauseMergePass::ID = 0;
+
+unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
+ assert(isCFAlu(MI));
+ return MI->getOperand(
+ TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
+}
+
+bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
+ assert(isCFAlu(MI));
+ return MI->getOperand(
+ TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
+}
+
+void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
+ const {
+ int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
+ MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
+ I++;
+ do {
+    while (I != E && !isCFAlu(I))
+ I++;
+ if (I == E)
+ return;
+ MachineInstr *MI = I++;
+ if (isCFAluEnabled(MI))
+ break;
+ CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
+ MI->eraseFromParent();
+ } while (I != E);
+}
+
+bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
+ const MachineInstr *LatrCFAlu) const {
+ assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
+ int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
+ unsigned RootInstCount = getCFAluSize(RootCFAlu),
+ LaterInstCount = getCFAluSize(LatrCFAlu);
+ unsigned CumuledInsts = RootInstCount + LaterInstCount;
+ if (CumuledInsts >= TII->getMaxAlusPerClause()) {
+ DEBUG(dbgs() << "Excess inst counts\n");
+ return false;
+ }
+ if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
+ return false;
+  // Is KCache Bank 0 compatible?
+ int Mode0Idx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
+ int KBank0Idx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
+ int KBank0LineIdx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
+ if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
+ RootCFAlu->getOperand(Mode0Idx).getImm() &&
+ (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
+ RootCFAlu->getOperand(KBank0Idx).getImm() ||
+ LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
+ RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
+ DEBUG(dbgs() << "Wrong KC0\n");
+ return false;
+ }
+  // Is KCache Bank 1 compatible?
+ int Mode1Idx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
+ int KBank1Idx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
+ int KBank1LineIdx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
+ if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
+ RootCFAlu->getOperand(Mode1Idx).getImm() &&
+ (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
+ RootCFAlu->getOperand(KBank1Idx).getImm() ||
+ LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
+ RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
+    DEBUG(dbgs() << "Wrong KC1\n");
+ return false;
+ }
+ if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
+ RootCFAlu->getOperand(Mode0Idx).setImm(
+ LatrCFAlu->getOperand(Mode0Idx).getImm());
+ RootCFAlu->getOperand(KBank0Idx).setImm(
+ LatrCFAlu->getOperand(KBank0Idx).getImm());
+ RootCFAlu->getOperand(KBank0LineIdx).setImm(
+ LatrCFAlu->getOperand(KBank0LineIdx).getImm());
+ }
+ if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
+ RootCFAlu->getOperand(Mode1Idx).setImm(
+ LatrCFAlu->getOperand(Mode1Idx).getImm());
+ RootCFAlu->getOperand(KBank1Idx).setImm(
+ LatrCFAlu->getOperand(KBank1Idx).getImm());
+ RootCFAlu->getOperand(KBank1LineIdx).setImm(
+ LatrCFAlu->getOperand(KBank1LineIdx).getImm());
+ }
+ RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
+ RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
+ return true;
+}
+
+bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator LatestCFAlu = E;
+ while (I != E) {
+ MachineInstr *MI = I++;
+ if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
+ TII->mustBeLastInClause(MI->getOpcode()))
+ LatestCFAlu = E;
+ if (!isCFAlu(MI))
+ continue;
+ cleanPotentialDisabledCFAlu(MI);
+
+ if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
+ MI->eraseFromParent();
+ } else {
+ assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
+ LatestCFAlu = MI;
+ }
+ }
+ }
+ return false;
+}
+
+const char *R600ClauseMergePass::getPassName() const {
+ return "R600 Merge Clause Markers Pass";
+}
+
+} // end anonymous namespace
+
+
+llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
+ return new R600ClauseMergePass(TM);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
new file mode 100644
index 0000000..c8f37f6
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -0,0 +1,679 @@
+//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass turns all control flow pseudo instructions into native ones,
+/// computing their addresses on the fly; it also sets the STACK_SIZE info.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Debug.h"
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "r600cf"
+
+namespace {
+
+struct CFStack {
+
+ enum StackItem {
+ ENTRY = 0,
+ SUB_ENTRY = 1,
+ FIRST_NON_WQM_PUSH = 2,
+ FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
+ };
+
+ const AMDGPUSubtarget *ST;
+ std::vector<StackItem> BranchStack;
+ std::vector<StackItem> LoopStack;
+ unsigned MaxStackSize;
+ unsigned CurrentEntries;
+ unsigned CurrentSubEntries;
+
+ CFStack(const AMDGPUSubtarget *st, unsigned ShaderType) : ST(st),
+ // We need to reserve a stack entry for CALL_FS in vertex shaders.
+ MaxStackSize(ShaderType == ShaderType::VERTEX ? 1 : 0),
+ CurrentEntries(0), CurrentSubEntries(0) { }
+
+ unsigned getLoopDepth();
+ bool branchStackContains(CFStack::StackItem);
+ bool requiresWorkAroundForInst(unsigned Opcode);
+ unsigned getSubEntrySize(CFStack::StackItem Item);
+ void updateMaxStackSize();
+ void pushBranch(unsigned Opcode, bool isWQM = false);
+ void pushLoop();
+ void popBranch();
+ void popLoop();
+};
+
+unsigned CFStack::getLoopDepth() {
+ return LoopStack.size();
+}
+
+bool CFStack::branchStackContains(CFStack::StackItem Item) {
+ for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
+ E = BranchStack.end(); I != E; ++I) {
+ if (*I == Item)
+ return true;
+ }
+ return false;
+}
+
+bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
+ if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
+ getLoopDepth() > 1)
+ return true;
+
+ if (!ST->hasCFAluBug())
+ return false;
+
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::CF_ALU_PUSH_BEFORE:
+ case AMDGPU::CF_ALU_ELSE_AFTER:
+ case AMDGPU::CF_ALU_BREAK:
+ case AMDGPU::CF_ALU_CONTINUE:
+ if (CurrentSubEntries == 0)
+ return false;
+ if (ST->getWavefrontSize() == 64) {
+      // We are being conservative here. Strictly, this work-around is only
+      // required when CurrentSubEntries > 3 &&
+      // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
+ //
+ // We have to be conservative, because we don't know for certain that
+ // our stack allocation algorithm for Evergreen/NI is correct. Applying this
+ // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
+ // resources without any problems.
+ return CurrentSubEntries > 3;
+ } else {
+ assert(ST->getWavefrontSize() == 32);
+      // We are being conservative here. Strictly, the work-around is only
+      // required when CurrentSubEntries > 7 &&
+      // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
+ // See the comment on the wavefront size == 64 case for why we are
+ // being conservative.
+ return CurrentSubEntries > 7;
+ }
+ }
+}
+
+unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
+ switch(Item) {
+ default:
+ return 0;
+ case CFStack::FIRST_NON_WQM_PUSH:
+ assert(!ST->hasCaymanISA());
+ if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
+ // +1 For the push operation.
+ // +2 Extra space required.
+ return 3;
+ } else {
+ // Some documentation says that this is not necessary on Evergreen,
+      // but experimentation has shown that we need to allocate 1 extra
+ // sub-entry for the first non-WQM push.
+ // +1 For the push operation.
+ // +1 Extra space required.
+ return 2;
+ }
+ case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
+ assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
+ // +1 For the push operation.
+ // +1 Extra space required.
+ return 2;
+ case CFStack::SUB_ENTRY:
+ return 1;
+ }
+}
+
+void CFStack::updateMaxStackSize() {
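+  // Four sub-entries share one full stack entry, so round the sub-entry count
+  // up to a multiple of four before converting it to whole entries.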
+ unsigned CurrentStackSize = CurrentEntries +
+ (RoundUpToAlignment(CurrentSubEntries, 4) / 4);
+ MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
+}
+
+void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
+ CFStack::StackItem Item = CFStack::ENTRY;
+ switch(Opcode) {
+ case AMDGPU::CF_PUSH_EG:
+ case AMDGPU::CF_ALU_PUSH_BEFORE:
+ if (!isWQM) {
+ if (!ST->hasCaymanISA() &&
+ !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
+ Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI
+ // See comment in
+ // CFStack::getSubEntrySize()
+ else if (CurrentEntries > 0 &&
+ ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
+ !ST->hasCaymanISA() &&
+ !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
+ Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
+ else
+ Item = CFStack::SUB_ENTRY;
+ } else
+ Item = CFStack::ENTRY;
+ break;
+ }
+ BranchStack.push_back(Item);
+ if (Item == CFStack::ENTRY)
+ CurrentEntries++;
+ else
+ CurrentSubEntries += getSubEntrySize(Item);
+ updateMaxStackSize();
+}
+
+void CFStack::pushLoop() {
+ LoopStack.push_back(CFStack::ENTRY);
+ CurrentEntries++;
+ updateMaxStackSize();
+}
+
+void CFStack::popBranch() {
+ CFStack::StackItem Top = BranchStack.back();
+ if (Top == CFStack::ENTRY)
+ CurrentEntries--;
+ else
+    CurrentSubEntries -= getSubEntrySize(Top);
+ BranchStack.pop_back();
+}
+
+void CFStack::popLoop() {
+ CurrentEntries--;
+ LoopStack.pop_back();
+}
+
+class R600ControlFlowFinalizer : public MachineFunctionPass {
+
+private:
+ typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
+
+ enum ControlFlowInstruction {
+ CF_TC,
+ CF_VC,
+ CF_CALL_FS,
+ CF_WHILE_LOOP,
+ CF_END_LOOP,
+ CF_LOOP_BREAK,
+ CF_LOOP_CONTINUE,
+ CF_JUMP,
+ CF_ELSE,
+ CF_POP,
+ CF_END
+ };
+
+ static char ID;
+ const R600InstrInfo *TII;
+ const R600RegisterInfo *TRI;
+ unsigned MaxFetchInst;
+ const AMDGPUSubtarget *ST;
+
+ bool IsTrivialInst(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::KILL:
+ case AMDGPU::RETURN:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
+ unsigned Opcode = 0;
+ bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
+ switch (CFI) {
+ case CF_TC:
+ Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
+ break;
+ case CF_VC:
+ Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
+ break;
+ case CF_CALL_FS:
+ Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
+ break;
+ case CF_WHILE_LOOP:
+ Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
+ break;
+ case CF_END_LOOP:
+ Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
+ break;
+ case CF_LOOP_BREAK:
+ Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
+ break;
+ case CF_LOOP_CONTINUE:
+ Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
+ break;
+ case CF_JUMP:
+ Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
+ break;
+ case CF_ELSE:
+ Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
+ break;
+ case CF_POP:
+ Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
+ break;
+ case CF_END:
+ if (ST->hasCaymanISA()) {
+ Opcode = AMDGPU::CF_END_CM;
+ break;
+ }
+ Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
+ break;
+ }
+ assert (Opcode && "No opcode selected");
+ return TII->get(Opcode);
+ }
+
+ bool isCompatibleWithClause(const MachineInstr *MI,
+ std::set<unsigned> &DstRegs) const {
+ unsigned DstMI, SrcMI;
+ for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+ E = MI->operands_end(); I != E; ++I) {
+ const MachineOperand &MO = *I;
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (AMDGPU::R600_Reg128RegClass.contains(Reg))
+ DstMI = Reg;
+ else
+ DstMI = TRI->getMatchingSuperReg(Reg,
+ TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
+ &AMDGPU::R600_Reg128RegClass);
+ }
+ if (MO.isUse()) {
+ unsigned Reg = MO.getReg();
+ if (AMDGPU::R600_Reg128RegClass.contains(Reg))
+ SrcMI = Reg;
+ else
+ SrcMI = TRI->getMatchingSuperReg(Reg,
+ TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
+ &AMDGPU::R600_Reg128RegClass);
+ }
+ }
+ if ((DstRegs.find(SrcMI) == DstRegs.end())) {
+ DstRegs.insert(DstMI);
+ return true;
+ } else
+ return false;
+ }
+
+ ClauseFile
+ MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
+ const {
+ MachineBasicBlock::iterator ClauseHead = I;
+ std::vector<MachineInstr *> ClauseContent;
+ unsigned AluInstCount = 0;
+ bool IsTex = TII->usesTextureCache(ClauseHead);
+ std::set<unsigned> DstRegs;
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
+ if (IsTrivialInst(I))
+ continue;
+ if (AluInstCount >= MaxFetchInst)
+ break;
+ if ((IsTex && !TII->usesTextureCache(I)) ||
+ (!IsTex && !TII->usesVertexCache(I)))
+ break;
+ if (!isCompatibleWithClause(I, DstRegs))
+ break;
+ AluInstCount ++;
+ ClauseContent.push_back(I);
+ }
+ MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
+ getHWInstrDesc(IsTex?CF_TC:CF_VC))
+ .addImm(0) // ADDR
+ .addImm(AluInstCount - 1); // COUNT
+ return ClauseFile(MIb, std::move(ClauseContent));
+ }
+
+ void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
+ static const unsigned LiteralRegs[] = {
+ AMDGPU::ALU_LITERAL_X,
+ AMDGPU::ALU_LITERAL_Y,
+ AMDGPU::ALU_LITERAL_Z,
+ AMDGPU::ALU_LITERAL_W
+ };
+ const SmallVector<std::pair<MachineOperand *, int64_t>, 3 > Srcs =
+ TII->getSrcs(MI);
+ for (unsigned i = 0, e = Srcs.size(); i < e; ++i) {
+ if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X)
+ continue;
+ int64_t Imm = Srcs[i].second;
+ std::vector<int64_t>::iterator It =
+ std::find(Lits.begin(), Lits.end(), Imm);
+ if (It != Lits.end()) {
+ unsigned Index = It - Lits.begin();
+ Srcs[i].first->setReg(LiteralRegs[Index]);
+ } else {
+ assert(Lits.size() < 4 && "Too many literals in Instruction Group");
+ Srcs[i].first->setReg(LiteralRegs[Lits.size()]);
+ Lits.push_back(Imm);
+ }
+ }
+ }
+
+ MachineBasicBlock::iterator insertLiterals(
+ MachineBasicBlock::iterator InsertPos,
+ const std::vector<unsigned> &Literals) const {
+ MachineBasicBlock *MBB = InsertPos->getParent();
+ for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
+ unsigned LiteralPair0 = Literals[i];
+ unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
+ InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
+ TII->get(AMDGPU::LITERALS))
+ .addImm(LiteralPair0)
+ .addImm(LiteralPair1);
+ }
+ return InsertPos;
+ }
+
+ ClauseFile
+ MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
+ const {
+ MachineBasicBlock::iterator ClauseHead = I;
+ std::vector<MachineInstr *> ClauseContent;
+ I++;
+ for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
+ if (IsTrivialInst(I)) {
+ ++I;
+ continue;
+ }
+ if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
+ break;
+ std::vector<int64_t> Literals;
+ if (I->isBundle()) {
+ MachineInstr *DeleteMI = I;
+ MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
+ while (++BI != E && BI->isBundledWithPred()) {
+ BI->unbundleFromPred();
+ for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = BI->getOperand(i);
+ if (MO.isReg() && MO.isInternalRead())
+ MO.setIsInternalRead(false);
+ }
+ getLiteral(BI, Literals);
+ ClauseContent.push_back(BI);
+ }
+ I = BI;
+ DeleteMI->eraseFromParent();
+ } else {
+ getLiteral(I, Literals);
+ ClauseContent.push_back(I);
+ I++;
+ }
+ for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
+ unsigned literal0 = Literals[i];
+ unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0;
+ MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(),
+ TII->get(AMDGPU::LITERALS))
+ .addImm(literal0)
+ .addImm(literal2);
+ ClauseContent.push_back(MILit);
+ }
+ }
+ assert(ClauseContent.size() < 128 && "ALU clause is too big");
+ ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
+ return ClauseFile(ClauseHead, std::move(ClauseContent));
+ }
+
+ void
+ EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
+ unsigned &CfCount) {
+ CounterPropagateAddr(Clause.first, CfCount);
+ MachineBasicBlock *BB = Clause.first->getParent();
+ BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
+ .addImm(CfCount);
+ for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
+ BB->splice(InsertPos, BB, Clause.second[i]);
+ }
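+    // Each fetch instruction counts as two CF words, hence the factor of two.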
+ CfCount += 2 * Clause.second.size();
+ }
+
+ void
+ EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
+ unsigned &CfCount) {
+ Clause.first->getOperand(0).setImm(0);
+ CounterPropagateAddr(Clause.first, CfCount);
+ MachineBasicBlock *BB = Clause.first->getParent();
+ BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
+ .addImm(CfCount);
+ for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
+ BB->splice(InsertPos, BB, Clause.second[i]);
+ }
+ CfCount += Clause.second.size();
+ }
+
+ void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
+ MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
+ }
+ void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
+ unsigned Addr) const {
+ for (MachineInstr *MI : MIs) {
+ CounterPropagateAddr(MI, Addr);
+ }
+ }
+
+public:
+ R600ControlFlowFinalizer(TargetMachine &tm)
+ : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ ST = &MF.getSubtarget<AMDGPUSubtarget>();
+ MaxFetchInst = ST->getTexVTXClauseSize();
+ TII = static_cast<const R600InstrInfo *>(ST->getInstrInfo());
+ TRI = static_cast<const R600RegisterInfo *>(ST->getRegisterInfo());
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+
+ CFStack CFStack(ST, MFI->getShaderType());
+ for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
+ ++MB) {
+ MachineBasicBlock &MBB = *MB;
+ unsigned CfCount = 0;
+ std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
+ std::vector<MachineInstr * > IfThenElseStack;
+ if (MFI->getShaderType() == ShaderType::VERTEX) {
+ BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+ getHWInstrDesc(CF_CALL_FS));
+ CfCount++;
+ }
+ std::vector<ClauseFile> FetchClauses, AluClauses;
+ std::vector<MachineInstr *> LastAlu(1);
+ std::vector<MachineInstr *> ToPopAfter;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E;) {
+ if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
+ DEBUG(dbgs() << CfCount << ":"; I->dump(););
+ FetchClauses.push_back(MakeFetchClause(MBB, I));
+ CfCount++;
+ LastAlu.back() = nullptr;
+ continue;
+ }
+
+ MachineBasicBlock::iterator MI = I;
+ if (MI->getOpcode() != AMDGPU::ENDIF)
+ LastAlu.back() = nullptr;
+ if (MI->getOpcode() == AMDGPU::CF_ALU)
+ LastAlu.back() = MI;
+ I++;
+ bool RequiresWorkAround =
+ CFStack.requiresWorkAroundForInst(MI->getOpcode());
+ switch (MI->getOpcode()) {
+ case AMDGPU::CF_ALU_PUSH_BEFORE:
+ if (RequiresWorkAround) {
+ DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
+ .addImm(CfCount + 1)
+ .addImm(1);
+ MI->setDesc(TII->get(AMDGPU::CF_ALU));
+ CfCount++;
+ CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
+ } else
+ CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
+
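+          // Intentional fall-through: both the rewritten and the original
+          // instruction are handled as an ALU clause by the CF_ALU case below.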
+ case AMDGPU::CF_ALU:
+ I = MI;
+ AluClauses.push_back(MakeALUClause(MBB, I));
+ DEBUG(dbgs() << CfCount << ":"; MI->dump(););
+ CfCount++;
+ break;
+ case AMDGPU::WHILELOOP: {
+ CFStack.pushLoop();
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ getHWInstrDesc(CF_WHILE_LOOP))
+ .addImm(1);
+ std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
+ std::set<MachineInstr *>());
+ Pair.second.insert(MIb);
+ LoopStack.push_back(std::move(Pair));
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ENDLOOP: {
+ CFStack.popLoop();
+ std::pair<unsigned, std::set<MachineInstr *> > Pair =
+ std::move(LoopStack.back());
+ LoopStack.pop_back();
+ CounterPropagateAddr(Pair.second, CfCount);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
+ .addImm(Pair.first + 1);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::IF_PREDICATE_SET: {
+ LastAlu.push_back(nullptr);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ getHWInstrDesc(CF_JUMP))
+ .addImm(0)
+ .addImm(0);
+ IfThenElseStack.push_back(MIb);
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ELSE: {
+ MachineInstr * JumpInst = IfThenElseStack.back();
+ IfThenElseStack.pop_back();
+ CounterPropagateAddr(JumpInst, CfCount);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ getHWInstrDesc(CF_ELSE))
+ .addImm(0)
+ .addImm(0);
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ IfThenElseStack.push_back(MIb);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ENDIF: {
+ CFStack.popBranch();
+ if (LastAlu.back()) {
+ ToPopAfter.push_back(LastAlu.back());
+ } else {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ getHWInstrDesc(CF_POP))
+ .addImm(CfCount + 1)
+ .addImm(1);
+ (void)MIb;
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ CfCount++;
+ }
+
+ MachineInstr *IfOrElseInst = IfThenElseStack.back();
+ IfThenElseStack.pop_back();
+ CounterPropagateAddr(IfOrElseInst, CfCount);
+ IfOrElseInst->getOperand(1).setImm(1);
+ LastAlu.pop_back();
+ MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::BREAK: {
+ CfCount ++;
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ getHWInstrDesc(CF_LOOP_BREAK))
+ .addImm(0);
+ LoopStack.back().second.insert(MIb);
+ MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::CONTINUE: {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ getHWInstrDesc(CF_LOOP_CONTINUE))
+ .addImm(0);
+ LoopStack.back().second.insert(MIb);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::RETURN: {
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
+ CfCount++;
+ MI->eraseFromParent();
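+        // Pad the CF program to an even number of entries before appending
+        // the fetch and ALU clauses (clause bodies presumably have to start
+        // on an even CF address).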
+ if (CfCount % 2) {
+ BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
+ CfCount++;
+ }
+ for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
+ EmitFetchClause(I, FetchClauses[i], CfCount);
+ for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
+ EmitALUClause(I, AluClauses[i], CfCount);
+ }
+ default:
+ if (TII->isExport(MI->getOpcode())) {
+ DEBUG(dbgs() << CfCount << ":"; MI->dump(););
+ CfCount++;
+ }
+ break;
+ }
+ }
+ for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
+ MachineInstr *Alu = ToPopAfter[i];
+ BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
+ TII->get(AMDGPU::CF_ALU_POP_AFTER))
+ .addImm(Alu->getOperand(0).getImm())
+ .addImm(Alu->getOperand(1).getImm())
+ .addImm(Alu->getOperand(2).getImm())
+ .addImm(Alu->getOperand(3).getImm())
+ .addImm(Alu->getOperand(4).getImm())
+ .addImm(Alu->getOperand(5).getImm())
+ .addImm(Alu->getOperand(6).getImm())
+ .addImm(Alu->getOperand(7).getImm())
+ .addImm(Alu->getOperand(8).getImm());
+ Alu->eraseFromParent();
+ }
+ MFI->StackSize = CFStack.MaxStackSize;
+ }
+
+ return false;
+ }
+
+ const char *getPassName() const override {
+ return "R600 Control Flow Finalizer Pass";
+ }
+};
+
+char R600ControlFlowFinalizer::ID = 0;
+
+} // end anonymous namespace
+
+
+llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
+ return new R600ControlFlowFinalizer(TM);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Defines.h b/contrib/llvm/lib/Target/AMDGPU/R600Defines.h
new file mode 100644
index 0000000..6ff0a22
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600Defines.h
@@ -0,0 +1,171 @@
+//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_R600DEFINES_H
+#define LLVM_LIB_TARGET_R600_R600DEFINES_H
+
+#include "llvm/MC/MCRegisterInfo.h"
+
+// Operand Flags
+#define MO_FLAG_CLAMP (1 << 0)
+#define MO_FLAG_NEG (1 << 1)
+#define MO_FLAG_ABS (1 << 2)
+#define MO_FLAG_MASK (1 << 3)
+#define MO_FLAG_PUSH (1 << 4)
+#define MO_FLAG_NOT_LAST (1 << 5)
+#define MO_FLAG_LAST (1 << 6)
+#define NUM_MO_FLAGS 7
+
+/// \brief Helper for getting the operand index for the instruction flags
+/// operand.
+#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3)
+
+namespace R600_InstFlag {
+ enum TIF {
+ TRANS_ONLY = (1 << 0),
+ TEX = (1 << 1),
+ REDUCTION = (1 << 2),
+ FC = (1 << 3),
+ TRIG = (1 << 4),
+ OP3 = (1 << 5),
+ VECTOR = (1 << 6),
+ //FlagOperand bits 7, 8
+ NATIVE_OPERANDS = (1 << 9),
+ OP1 = (1 << 10),
+ OP2 = (1 << 11),
+ VTX_INST = (1 << 12),
+ TEX_INST = (1 << 13),
+ ALU_INST = (1 << 14),
+ LDS_1A = (1 << 15),
+ LDS_1A1D = (1 << 16),
+ IS_EXPORT = (1 << 17),
+ LDS_1A2D = (1 << 18)
+ };
+} // namespace R600_InstFlag
+
+#define HAS_NATIVE_OPERANDS(Flags) ((Flags) & R600_InstFlag::NATIVE_OPERANDS)
+
+/// \brief Defines for extracting register information from register encoding
+#define HW_REG_MASK 0x1ff
+#define HW_CHAN_SHIFT 9
+
+#define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT)
+#define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK)
+
+#define IS_VTX(desc) ((desc).TSFlags & R600_InstFlag::VTX_INST)
+#define IS_TEX(desc) ((desc).TSFlags & R600_InstFlag::TEX_INST)
+
+namespace OpName {
+
+ enum VecOps {
+ UPDATE_EXEC_MASK_X,
+ UPDATE_PREDICATE_X,
+ WRITE_X,
+ OMOD_X,
+ DST_REL_X,
+ CLAMP_X,
+ SRC0_X,
+ SRC0_NEG_X,
+ SRC0_REL_X,
+ SRC0_ABS_X,
+ SRC0_SEL_X,
+ SRC1_X,
+ SRC1_NEG_X,
+ SRC1_REL_X,
+ SRC1_ABS_X,
+ SRC1_SEL_X,
+ PRED_SEL_X,
+ UPDATE_EXEC_MASK_Y,
+ UPDATE_PREDICATE_Y,
+ WRITE_Y,
+ OMOD_Y,
+ DST_REL_Y,
+ CLAMP_Y,
+ SRC0_Y,
+ SRC0_NEG_Y,
+ SRC0_REL_Y,
+ SRC0_ABS_Y,
+ SRC0_SEL_Y,
+ SRC1_Y,
+ SRC1_NEG_Y,
+ SRC1_REL_Y,
+ SRC1_ABS_Y,
+ SRC1_SEL_Y,
+ PRED_SEL_Y,
+ UPDATE_EXEC_MASK_Z,
+ UPDATE_PREDICATE_Z,
+ WRITE_Z,
+ OMOD_Z,
+ DST_REL_Z,
+ CLAMP_Z,
+ SRC0_Z,
+ SRC0_NEG_Z,
+ SRC0_REL_Z,
+ SRC0_ABS_Z,
+ SRC0_SEL_Z,
+ SRC1_Z,
+ SRC1_NEG_Z,
+ SRC1_REL_Z,
+ SRC1_ABS_Z,
+ SRC1_SEL_Z,
+ PRED_SEL_Z,
+ UPDATE_EXEC_MASK_W,
+ UPDATE_PREDICATE_W,
+ WRITE_W,
+ OMOD_W,
+ DST_REL_W,
+ CLAMP_W,
+ SRC0_W,
+ SRC0_NEG_W,
+ SRC0_REL_W,
+ SRC0_ABS_W,
+ SRC0_SEL_W,
+ SRC1_W,
+ SRC1_NEG_W,
+ SRC1_REL_W,
+ SRC1_ABS_W,
+ SRC1_SEL_W,
+ PRED_SEL_W,
+ IMM_0,
+ IMM_1,
+ VEC_COUNT
+ };
+
+} // namespace OpName
+
+//===----------------------------------------------------------------------===//
+// Config register definitions
+//===----------------------------------------------------------------------===//
+
+#define R_02880C_DB_SHADER_CONTROL 0x02880C
+#define S_02880C_KILL_ENABLE(x) (((x) & 0x1) << 6)
+
+// These fields are the same for all shader types and families.
+#define S_NUM_GPRS(x) (((x) & 0xFF) << 0)
+#define S_STACK_SIZE(x) (((x) & 0xFF) << 8)
+//===----------------------------------------------------------------------===//
+// R600, R700 Registers
+//===----------------------------------------------------------------------===//
+
+#define R_028850_SQ_PGM_RESOURCES_PS 0x028850
+#define R_028868_SQ_PGM_RESOURCES_VS 0x028868
+
+//===----------------------------------------------------------------------===//
+// Evergreen, Northern Islands Registers
+//===----------------------------------------------------------------------===//
+
+#define R_028844_SQ_PGM_RESOURCES_PS 0x028844
+#define R_028860_SQ_PGM_RESOURCES_VS 0x028860
+#define R_028878_SQ_PGM_RESOURCES_GS 0x028878
+#define R_0288D4_SQ_PGM_RESOURCES_LS 0x0288d4
+
+#define R_0288E8_SQ_LDS_ALLOC 0x0288E8
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
new file mode 100644
index 0000000..fdc2030
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -0,0 +1,336 @@
+//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Add CF_ALU. R600 ALU instructions are grouped into clauses, each of which
+/// can hold up to 128 ALU instructions; the instructions in a clause can
+/// access up to 4 prefetched lines of 16 registers from the constant buffers.
+/// Such ALU clauses are initiated by CF_ALU instructions.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeR600EmitClauseMarkersPass(PassRegistry&);
+}
+
+namespace {
+
+class R600EmitClauseMarkers : public MachineFunctionPass {
+
+private:
+ const R600InstrInfo *TII;
+ int Address;
+
+ unsigned OccupiedDwords(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::DOT_4:
+ return 4;
+ case AMDGPU::KILL:
+ return 0;
+ default:
+ break;
+ }
+
+ // These will be expanded to two ALU instructions in the
+ // ExpandSpecialInstructions pass.
+ if (TII->isLDSRetInstr(MI->getOpcode()))
+ return 2;
+
+    if (TII->isVector(*MI) ||
+ TII->isCubeOp(MI->getOpcode()) ||
+ TII->isReductionOp(MI->getOpcode()))
+ return 4;
+
+ unsigned NumLiteral = 0;
+ for (MachineInstr::mop_iterator It = MI->operands_begin(),
+ E = MI->operands_end(); It != E; ++It) {
+ MachineOperand &MO = *It;
+ if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+ ++NumLiteral;
+ }
+ return 1 + NumLiteral;
+ }
+
+ bool isALU(const MachineInstr *MI) const {
+ if (TII->isALUInstr(MI->getOpcode()))
+ return true;
+ if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode()))
+ return true;
+ switch (MI->getOpcode()) {
+ case AMDGPU::PRED_X:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::COPY:
+ case AMDGPU::DOT_4:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool IsTrivialInst(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::KILL:
+ case AMDGPU::RETURN:
+ case AMDGPU::IMPLICIT_DEF:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
+ // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
+ // (See also R600ISelLowering.cpp)
+ // ConstIndex value is in [0, 4095];
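+    // e.g. Sel = (512 + (1 << 12) + 40) << 2 maps to KC_BANK 1 and line 2.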
+ return std::pair<unsigned, unsigned>(
+ ((Sel >> 2) - 512) >> 12, // KC_BANK
+ // Line Number of ConstIndex
+        // A line contains 16 constant registers; however, a KCX bank can lock
+        // two lines at the same time, so we want an even line number.
+        // The line number could be retrieved with (>>4); using (>>5) << 1
+        // instead generates an even number.
+ ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
+ }
+
+ bool SubstituteKCacheBank(MachineInstr *MI,
+ std::vector<std::pair<unsigned, unsigned> > &CachedConsts,
+ bool UpdateInstr = true) const {
+ std::vector<std::pair<unsigned, unsigned> > UsedKCache;
+
+ if (!TII->isALUInstr(MI->getOpcode()) && MI->getOpcode() != AMDGPU::DOT_4)
+ return true;
+
+ const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Consts =
+ TII->getSrcs(MI);
+ assert((TII->isALUInstr(MI->getOpcode()) ||
+ MI->getOpcode() == AMDGPU::DOT_4) && "Can't assign Const");
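+    // A clause can lock at most two (bank, line) pairs of the constant cache;
+    // record which of the two slots each ALU_CONST operand resolves to.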
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
+ continue;
+ unsigned Sel = Consts[i].second;
+ unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
+ unsigned KCacheIndex = Index * 4 + Chan;
+ const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
+ if (CachedConsts.empty()) {
+ CachedConsts.push_back(BankLine);
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
+ continue;
+ }
+ if (CachedConsts[0] == BankLine) {
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
+ continue;
+ }
+ if (CachedConsts.size() == 1) {
+ CachedConsts.push_back(BankLine);
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
+ continue;
+ }
+ if (CachedConsts[1] == BankLine) {
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
+ continue;
+ }
+ return false;
+ }
+
+ if (!UpdateInstr)
+ return true;
+
+ for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
+ if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
+ continue;
+ switch(UsedKCache[j].first) {
+ case 0:
+ Consts[i].first->setReg(
+ AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[j].second));
+ break;
+ case 1:
+ Consts[i].first->setReg(
+ AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[j].second));
+ break;
+ default:
+ llvm_unreachable("Wrong Cache Line");
+ }
+ j++;
+ }
+ return true;
+ }
+
+ bool canClauseLocalKillFitInClause(
+ unsigned AluInstCount,
+ std::vector<std::pair<unsigned, unsigned> > KCacheBanks,
+ MachineBasicBlock::iterator Def,
+ MachineBasicBlock::iterator BBEnd) {
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+ for (MachineInstr::const_mop_iterator
+ MOI = Def->operands_begin(),
+ MOE = Def->operands_end(); MOI != MOE; ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef() ||
+ TRI.isPhysRegLiveAcrossClauses(MOI->getReg()))
+ continue;
+
+ // Def defines a clause local register, so check that its use will fit
+ // in the clause.
+ unsigned LastUseCount = 0;
+ for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
+ AluInstCount += OccupiedDwords(UseI);
+ // Make sure we won't need to end the clause due to KCache limitations.
+ if (!SubstituteKCacheBank(UseI, KCacheBanks, false))
+ return false;
+
+ // We have reached the maximum instruction limit before finding the
+ // use that kills this register, so we cannot use this def in the
+ // current clause.
+ if (AluInstCount >= TII->getMaxAlusPerClause())
+ return false;
+
+ // Register kill flags have been cleared by the time we get to this
+ // pass, but it is safe to assume that all uses of this register
+ // occur in the same basic block as its definition, because
+ // it is illegal for the scheduler to schedule them in
+ // different blocks.
+ if (UseI->findRegisterUseOperandIdx(MOI->getReg()))
+ LastUseCount = AluInstCount;
+
+ if (UseI != Def && UseI->findRegisterDefOperandIdx(MOI->getReg()) != -1)
+ break;
+ }
+ if (LastUseCount)
+ return LastUseCount <= TII->getMaxAlusPerClause();
+ llvm_unreachable("Clause local register live at end of clause.");
+ }
+ return true;
+ }
+
+ MachineBasicBlock::iterator
+ MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator ClauseHead = I;
+ std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
+ bool PushBeforeModifier = false;
+ unsigned AluInstCount = 0;
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
+ if (IsTrivialInst(I))
+ continue;
+ if (!isALU(I))
+ break;
+ if (AluInstCount > TII->getMaxAlusPerClause())
+ break;
+ if (I->getOpcode() == AMDGPU::PRED_X) {
+ // We put PRED_X in its own clause to ensure that ifcvt won't create
+ // clauses with more than 128 insts.
+        // IfCvt checks that the "then" and "else" branches of an if statement
+        // have fewer than ~60 instructions, so converted clauses can't grow
+        // bigger than ~121 instructions (the predicate setter needs to be in
+        // the same clause as the predicated ALUs).
+ if (AluInstCount > 0)
+ break;
+ if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH)
+ PushBeforeModifier = true;
+ AluInstCount ++;
+ continue;
+ }
+ // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
+ //
+ // * KILL or INTERP instructions
+ // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
+ // * Uses waterfalling (i.e. INDEX_MODE = AR.X)
+ //
+ // XXX: These checks have not been implemented yet.
+ if (TII->mustBeLastInClause(I->getOpcode())) {
+ I++;
+ break;
+ }
+
+ // If this instruction defines a clause local register, make sure
+ // its use can fit in this clause.
+ if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
+ break;
+
+ if (!SubstituteKCacheBank(I, KCacheBanks))
+ break;
+ AluInstCount += OccupiedDwords(I);
+ }
+ unsigned Opcode = PushBeforeModifier ?
+ AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
+ BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
+    // We don't use the ADDR field until the R600ControlFlowFinalizer pass,
+    // where it is safe to assume it is 0. However, if we always put 0 here,
+    // the ifcvt pass may assume that identical ALU clause starters at the
+    // beginning of a true and a false branch can be factorized, which is not
+    // the case.
+ .addImm(Address++) // ADDR
+ .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
+ .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
+ .addImm(KCacheBanks.empty()?0:2) // KM0
+ .addImm((KCacheBanks.size() < 2)?0:2) // KM1
+ .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
+ .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
+ .addImm(AluInstCount) // COUNT
+ .addImm(1); // Enabled
+ return I;
+ }
+
+public:
+ static char ID;
+ R600EmitClauseMarkers() : MachineFunctionPass(ID), TII(nullptr), Address(0) {
+
+ initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ MachineBasicBlock::iterator I = MBB.begin();
+ if (I->getOpcode() == AMDGPU::CF_ALU)
+ continue; // BB was already parsed
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
+ if (isALU(I))
+ I = MakeALUClause(MBB, I);
+ else
+ ++I;
+ }
+ }
+ return false;
+ }
+
+ const char *getPassName() const override {
+ return "R600 Emit Clause Markers Pass";
+ }
+};
+
+char R600EmitClauseMarkers::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
+                      "R600 Emit Clause Markers", false, false)
+INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
+                      "R600 Emit Clause Markers", false, false)
+
+llvm::FunctionPass *llvm::createR600EmitClauseMarkers() {
+ return new R600EmitClauseMarkers();
+}
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
new file mode 100644
index 0000000..211d392e
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -0,0 +1,349 @@
+//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Vector, Reduction, and Cube instructions need to fill the entire instruction
+/// group to work correctly. This pass expands these individual instructions
+/// into several instructions that will completely fill the instruction group.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+
+ void SetFlagInNewMI(MachineInstr *NewMI, const MachineInstr *OldMI,
+ unsigned Op);
+
+public:
+ R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII(nullptr) { }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "R600 Expand special instructions pass";
+ }
+};
+
+} // End anonymous namespace
+
+char R600ExpandSpecialInstrsPass::ID = 0;
+
+FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
+ return new R600ExpandSpecialInstrsPass(TM);
+}
+
+void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI,
+ const MachineInstr *OldMI, unsigned Op) {
+ int OpIdx = TII->getOperandIdx(*OldMI, Op);
+ if (OpIdx > -1) {
+ uint64_t Val = OldMI->getOperand(OpIdx).getImm();
+ TII->setImmOperand(NewMI, Op, Val);
+ }
+}
+
+bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ MachineBasicBlock::iterator I = MBB.begin();
+ while (I != MBB.end()) {
+ MachineInstr &MI = *I;
+ I = std::next(I);
+
+ // Expand LDS_*_RET instructions
+ if (TII->isLDSRetInstr(MI.getOpcode())) {
+ int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
+ assert(DstIdx != -1);
+ MachineOperand &DstOp = MI.getOperand(DstIdx);
+ MachineInstr *Mov = TII->buildMovInstr(&MBB, I,
+ DstOp.getReg(), AMDGPU::OQAP);
+ DstOp.setReg(AMDGPU::OQAP);
+ int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::pred_sel);
+ int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(),
+ AMDGPU::OpName::pred_sel);
+ // Copy the pred_sel bit
+ Mov->getOperand(MovPredSelIdx).setReg(
+ MI.getOperand(LDSPredSelIdx).getReg());
+ }
+
+ switch (MI.getOpcode()) {
+ default: break;
+ // Expand PRED_X to one of the PRED_SET instructions.
+ case AMDGPU::PRED_X: {
+ uint64_t Flags = MI.getOperand(3).getImm();
+ // The native opcode used by PRED_X is stored as an immediate in the
+ // third operand.
+ MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
+ MI.getOperand(2).getImm(), // opcode
+ MI.getOperand(0).getReg(), // dst
+ MI.getOperand(1).getReg(), // src0
+ AMDGPU::ZERO); // src1
+ TII->addFlag(PredSet, 0, MO_FLAG_MASK);
+ if (Flags & MO_FLAG_PUSH) {
+ TII->setImmOperand(PredSet, AMDGPU::OpName::update_exec_mask, 1);
+ } else {
+ TII->setImmOperand(PredSet, AMDGPU::OpName::update_pred, 1);
+ }
+ MI.eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::INTERP_PAIR_XY: {
+ MachineInstr *BMI;
+ unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(2).getImm());
+
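+        // Fill all four slots of the instruction group: channels 2 and 3 get
+        // dummy writes to T0_Z/T0_W that are masked off below.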
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ unsigned DstReg;
+
+ if (Chan < 2)
+ DstReg = MI.getOperand(Chan).getReg();
+ else
+ DstReg = Chan == 2 ? AMDGPU::T0_Z : AMDGPU::T0_W;
+
+ BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_XY,
+ DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
+
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Chan >= 2)
+ TII->addFlag(BMI, 0, MO_FLAG_MASK);
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::INTERP_PAIR_ZW: {
+ MachineInstr *BMI;
+ unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(2).getImm());
+
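+        // Fill all four slots of the instruction group: channels 0 and 1 get
+        // dummy writes to T0_X/T0_Y that are masked off below.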
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ unsigned DstReg;
+
+ if (Chan < 2)
+ DstReg = Chan == 0 ? AMDGPU::T0_X : AMDGPU::T0_Y;
+ else
+ DstReg = MI.getOperand(Chan-2).getReg();
+
+ BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_ZW,
+ DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
+
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Chan < 2)
+ TII->addFlag(BMI, 0, MO_FLAG_MASK);
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::INTERP_VEC_LOAD: {
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+ MachineInstr *BMI;
+ unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(1).getImm());
+ unsigned DstReg = MI.getOperand(0).getReg();
+
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_LOAD_P0,
+ TRI.getSubReg(DstReg, TRI.getSubRegFromChannel(Chan)), PReg);
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+ continue;
+ }
+ case AMDGPU::DOT_4: {
+
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
+
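+        // Emit one slot per channel; only the channel the original DOT_4
+        // actually writes is left unmasked.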
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ bool Mask = (Chan != TRI.getHWRegChan(DstReg));
+ unsigned SubDstReg =
+ AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
+ MachineInstr *BMI =
+ TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg);
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Mask) {
+ TII->addFlag(BMI, 0, MO_FLAG_MASK);
+ }
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ unsigned Opcode = BMI->getOpcode();
+          // While not strictly necessary from a hw point of view, we force
+          // all src operands of a dot4 inst to belong to the same slot.
+ unsigned Src0 = BMI->getOperand(
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0))
+ .getReg();
+ unsigned Src1 = BMI->getOperand(
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1))
+ .getReg();
+ (void) Src0;
+ (void) Src1;
+ if ((TRI.getEncodingValue(Src0) & 0xff) < 127 &&
+ (TRI.getEncodingValue(Src1) & 0xff) < 127)
+ assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1));
+ }
+ MI.eraseFromParent();
+ continue;
+ }
+ }
+
+ bool IsReduction = TII->isReductionOp(MI.getOpcode());
+ bool IsVector = TII->isVector(MI);
+ bool IsCube = TII->isCubeOp(MI.getOpcode());
+ if (!IsReduction && !IsVector && !IsCube) {
+ continue;
+ }
+
+ // Expand the instruction
+ //
+ // Reduction instructions:
+ // T0_X = DP4 T1_XYZW, T2_XYZW
+ // becomes:
+ // TO_X = DP4 T1_X, T2_X
+ // TO_Y (write masked) = DP4 T1_Y, T2_Y
+ // TO_Z (write masked) = DP4 T1_Z, T2_Z
+ // TO_W (write masked) = DP4 T1_W, T2_W
+ //
+ // Vector instructions:
+ // T0_X = MULLO_INT T1_X, T2_X
+ // becomes:
+ // T0_X = MULLO_INT T1_X, T2_X
+ // T0_Y (write masked) = MULLO_INT T1_X, T2_X
+ // T0_Z (write masked) = MULLO_INT T1_X, T2_X
+ // T0_W (write masked) = MULLO_INT T1_X, T2_X
+ //
+ // Cube instructions:
+ // T0_XYZW = CUBE T1_XYZW
+ // becomes:
+ // TO_X = CUBE T1_Z, T1_Y
+ // T0_Y = CUBE T1_Z, T1_X
+ // T0_Z = CUBE T1_X, T1_Z
+ // T0_W = CUBE T1_Y, T1_Z
+ for (unsigned Chan = 0; Chan < 4; Chan++) {
+ unsigned DstReg = MI.getOperand(
+ TII->getOperandIdx(MI, AMDGPU::OpName::dst)).getReg();
+ unsigned Src0 = MI.getOperand(
+ TII->getOperandIdx(MI, AMDGPU::OpName::src0)).getReg();
+ unsigned Src1 = 0;
+
+ // Determine the correct source registers
+ if (!IsCube) {
+ int Src1Idx = TII->getOperandIdx(MI, AMDGPU::OpName::src1);
+ if (Src1Idx != -1) {
+ Src1 = MI.getOperand(Src1Idx).getReg();
+ }
+ }
+ if (IsReduction) {
+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
+ Src0 = TRI.getSubReg(Src0, SubRegIndex);
+ Src1 = TRI.getSubReg(Src1, SubRegIndex);
+ } else if (IsCube) {
+ static const int CubeSrcSwz[] = {2, 2, 0, 1};
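+          // Per-channel source swizzle for CUBE (see the expansion comment
+          // above): src0 uses CubeSrcSwz[Chan], src1 uses CubeSrcSwz[3 - Chan].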
+ unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
+ unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
+ Src1 = TRI.getSubReg(Src0, SubRegIndex1);
+ Src0 = TRI.getSubReg(Src0, SubRegIndex0);
+ }
+
+        // Determine the correct destination registers.
+ bool Mask = false;
+ bool NotLast = true;
+ if (IsCube) {
+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
+ DstReg = TRI.getSubReg(DstReg, SubRegIndex);
+ } else {
+ // Mask the write if the original instruction does not write to
+ // the current Channel.
+ Mask = (Chan != TRI.getHWRegChan(DstReg));
+ unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
+ DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
+ }
+
+ // Set the IsLast bit
+        NotLast = (Chan != 3);
+
+ // Add the new instruction
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AMDGPU::CUBE_r600_pseudo:
+ Opcode = AMDGPU::CUBE_r600_real;
+ break;
+ case AMDGPU::CUBE_eg_pseudo:
+ Opcode = AMDGPU::CUBE_eg_real;
+ break;
+ default:
+ break;
+ }
+
+ MachineInstr *NewMI =
+ TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1);
+
+ if (Chan != 0)
+ NewMI->bundleWithPred();
+ if (Mask) {
+ TII->addFlag(NewMI, 0, MO_FLAG_MASK);
+ }
+ if (NotLast) {
+ TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
+ }
+ SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::clamp);
+ SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::literal);
+ SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_abs);
+ SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_abs);
+ SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_neg);
+ SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_neg);
+ }
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
new file mode 100644
index 0000000..8357b6d
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -0,0 +1,2286 @@
+//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Custom DAG lowering for R600
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600ISelLowering.h"
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Function.h"
+
+using namespace llvm;
+
+R600TargetLowering::R600TargetLowering(TargetMachine &TM,
+ const AMDGPUSubtarget &STI)
+ : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
+ addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
+ addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
+ addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
+ addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
+ addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
+
+ computeRegisterProperties(STI.getRegisterInfo());
+
+ // Set condition code actions
+ setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
+
+ setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
+ setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
+
+ setOperationAction(ISD::FCOS, MVT::f32, Custom);
+ setOperationAction(ISD::FSIN, MVT::f32, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
+
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+
+ setOperationAction(ISD::FSUB, MVT::f32, Expand);
+
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+
+ // ADD, SUB overflow.
+ // TODO: turn these into Legal?
+ if (Subtarget->hasCARRY())
+ setOperationAction(ISD::UADDO, MVT::i32, Custom);
+
+ if (Subtarget->hasBORROW())
+ setOperationAction(ISD::USUBO, MVT::i32, Custom);
+
+ // Expand sign extension of vectors
+ if (!Subtarget->hasBFE())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
+
+ if (!Subtarget->hasBFE())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
+
+ if (!Subtarget->hasBFE())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+
+ // Legalize loads and stores to the private address space.
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+
+ // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
+ // spaces, so it is custom lowered to handle those where it isn't.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
+
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
+ }
+
+ setOperationAction(ISD::STORE, MVT::i8, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ setTruncStoreAction(MVT::i32, MVT::i8, Custom);
+ setTruncStoreAction(MVT::i32, MVT::i16, Custom);
+
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::SELECT_CC);
+ setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+
+ // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
+ // to be Legal/Custom in order to avoid library calls.
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+
+ const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
+ for (MVT VT : ScalarIntVTs) {
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::SUBE, VT, Expand);
+ }
+
+ setSchedulingPreference(Sched::Source);
+}
+
+MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr * MI, MachineBasicBlock * BB) const {
+ MachineFunction * MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineBasicBlock::iterator I = *MI;
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
+
+ switch (MI->getOpcode()) {
+ default:
+  // Replace LDS_*_RET instructions that don't have any uses with the
+  // equivalent LDS_*_NORET instructions.
+ if (TII->isLDSRetInstr(MI->getOpcode())) {
+ int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ assert(DstIdx != -1);
+ MachineInstrBuilder NewMI;
+ // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
+ // LDS_1A2D support and remove this special case.
+ if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
+ MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
+ return BB;
+
+ NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
+ TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
+ NewMI.addOperand(MI->getOperand(i));
+ }
+ } else {
+ return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ }
+ break;
+ case AMDGPU::CLAMP_R600: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+ AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg());
+ TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
+ break;
+ }
+
+ case AMDGPU::FABS_R600: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+ AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg());
+ TII->addFlag(NewMI, 0, MO_FLAG_ABS);
+ break;
+ }
+
+ case AMDGPU::FNEG_R600: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+ AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg());
+ TII->addFlag(NewMI, 0, MO_FLAG_NEG);
+ break;
+ }
+
+ case AMDGPU::MASK_WRITE: {
+ unsigned maskedRegister = MI->getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
+ MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
+ TII->addFlag(defInstr, 0, MO_FLAG_MASK);
+ break;
+ }
+
+ case AMDGPU::MOV_IMM_F32:
+ TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
+ MI->getOperand(1).getFPImm()->getValueAPF()
+ .bitcastToAPInt().getZExtValue());
+ break;
+ case AMDGPU::MOV_IMM_I32:
+ TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+ break;
+ case AMDGPU::CONST_COPY: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
+ MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
+ TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
+ MI->getOperand(1).getImm());
+ break;
+ }
+
+ case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+ case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
+ case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
+ unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(EOP); // Set End of program bit
+ break;
+ }
+
+ case AMDGPU::TXD: {
+ unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ MachineOperand &RID = MI->getOperand(4);
+ MachineOperand &SID = MI->getOperand(5);
+ unsigned TextureId = MI->getOperand(6).getImm();
+ unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
+ unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
+
+ switch (TextureId) {
+ case 5: // Rect
+ CTX = CTY = 0;
+ break;
+ case 6: // Shadow1D
+ SrcW = SrcZ;
+ break;
+ case 7: // Shadow2D
+ SrcW = SrcZ;
+ break;
+ case 8: // ShadowRect
+ CTX = CTY = 0;
+ SrcW = SrcZ;
+ break;
+ case 9: // 1DArray
+ SrcZ = SrcY;
+ CTZ = 0;
+ break;
+ case 10: // 2DArray
+ CTZ = 0;
+ break;
+ case 11: // Shadow1DArray
+ SrcZ = SrcY;
+ CTZ = 0;
+ break;
+ case 12: // Shadow2DArray
+ CTZ = 0;
+ break;
+ }
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
+ .addOperand(MI->getOperand(3))
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
+ .addOperand(MI->getOperand(2))
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW)
+ .addReg(T0, RegState::Implicit)
+ .addReg(T1, RegState::Implicit);
+ break;
+ }
+
+ case AMDGPU::TXD_SHADOW: {
+ unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ MachineOperand &RID = MI->getOperand(4);
+ MachineOperand &SID = MI->getOperand(5);
+ unsigned TextureId = MI->getOperand(6).getImm();
+ unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
+ unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
+
+ switch (TextureId) {
+ case 5: // Rect
+ CTX = CTY = 0;
+ break;
+ case 6: // Shadow1D
+ SrcW = SrcZ;
+ break;
+ case 7: // Shadow2D
+ SrcW = SrcZ;
+ break;
+ case 8: // ShadowRect
+ CTX = CTY = 0;
+ SrcW = SrcZ;
+ break;
+ case 9: // 1DArray
+ SrcZ = SrcY;
+ CTZ = 0;
+ break;
+ case 10: // 2DArray
+ CTZ = 0;
+ break;
+ case 11: // Shadow1DArray
+ SrcZ = SrcY;
+ CTZ = 0;
+ break;
+ case 12: // Shadow2DArray
+ CTZ = 0;
+ break;
+ }
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
+ .addOperand(MI->getOperand(3))
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
+ .addOperand(MI->getOperand(2))
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW)
+ .addReg(T0, RegState::Implicit)
+ .addReg(T1, RegState::Implicit);
+ break;
+ }
+
+ case AMDGPU::BRANCH:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0));
+ break;
+
+ case AMDGPU::BRANCH_COND_f32: {
+ MachineInstr *NewMI =
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
+ AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_NOT_ZERO)
+ .addImm(0); // Flags
+ TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
+ }
+
+ case AMDGPU::BRANCH_COND_i32: {
+ MachineInstr *NewMI =
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
+ AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_NOT_ZERO_INT)
+ .addImm(0); // Flags
+ TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
+ }
+
+ case AMDGPU::EG_ExportSwz:
+ case AMDGPU::R600_ExportSwz: {
+    // Instruction is left unmodified if it's not the last one of its type
+ bool isLastInstructionOfItsType = true;
+ unsigned InstExportType = MI->getOperand(1).getImm();
+ for (MachineBasicBlock::iterator NextExportInst = std::next(I),
+ EndBlock = BB->end(); NextExportInst != EndBlock;
+ NextExportInst = std::next(NextExportInst)) {
+ if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
+ NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
+ unsigned CurrentInstExportType = NextExportInst->getOperand(1)
+ .getImm();
+ if (CurrentInstExportType == InstExportType) {
+ isLastInstructionOfItsType = false;
+ break;
+ }
+ }
+ }
+ bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
+ if (!EOP && !isLastInstructionOfItsType)
+ return BB;
+ unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6))
+ .addImm(CfInst)
+ .addImm(EOP);
+ break;
+ }
+ case AMDGPU::RETURN: {
+ // RETURN instructions must have the live-out registers as implicit uses,
+ // otherwise they appear dead.
+ R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
+ MachineInstrBuilder MIB(*MF, MI);
+ for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
+ MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
+ return BB;
+ }
+ }
+
+ MI->eraseFromParent();
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ switch (Op.getOpcode()) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
+ case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
+ case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
+ case ISD::FCOS:
+ case ISD::FSIN: return LowerTrig(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::LOAD: {
+ SDValue Result = LowerLOAD(Op, DAG);
+ assert((!Result.getNode() ||
+ Result.getNode()->getNumValues() == 2) &&
+ "Load should return a value and a chain");
+ return Result;
+ }
+
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
+ case ISD::INTRINSIC_VOID: {
+ SDValue Chain = Op.getOperand(0);
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::AMDGPU_store_output: {
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
+ MFI->LiveOuts.push_back(Reg);
+ return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
+ }
+ case AMDGPUIntrinsic::R600_store_swizzle: {
+ SDLoc DL(Op);
+ const SDValue Args[8] = {
+ Chain,
+ Op.getOperand(2), // Export Value
+ Op.getOperand(3), // ArrayBase
+ Op.getOperand(4), // Type
+ DAG.getConstant(0, DL, MVT::i32), // SWZ_X
+ DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
+ DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
+ DAG.getConstant(3, DL, MVT::i32) // SWZ_W
+ };
+ return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
+ }
+
+ // default for switch(IntrinsicID)
+ default: break;
+ }
+ // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ switch(IntrinsicID) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case AMDGPUIntrinsic::R600_load_input: {
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.addLiveIn(Reg);
+ return DAG.getCopyFromReg(DAG.getEntryNode(),
+ SDLoc(DAG.getEntryNode()), Reg, VT);
+ }
+
+ case AMDGPUIntrinsic::R600_interp_input: {
+ int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
+ MachineSDNode *interp;
+ if (ijb < 0) {
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
+ interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
+ MVT::v4f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32));
+ return DAG.getTargetExtractSubreg(
+ TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
+ DL, MVT::f32, SDValue(interp, 0));
+ }
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
+ unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
+ MRI.addLiveIn(RegisterI);
+ MRI.addLiveIn(RegisterJ);
+ SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
+ SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
+ SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
+ SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
+
+ if (slot % 4 < 2)
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
+ RegisterJNode, RegisterINode);
+ else
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
+ RegisterJNode, RegisterINode);
+ return SDValue(interp, slot % 2);
+ }
+ case AMDGPUIntrinsic::R600_interp_xy:
+ case AMDGPUIntrinsic::R600_interp_zw: {
+ int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ MachineSDNode *interp;
+ SDValue RegisterINode = Op.getOperand(2);
+ SDValue RegisterJNode = Op.getOperand(3);
+
+ if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
+ RegisterJNode, RegisterINode);
+ else
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
+ RegisterJNode, RegisterINode);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
+ SDValue(interp, 0), SDValue(interp, 1));
+ }
+ case AMDGPUIntrinsic::R600_tex:
+ case AMDGPUIntrinsic::R600_texc:
+ case AMDGPUIntrinsic::R600_txl:
+ case AMDGPUIntrinsic::R600_txlc:
+ case AMDGPUIntrinsic::R600_txb:
+ case AMDGPUIntrinsic::R600_txbc:
+ case AMDGPUIntrinsic::R600_txf:
+ case AMDGPUIntrinsic::R600_txq:
+ case AMDGPUIntrinsic::R600_ddx:
+ case AMDGPUIntrinsic::R600_ddy:
+ case AMDGPUIntrinsic::R600_ldptr: {
+ unsigned TextureOp;
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::R600_tex:
+ TextureOp = 0;
+ break;
+ case AMDGPUIntrinsic::R600_texc:
+ TextureOp = 1;
+ break;
+ case AMDGPUIntrinsic::R600_txl:
+ TextureOp = 2;
+ break;
+ case AMDGPUIntrinsic::R600_txlc:
+ TextureOp = 3;
+ break;
+ case AMDGPUIntrinsic::R600_txb:
+ TextureOp = 4;
+ break;
+ case AMDGPUIntrinsic::R600_txbc:
+ TextureOp = 5;
+ break;
+ case AMDGPUIntrinsic::R600_txf:
+ TextureOp = 6;
+ break;
+ case AMDGPUIntrinsic::R600_txq:
+ TextureOp = 7;
+ break;
+ case AMDGPUIntrinsic::R600_ddx:
+ TextureOp = 8;
+ break;
+ case AMDGPUIntrinsic::R600_ddy:
+ TextureOp = 9;
+ break;
+ case AMDGPUIntrinsic::R600_ldptr:
+ TextureOp = 10;
+ break;
+ default:
+        llvm_unreachable("Unknown Texture Operation");
+ }
+
+ SDValue TexArgs[19] = {
+ DAG.getConstant(TextureOp, DL, MVT::i32),
+ Op.getOperand(1),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(1, DL, MVT::i32),
+ DAG.getConstant(2, DL, MVT::i32),
+ DAG.getConstant(3, DL, MVT::i32),
+ Op.getOperand(2),
+ Op.getOperand(3),
+ Op.getOperand(4),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(1, DL, MVT::i32),
+ DAG.getConstant(2, DL, MVT::i32),
+ DAG.getConstant(3, DL, MVT::i32),
+ Op.getOperand(5),
+ Op.getOperand(6),
+ Op.getOperand(7),
+ Op.getOperand(8),
+ Op.getOperand(9),
+ Op.getOperand(10)
+ };
+ return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
+ }
+ case AMDGPUIntrinsic::AMDGPU_dp4: {
+ SDValue Args[8] = {
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
+ DAG.getConstant(0, DL, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
+ DAG.getConstant(0, DL, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
+ DAG.getConstant(1, DL, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
+ DAG.getConstant(1, DL, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
+ DAG.getConstant(2, DL, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
+ DAG.getConstant(2, DL, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
+ DAG.getConstant(3, DL, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
+ DAG.getConstant(3, DL, MVT::i32))
+ };
+ return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
+ }
+
+ case Intrinsic::r600_read_ngroups_x:
+ return LowerImplicitParameter(DAG, VT, DL, 0);
+ case Intrinsic::r600_read_ngroups_y:
+ return LowerImplicitParameter(DAG, VT, DL, 1);
+ case Intrinsic::r600_read_ngroups_z:
+ return LowerImplicitParameter(DAG, VT, DL, 2);
+ case Intrinsic::r600_read_global_size_x:
+ return LowerImplicitParameter(DAG, VT, DL, 3);
+ case Intrinsic::r600_read_global_size_y:
+ return LowerImplicitParameter(DAG, VT, DL, 4);
+ case Intrinsic::r600_read_global_size_z:
+ return LowerImplicitParameter(DAG, VT, DL, 5);
+ case Intrinsic::r600_read_local_size_x:
+ return LowerImplicitParameter(DAG, VT, DL, 6);
+ case Intrinsic::r600_read_local_size_y:
+ return LowerImplicitParameter(DAG, VT, DL, 7);
+ case Intrinsic::r600_read_local_size_z:
+ return LowerImplicitParameter(DAG, VT, DL, 8);
+
+ case Intrinsic::AMDGPU_read_workdim:
+ return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
+
+ case Intrinsic::r600_read_tgid_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_X, VT);
+ case Intrinsic::r600_read_tgid_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_Y, VT);
+ case Intrinsic::r600_read_tgid_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_Z, VT);
+ case Intrinsic::r600_read_tidig_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_X, VT);
+ case Intrinsic::r600_read_tidig_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_Y, VT);
+ case Intrinsic::r600_read_tidig_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_Z, VT);
+ case Intrinsic::AMDGPU_rsq:
+ // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
+ return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
+ }
+ // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
+ break;
+ }
+ } // end switch(Op.getOpcode())
+ return SDValue();
+}
+
+void R600TargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ default:
+ AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
+ return;
+ case ISD::FP_TO_UINT:
+ if (N->getValueType(0) == MVT::i1) {
+ Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+ return;
+ }
+ // Fall-through. Since we don't care about out of bounds values
+ // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
+ // considers some extra cases which are not necessary here.
+ case ISD::FP_TO_SINT: {
+ SDValue Result;
+ if (expandFP_TO_SINT(N, Result, DAG))
+ Results.push_back(Result);
+ return;
+ }
+ case ISD::SDIVREM: {
+ SDValue Op = SDValue(N, 1);
+ SDValue RES = LowerSDIVREM(Op, DAG);
+ Results.push_back(RES);
+ Results.push_back(RES.getValue(1));
+ break;
+ }
+ case ISD::UDIVREM: {
+ SDValue Op = SDValue(N, 0);
+ LowerUDIVREM64(Op, DAG, Results);
+ break;
+ }
+ }
+}
+
+SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
+ SDValue Vector) const {
+
+ SDLoc DL(Vector);
+ EVT VecVT = Vector.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ SmallVector<SDValue, 8> Args;
+
+ for (unsigned i = 0, e = VecVT.getVectorNumElements();
+ i != e; ++i) {
+ Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
+ DAG.getConstant(i, DL, getVectorIdxTy())));
+ }
+
+ return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
+}
+
+SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ SDLoc DL(Op);
+ SDValue Vector = Op.getOperand(0);
+ SDValue Index = Op.getOperand(1);
+
+ if (isa<ConstantSDNode>(Index) ||
+ Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+ return Op;
+
+ Vector = vectorToVerticalVector(DAG, Vector);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
+ Vector, Index);
+}
+
+SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Vector = Op.getOperand(0);
+ SDValue Value = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+
+ if (isa<ConstantSDNode>(Index) ||
+ Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+ return Op;
+
+ Vector = vectorToVerticalVector(DAG, Vector);
+ SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
+ Vector, Value, Index);
+ return vectorToVerticalVector(DAG, Insert);
+}
+
+SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
+  // On hw >= R700, COS/SIN input must be between -1.0 and 1.0.
+  // Thus we lower them to TRIG(FRACT(x / (2 * Pi) + 0.5) - 0.5).
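+  // The constant 0.15915494309 below is 1 / (2 * Pi).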
+ EVT VT = Op.getValueType();
+ SDValue Arg = Op.getOperand(0);
+ SDLoc DL(Op);
+ SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
+ DAG.getNode(ISD::FADD, DL, VT,
+ DAG.getNode(ISD::FMUL, DL, VT, Arg,
+ DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
+ DAG.getConstantFP(0.5, DL, MVT::f32)));
+ unsigned TrigNode;
+ switch (Op.getOpcode()) {
+ case ISD::FCOS:
+ TrigNode = AMDGPUISD::COS_HW;
+ break;
+ case ISD::FSIN:
+ TrigNode = AMDGPUISD::SIN_HW;
+ break;
+ default:
+ llvm_unreachable("Wrong trig opcode");
+ }
+ SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
+ DAG.getNode(ISD::FADD, DL, VT, FractPart,
+ DAG.getConstantFP(-0.5, DL, MVT::f32)));
+ if (Gen >= AMDGPUSubtarget::R700)
+ return TrigVal;
+ // On R600 hw, COS/SIN input must be between -Pi and Pi.
+ return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
+ DAG.getConstantFP(3.14159265359, DL, MVT::f32));
+}
+
+SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Shift = Op.getOperand(2);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
+
+ SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
+ SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
+ SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
+ SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
+
+  // The dance around Width1 is necessary for the 0 special case.
+  // Without it, CompShift might be 32, producing incorrect results in
+  // Overflow. So we do the shift in two steps; the alternative is to
+  // add a conditional to filter out the special case.
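+  // E.g. for Shift == 0, CompShift == 31, so Lo is shifted right by 31 and
+  // then by one more bit, yielding 0 instead of an undefined shift by 32.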
+
+ SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
+ Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
+
+ SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
+ HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
+ SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
+
+ SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
+ SDValue LoBig = Zero;
+
+ Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
+ Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
+}
+
+SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Shift = Op.getOperand(2);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
+
+ const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
+
+ SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
+ SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
+ SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
+ SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
+
+  // The dance around Width1 is necessary for the 0 special case.
+  // Without it, CompShift might be 32, producing incorrect results in
+  // Overflow. So we do the shift in two steps; the alternative is to
+  // add a conditional to filter out the special case.
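+  // E.g. for Shift == 0, CompShift == 31, so Hi is shifted left by 31 and
+  // then by one more bit, yielding 0 instead of an undefined shift by 32.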
+
+ SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
+ Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
+
+ SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
+ SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
+ LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
+
+ SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
+ SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
+
+ Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
+ Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
+}
+
+SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
+ unsigned mainop, unsigned ovf) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+
+ SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
+ // Extend sign.
+ OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
+ DAG.getValueType(MVT::i1));
+
+ SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
+}
+
+SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(
+ ISD::SETCC,
+ DL,
+ MVT::i1,
+ Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
+ DAG.getCondCode(ISD::SETNE)
+ );
+}
+
+SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
+ SDLoc DL,
+ unsigned DwordOffset) const {
+ unsigned ByteOffset = DwordOffset * 4;
+ PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
+ AMDGPUAS::CONSTANT_BUFFER_0);
+
+  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
+ assert(isInt<16>(ByteOffset));
+
+ return DAG.getLoad(VT, DL, DAG.getEntryNode(),
+ DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
+ MachinePointerInfo(ConstantPointerNull::get(PtrType)),
+ false, false, false, 0);
+}
+
+bool R600TargetLowering::isZero(SDValue Op) const {
+  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
+    return Cst->isNullValue();
+  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CstFP->isZero();
+ } else {
+ return false;
+ }
+}
+
+SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+ SDValue Temp;
+
+ if (VT == MVT::f32) {
+ DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
+ SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
+ if (MinMax)
+ return MinMax;
+ }
+
+ // LHS and RHS are guaranteed to be the same value type
+ EVT CompareVT = LHS.getValueType();
+
+ // Check if we can lower this to a native operation.
+
+ // Try to lower to a SET* instruction:
+ //
+ // SET* can match the following patterns:
+ //
+ // select_cc f32, f32, -1, 0, cc_supported
+ // select_cc f32, f32, 1.0f, 0.0f, cc_supported
+ // select_cc i32, i32, -1, 0, cc_supported
+ //
+
+ // Move hardware True/False values to the correct operand.
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ ISD::CondCode InverseCC =
+ ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
+ if (isHWTrueValue(False) && isHWFalseValue(True)) {
+ if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
+ std::swap(False, True);
+ CC = DAG.getCondCode(InverseCC);
+ } else {
+ ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
+ if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
+ std::swap(False, True);
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(SwapInvCC);
+ }
+ }
+ }
+
+ if (isHWTrueValue(True) && isHWFalseValue(False) &&
+ (CompareVT == VT || VT == MVT::i32)) {
+ // This can be matched by a SET* instruction.
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
+ }
+
+ // Try to lower to a CND* instruction:
+ //
+ // CND* can match the following patterns:
+ //
+ // select_cc f32, 0.0, f32, f32, cc_supported
+ // select_cc f32, 0.0, i32, i32, cc_supported
+ // select_cc i32, 0, f32, f32, cc_supported
+ // select_cc i32, 0, i32, i32, cc_supported
+ //
+
+ // Try to move the zero value to the RHS
+ if (isZero(LHS)) {
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ // Try swapping the operands
+ ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
+ if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(CCSwapped);
+ } else {
+      // Try inverting the condition and then swapping the operands
+ ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
+ CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
+ if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
+ std::swap(True, False);
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(CCSwapped);
+ }
+ }
+ }
+ if (isZero(RHS)) {
+ SDValue Cond = LHS;
+ SDValue Zero = RHS;
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ if (CompareVT != VT) {
+ // Bitcast True / False to the correct types. This will end up being
+ // a nop, but it allows us to define only a single pattern in the
+ // .TD files for each CND* instruction rather than having to have
+ // one pattern for integer True/False and one for fp True/False
+ True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
+ False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
+ }
+
+ switch (CCOpcode) {
+ case ISD::SETONE:
+ case ISD::SETUNE:
+ case ISD::SETNE:
+ CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
+ Temp = True;
+ True = False;
+ False = Temp;
+ break;
+ default:
+ break;
+ }
+ SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
+ Cond, Zero,
+ True, False,
+ DAG.getCondCode(CCOpcode));
+ return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
+ }
+
+  // If we make it this far, it means we have no native instructions to handle
+  // this SELECT_CC, so we must lower it.
+ SDValue HWTrue, HWFalse;
+
+ if (CompareVT == MVT::f32) {
+ HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
+ HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
+ } else if (CompareVT == MVT::i32) {
+ HWTrue = DAG.getConstant(-1, DL, CompareVT);
+ HWFalse = DAG.getConstant(0, DL, CompareVT);
+  } else {
+ llvm_unreachable("Unhandled value type in LowerSELECT_CC");
+ }
+
+ // Lower this unsupported SELECT_CC into a combination of two supported
+ // SELECT_CC operations.
+ SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
+
+ return DAG.getNode(ISD::SELECT_CC, DL, VT,
+ Cond, HWFalse,
+ True, False,
+ DAG.getCondCode(ISD::SETNE));
+}
+
+/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
+/// convert these pointers to a register index. Each register holds
+/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
+/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
+/// for indirect addressing.
+SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
+ unsigned StackWidth,
+ SelectionDAG &DAG) const {
+ unsigned SRLPad;
+ switch(StackWidth) {
+ case 1:
+ SRLPad = 2;
+ break;
+ case 2:
+ SRLPad = 3;
+ break;
+ case 4:
+ SRLPad = 4;
+ break;
+ default: llvm_unreachable("Invalid stack width");
+ }
+
+ SDLoc DL(Ptr);
+ return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(SRLPad, DL, MVT::i32));
+}
+
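+// Map a flat vector element index onto the channel within a register and the
+// pointer increment needed to reach it, given how many of the four channels
+// each stack register uses (StackWidth).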
+void R600TargetLowering::getStackAddress(unsigned StackWidth,
+ unsigned ElemIdx,
+ unsigned &Channel,
+ unsigned &PtrIncr) const {
+ switch (StackWidth) {
+ default:
+ case 1:
+ Channel = 0;
+ if (ElemIdx > 0) {
+ PtrIncr = 1;
+ } else {
+ PtrIncr = 0;
+ }
+ break;
+ case 2:
+ Channel = ElemIdx % 2;
+ if (ElemIdx == 2) {
+ PtrIncr = 1;
+ } else {
+ PtrIncr = 0;
+ }
+ break;
+ case 4:
+ Channel = ElemIdx;
+ PtrIncr = 0;
+ break;
+ }
+}
+
+SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Value = Op.getOperand(1);
+ SDValue Ptr = Op.getOperand(2);
+
+ SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
+ if (Result.getNode()) {
+ return Result;
+ }
+
+ if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
+ if (StoreNode->isTruncatingStore()) {
+ EVT VT = Value.getValueType();
+ assert(VT.bitsLE(MVT::i32));
+ EVT MemVT = StoreNode->getMemoryVT();
+ SDValue MaskConstant;
+ if (MemVT == MVT::i8) {
+ MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
+ } else {
+ assert(MemVT == MVT::i16);
+ MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
+ }
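+      // Compute the containing dword address and a shifted value/mask pair;
+      // the STORE_MSKOR node below uses these to update only the stored byte
+      // or short within that dword.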
+ SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
+ DAG.getConstant(2, DL, MVT::i32));
+ SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(0x00000003, DL, VT));
+ SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
+ DAG.getConstant(3, DL, VT));
+ SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
+ SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
+ // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
+ // vector instead.
+ SDValue Src[4] = {
+ ShiftedValue,
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ Mask
+ };
+ SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
+ SDValue Args[3] = { Chain, Input, DWordAddr };
+ return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
+ Op->getVTList(), Args, MemVT,
+ StoreNode->getMemOperand());
+ } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
+ Value.getValueType().bitsGE(MVT::i32)) {
+ // Convert pointer from byte address to dword address.
+ Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
+ DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
+ Ptr, DAG.getConstant(2, DL, MVT::i32)));
+
+ if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
+ llvm_unreachable("Truncated and indexed stores not supported yet");
+ } else {
+ Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
+ }
+ return Chain;
+ }
+ }
+
+ EVT ValueVT = Value.getValueType();
+
+ if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return SDValue();
+ }
+
+ SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
+ if (Ret.getNode()) {
+ return Ret;
+ }
+ // Lowering for indirect addressing
+
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
+ unsigned StackWidth = TFL->getStackWidth(MF);
+
+ Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
+
+ if (ValueVT.isVector()) {
+ unsigned NumElemVT = ValueVT.getVectorNumElements();
+ EVT ElemVT = ValueVT.getVectorElementType();
+ SmallVector<SDValue, 4> Stores(NumElemVT);
+
+    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
+                                      "vector width in store");
+
+ for (unsigned i = 0; i < NumElemVT; ++i) {
+ unsigned Channel, PtrIncr;
+ getStackAddress(StackWidth, i, Channel, PtrIncr);
+ Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+ DAG.getConstant(PtrIncr, DL, MVT::i32));
+ SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
+ Value, DAG.getConstant(i, DL, MVT::i32));
+
+ Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+ Chain, Elem, Ptr,
+ DAG.getTargetConstant(Channel, DL, MVT::i32));
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
+ } else {
+ if (ValueVT == MVT::i8) {
+ Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
+ }
+ Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
+ DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
+ }
+
+ return Chain;
+}
+
+// Returns 512 + (kc_bank << 12), or -1 for non-constant-buffer address
+// spaces.
+static int
+ConstantAddressBlock(unsigned AddressSpace) {
+ switch (AddressSpace) {
+ case AMDGPUAS::CONSTANT_BUFFER_0:
+ return 512;
+ case AMDGPUAS::CONSTANT_BUFFER_1:
+ return 512 + 4096;
+ case AMDGPUAS::CONSTANT_BUFFER_2:
+ return 512 + 4096 * 2;
+ case AMDGPUAS::CONSTANT_BUFFER_3:
+ return 512 + 4096 * 3;
+ case AMDGPUAS::CONSTANT_BUFFER_4:
+ return 512 + 4096 * 4;
+ case AMDGPUAS::CONSTANT_BUFFER_5:
+ return 512 + 4096 * 5;
+ case AMDGPUAS::CONSTANT_BUFFER_6:
+ return 512 + 4096 * 6;
+ case AMDGPUAS::CONSTANT_BUFFER_7:
+ return 512 + 4096 * 7;
+ case AMDGPUAS::CONSTANT_BUFFER_8:
+ return 512 + 4096 * 8;
+ case AMDGPUAS::CONSTANT_BUFFER_9:
+ return 512 + 4096 * 9;
+ case AMDGPUAS::CONSTANT_BUFFER_10:
+ return 512 + 4096 * 10;
+ case AMDGPUAS::CONSTANT_BUFFER_11:
+ return 512 + 4096 * 11;
+ case AMDGPUAS::CONSTANT_BUFFER_12:
+ return 512 + 4096 * 12;
+ case AMDGPUAS::CONSTANT_BUFFER_13:
+ return 512 + 4096 * 13;
+ case AMDGPUAS::CONSTANT_BUFFER_14:
+ return 512 + 4096 * 14;
+ case AMDGPUAS::CONSTANT_BUFFER_15:
+ return 512 + 4096 * 15;
+ default:
+ return -1;
+ }
+}
+
+SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Ptr = Op.getOperand(1);
+ SDValue LoweredLoad;
+
+ SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
+ if (Ret.getNode()) {
+ SDValue Ops[2] = {
+ Ret,
+ Chain
+ };
+ return DAG.getMergeValues(Ops, DL);
+ }
+
+  // Lower constant address space loads of global variables.
+ if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ isa<GlobalVariable>(GetUnderlyingObject(
+ LoadNode->getMemOperand()->getValue(), *getDataLayout()))) {
+
+ SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
+ getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
+ Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
+ DAG.getConstant(2, DL, MVT::i32));
+ return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
+ LoadNode->getChain(), Ptr,
+ DAG.getTargetConstant(0, DL, MVT::i32),
+ Op.getOperand(2));
+ }
+
+ if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
+ SDValue MergedValues[2] = {
+ ScalarizeVectorLoad(Op, DAG),
+ Chain
+ };
+ return DAG.getMergeValues(MergedValues, DL);
+ }
+
+ int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
+ if (ConstantBlock > -1 &&
+ ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
+ (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
+ SDValue Result;
+ if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
+ isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
+ isa<ConstantSDNode>(Ptr)) {
+ SDValue Slots[4];
+ for (unsigned i = 0; i < 4; i++) {
+        // We want the constant position encoded with the following formula:
+        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
+        // where const_index is the Ptr computed by LLVM using an alignment of 16.
+        // Thus we add ((512 + (kc_bank << 12)) + chan) * 4 here and then
+        // divide by 4 at the ISel step.
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
+ Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
+ }
+ EVT NewVT = MVT::v4i32;
+ unsigned NumElements = 4;
+ if (VT.isVector()) {
+ NewVT = VT;
+ NumElements = VT.getVectorNumElements();
+ }
+ Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
+ makeArrayRef(Slots, NumElements));
+ } else {
+      // A non-constant pointer can't be folded; keep it as a v4i32 load.
+ Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
+ DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
+ DAG.getConstant(4, DL, MVT::i32)),
+ DAG.getConstant(LoadNode->getAddressSpace() -
+ AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
+ );
+ }
+
+ if (!VT.isVector()) {
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
+ DAG.getConstant(0, DL, MVT::i32));
+ }
+
+ SDValue MergedValues[2] = {
+ Result,
+ Chain
+ };
+ return DAG.getMergeValues(MergedValues, DL);
+ }
+
+ // For most operations returning SDValue() will result in the node being
+ // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
+ // need to manually expand loads that may be legal in some address spaces and
+ // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
+ // compute shaders, since the data is sign extended when it is uploaded to the
+  // buffer. However, SEXT loads from other address spaces are not supported,
+  // so we need to expand them here.
+ if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
+ EVT MemVT = LoadNode->getMemoryVT();
+ assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
+ SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
+ LoadNode->getPointerInfo(), MemVT,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ LoadNode->getAlignment());
+ SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
+ DAG.getValueType(MemVT));
+
+ SDValue MergedValues[2] = { Res, Chain };
+ return DAG.getMergeValues(MergedValues, DL);
+ }
+
+ if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return SDValue();
+ }
+
+ // Lowering for indirect addressing
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
+ unsigned StackWidth = TFL->getStackWidth(MF);
+
+ Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
+
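+  // Vector loads are split into one REGISTER_LOAD per element; any unused
+  // lanes up to four are filled with undef to form a full vector result.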
+ if (VT.isVector()) {
+ unsigned NumElemVT = VT.getVectorNumElements();
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Loads[4];
+
+ assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
+ "vector width in load");
+
+ for (unsigned i = 0; i < NumElemVT; ++i) {
+ unsigned Channel, PtrIncr;
+ getStackAddress(StackWidth, i, Channel, PtrIncr);
+ Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+ DAG.getConstant(PtrIncr, DL, MVT::i32));
+ Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
+ Chain, Ptr,
+ DAG.getTargetConstant(Channel, DL, MVT::i32),
+ Op.getOperand(2));
+ }
+ for (unsigned i = NumElemVT; i < 4; ++i) {
+ Loads[i] = DAG.getUNDEF(ElemVT);
+ }
+ EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
+ LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
+ } else {
+ LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
+ Chain, Ptr,
+ DAG.getTargetConstant(0, DL, MVT::i32), // Channel
+ Op.getOperand(2));
+ }
+
+ SDValue Ops[2] = {
+ LoweredLoad,
+ Chain
+ };
+
+ return DAG.getMergeValues(Ops, DL);
+}
+
+SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Jump = Op.getOperand(2);
+
+ return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
+ Chain, Jump, Cond);
+}
+
+/// XXX Only kernel functions are supported, so we can assume for now that
+/// every function is a kernel function, but in the future we should use
+/// separate calling conventions for kernel and non-kernel functions.
+SDValue R600TargetLowering::LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+ MachineFunction &MF = DAG.getMachineFunction();
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+
+ SmallVector<ISD::InputArg, 8> LocalIns;
+
+ getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
+
+ AnalyzeFormalArguments(CCInfo, LocalIns);
+
+ for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ const ISD::InputArg &In = Ins[i];
+ EVT VT = In.VT;
+ EVT MemVT = VA.getLocVT();
+ if (!VT.isVector() && MemVT.isVector()) {
+ // Get load source type if scalarized.
+ MemVT = MemVT.getVectorElementType();
+ }
+
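+    // Graphics (non-compute) shaders receive their inputs in registers;
+    // compute kernels fall through and read their arguments from constant
+    // buffer 0 below.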
+ if (MFI->getShaderType() != ShaderType::COMPUTE) {
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
+ SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ InVals.push_back(Register);
+ continue;
+ }
+
+ PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
+ AMDGPUAS::CONSTANT_BUFFER_0);
+
+ // i64 isn't a legal type, so the register type used ends up as i32, which
+ // isn't expected here. It attempts to create this sextload, but it ends up
+ // being invalid. Somehow this seems to work with i64 arguments, but breaks
+ // for <1 x i64>.
+
+    // The first 36 bytes of the input buffer contain information about
+    // thread group and global sizes.
+ ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
+ if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
+ // FIXME: This should really check the extload type, but the handling of
+ // extload vector parameters seems to be broken.
+
+ // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ Ext = ISD::SEXTLOAD;
+ }
+
+ // Compute the offset from the value.
+ // XXX - I think PartOffset should give you this, but it seems to give the
+ // size of the register which isn't useful.
+
+ unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
+ unsigned PartOffset = VA.getLocMemOffset();
+ unsigned Offset = 36 + VA.getLocMemOffset();
+
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
+ SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
+ DAG.getConstant(Offset, DL, MVT::i32),
+ DAG.getUNDEF(MVT::i32),
+ PtrInfo,
+ MemVT, false, true, true, 4);
+
+ // 4 is the preferred alignment for the CONSTANT memory space.
+ InVals.push_back(Arg);
+ MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
+ }
+ return Chain;
+}
+
+EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+static SDValue CompactSwizzlableVector(
+ SelectionDAG &DAG, SDValue VectorEntry,
+ DenseMap<unsigned, unsigned> &RemapSwizzle) {
+ assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
+ assert(RemapSwizzle.empty());
+ SDValue NewBldVec[4] = {
+ VectorEntry.getOperand(0),
+ VectorEntry.getOperand(1),
+ VectorEntry.getOperand(2),
+ VectorEntry.getOperand(3)
+ };
+
+ for (unsigned i = 0; i < 4; i++) {
+ if (NewBldVec[i].getOpcode() == ISD::UNDEF)
+      // We mask the write here to teach later passes that the ith element of
+      // this vector is undef. Thus we can use it to reduce 128-bit register
+      // usage, break false dependencies and additionally make assembly easier
+      // to read.
+ RemapSwizzle[i] = 7; // SEL_MASK_WRITE
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
+ if (C->isZero()) {
+ RemapSwizzle[i] = 4; // SEL_0
+ NewBldVec[i] = DAG.getUNDEF(MVT::f32);
+ } else if (C->isExactlyValue(1.0)) {
+ RemapSwizzle[i] = 5; // SEL_1
+ NewBldVec[i] = DAG.getUNDEF(MVT::f32);
+ }
+ }
+
+ if (NewBldVec[i].getOpcode() == ISD::UNDEF)
+ continue;
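+    // A duplicate of an earlier element can reuse that element's slot: record
+    // the remap and mark this lane undef.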
+ for (unsigned j = 0; j < i; j++) {
+ if (NewBldVec[i] == NewBldVec[j]) {
+ NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
+ RemapSwizzle[i] = j;
+ break;
+ }
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
+ VectorEntry.getValueType(), NewBldVec);
+}
+
+static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
+ DenseMap<unsigned, unsigned> &RemapSwizzle) {
+ assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
+ assert(RemapSwizzle.empty());
+ SDValue NewBldVec[4] = {
+ VectorEntry.getOperand(0),
+ VectorEntry.getOperand(1),
+ VectorEntry.getOperand(2),
+ VectorEntry.getOperand(3)
+ };
+ bool isUnmovable[4] = { false, false, false, false };
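+  // First pass: an element that already extracts its own lane index has to
+  // stay in place, so mark that lane as unmovable.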
+ for (unsigned i = 0; i < 4; i++) {
+ RemapSwizzle[i] = i;
+ if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
+ ->getZExtValue();
+ if (i == Idx)
+ isUnmovable[Idx] = true;
+ }
+ }
+
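+  // Second pass: move one misplaced extract into its source lane if that lane
+  // is movable, and record the swap in the swizzle remap.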
+ for (unsigned i = 0; i < 4; i++) {
+ if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
+ ->getZExtValue();
+ if (isUnmovable[Idx])
+ continue;
+ // Swap i and Idx
+ std::swap(NewBldVec[Idx], NewBldVec[i]);
+ std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
+ break;
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
+ VectorEntry.getValueType(), NewBldVec);
+}
+
+
+SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
+ SDValue Swz[4], SelectionDAG &DAG,
+ SDLoc DL) const {
+ assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
+ // Old -> New swizzle values
+ DenseMap<unsigned, unsigned> SwizzleRemap;
+
+ BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
+ for (unsigned i = 0; i < 4; i++) {
+ unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
+ if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
+ Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
+ }
+
+ SwizzleRemap.clear();
+ BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
+ for (unsigned i = 0; i < 4; i++) {
+ unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
+ if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
+ Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
+ }
+
+ return BuildVector;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Optimizations
+//===----------------------------------------------------------------------===//
+
+SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ switch (N->getOpcode()) {
+ default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
+ // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
+ case ISD::FP_ROUND: {
+ SDValue Arg = N->getOperand(0);
+ if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
+ return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
+ Arg.getOperand(0));
+ }
+ break;
+ }
+
+ // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
+ // (i32 select_cc f32, f32, -1, 0 cc)
+ //
+ // Mesa's GLSL frontend generates the above pattern a lot and we can lower
+ // this to one of the SET*_DX10 instructions.
+ case ISD::FP_TO_SINT: {
+ SDValue FNeg = N->getOperand(0);
+ if (FNeg.getOpcode() != ISD::FNEG) {
+ return SDValue();
+ }
+ SDValue SelectCC = FNeg.getOperand(0);
+ if (SelectCC.getOpcode() != ISD::SELECT_CC ||
+ SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
+ SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
+ !isHWTrueValue(SelectCC.getOperand(2)) ||
+ !isHWFalseValue(SelectCC.getOperand(3))) {
+ return SDValue();
+ }
+
+ SDLoc dl(N);
+ return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
+ SelectCC.getOperand(0), // LHS
+ SelectCC.getOperand(1), // RHS
+ DAG.getConstant(-1, dl, MVT::i32), // True
+ DAG.getConstant(0, dl, MVT::i32), // False
+ SelectCC.getOperand(4)); // CC
+
+ break;
+ }
+
+ // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
+ // => build_vector elt0, ... , NewEltIdx, ... , eltN
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue InVec = N->getOperand(0);
+ SDValue InVal = N->getOperand(1);
+ SDValue EltNo = N->getOperand(2);
+ SDLoc dl(N);
+
+ // If the inserted element is an UNDEF, just use the input vector.
+ if (InVal.getOpcode() == ISD::UNDEF)
+ return InVec;
+
+ EVT VT = InVec.getValueType();
+
+ // If we can't generate a legal BUILD_VECTOR, exit
+ if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return SDValue();
+
+ // Check that we know which element is being inserted
+ if (!isa<ConstantSDNode>(EltNo))
+ return SDValue();
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+
+ // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
+ // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
+ // vector elements.
+ SmallVector<SDValue, 8> Ops;
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ Ops.append(InVec.getNode()->op_begin(),
+ InVec.getNode()->op_end());
+ } else if (InVec.getOpcode() == ISD::UNDEF) {
+ unsigned NElts = VT.getVectorNumElements();
+ Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
+ } else {
+ return SDValue();
+ }
+
+ // Insert the element
+ if (Elt < Ops.size()) {
+ // All the operands of BUILD_VECTOR must have the same type;
+ // we enforce that here.
+ EVT OpVT = Ops[0].getValueType();
+ if (InVal.getValueType() != OpVT)
+ InVal = OpVT.bitsGT(InVal.getValueType()) ?
+ DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
+ DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
+ Ops[Elt] = InVal;
+ }
+
+ // Return the new vector
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ }
+
+  // An extract_vector_elt of a build_vector generated by custom lowering
+  // also needs to be custom combined.
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Arg = N->getOperand(0);
+ if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned Element = Const->getZExtValue();
+ return Arg->getOperand(Element);
+ }
+ }
+ if (Arg.getOpcode() == ISD::BITCAST &&
+ Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned Element = Const->getZExtValue();
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
+ Arg->getOperand(0).getOperand(Element));
+ }
+ }
+ }
+
+ case ISD::SELECT_CC: {
+ // Try common optimizations
+ SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
+ if (Ret.getNode())
+ return Ret;
+
+ // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
+ // selectcc x, y, a, b, inv(cc)
+ //
+ // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
+ // selectcc x, y, a, b, cc
+ SDValue LHS = N->getOperand(0);
+ if (LHS.getOpcode() != ISD::SELECT_CC) {
+ return SDValue();
+ }
+
+ SDValue RHS = N->getOperand(1);
+ SDValue True = N->getOperand(2);
+ SDValue False = N->getOperand(3);
+ ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ if (LHS.getOperand(2).getNode() != True.getNode() ||
+ LHS.getOperand(3).getNode() != False.getNode() ||
+ RHS.getNode() != False.getNode()) {
+ return SDValue();
+ }
+
+ switch (NCC) {
+ default: return SDValue();
+ case ISD::SETNE: return LHS;
+ case ISD::SETEQ: {
+ ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
+ LHSCC = ISD::getSetCCInverse(LHSCC,
+ LHS.getOperand(0).getValueType().isInteger());
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
+ return DAG.getSelectCC(SDLoc(N),
+ LHS.getOperand(0),
+ LHS.getOperand(1),
+ LHS.getOperand(2),
+ LHS.getOperand(3),
+ LHSCC);
+ break;
+ }
+ }
+ return SDValue();
+ }
+
+ case AMDGPUISD::EXPORT: {
+ SDValue Arg = N->getOperand(1);
+ if (Arg.getOpcode() != ISD::BUILD_VECTOR)
+ break;
+
+ SDValue NewArgs[8] = {
+ N->getOperand(0), // Chain
+ SDValue(),
+ N->getOperand(2), // ArrayBase
+ N->getOperand(3), // Type
+ N->getOperand(4), // SWZ_X
+ N->getOperand(5), // SWZ_Y
+ N->getOperand(6), // SWZ_Z
+ N->getOperand(7) // SWZ_W
+ };
+ SDLoc DL(N);
+ NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
+ return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
+ }
+ case AMDGPUISD::TEXTURE_FETCH: {
+ SDValue Arg = N->getOperand(1);
+ if (Arg.getOpcode() != ISD::BUILD_VECTOR)
+ break;
+
+ SDValue NewArgs[19] = {
+ N->getOperand(0),
+ N->getOperand(1),
+ N->getOperand(2),
+ N->getOperand(3),
+ N->getOperand(4),
+ N->getOperand(5),
+ N->getOperand(6),
+ N->getOperand(7),
+ N->getOperand(8),
+ N->getOperand(9),
+ N->getOperand(10),
+ N->getOperand(11),
+ N->getOperand(12),
+ N->getOperand(13),
+ N->getOperand(14),
+ N->getOperand(15),
+ N->getOperand(16),
+ N->getOperand(17),
+ N->getOperand(18),
+ };
+ SDLoc DL(N);
+ NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
+ return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
+ }
+ }
+
+ return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
+}
+
+static bool
+FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
+ SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
+ if (!Src.isMachineOpcode())
+ return false;
+ switch (Src.getMachineOpcode()) {
+ case AMDGPU::FNEG_R600:
+ if (!Neg.getNode())
+ return false;
+ Src = Src.getOperand(0);
+ Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
+ return true;
+ case AMDGPU::FABS_R600:
+ if (!Abs.getNode())
+ return false;
+ Src = Src.getOperand(0);
+ Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
+ return true;
+ case AMDGPU::CONST_COPY: {
+ unsigned Opcode = ParentNode->getMachineOpcode();
+ bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+
+ if (!Sel.getNode())
+ return false;
+
+ SDValue CstOffset = Src.getOperand(0);
+ if (ParentNode->getValueType(0).isVector())
+ return false;
+
+    // Gather constant values.
+ int SrcIndices[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+ };
+ std::vector<unsigned> Consts;
+ for (int OtherSrcIdx : SrcIndices) {
+ int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
+ if (OtherSrcIdx < 0 || OtherSelIdx < 0)
+ continue;
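+      // getOperandIdx counts the instruction's dst operand, but the SDNode
+      // operand list does not include it, so shift the indices when a dst is
+      // present.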
+ if (HasDst) {
+ OtherSrcIdx--;
+ OtherSelIdx--;
+ }
+ if (RegisterSDNode *Reg =
+ dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
+ if (Reg->getReg() == AMDGPU::ALU_CONST) {
+ ConstantSDNode *Cst
+ = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
+ Consts.push_back(Cst->getZExtValue());
+ }
+ }
+ }
+
+ ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
+ Consts.push_back(Cst->getZExtValue());
+ if (!TII->fitsConstReadLimitations(Consts)) {
+ return false;
+ }
+
+ Sel = CstOffset;
+ Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
+ return true;
+ }
+ case AMDGPU::MOV_IMM_I32:
+ case AMDGPU::MOV_IMM_F32: {
+ unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
+ uint64_t ImmValue = 0;
+
+
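+    // Prefer the hardware inline constants (ZERO, HALF, ONE/ONE_INT); any
+    // other value has to go through the ALU_LITERAL_X literal register.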
+ if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
+ ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
+ float FloatValue = FPC->getValueAPF().convertToFloat();
+ if (FloatValue == 0.0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (FloatValue == 0.5) {
+ ImmReg = AMDGPU::HALF;
+ } else if (FloatValue == 1.0) {
+ ImmReg = AMDGPU::ONE;
+ } else {
+ ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
+ }
+ } else {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
+ uint64_t Value = C->getZExtValue();
+ if (Value == 0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (Value == 1) {
+ ImmReg = AMDGPU::ONE_INT;
+ } else {
+ ImmValue = Value;
+ }
+ }
+
+ // Check that we aren't already using an immediate.
+ // XXX: It's possible for an instruction to have more than one
+ // immediate operand, but this is not supported yet.
+ if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+ if (!Imm.getNode())
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
+ assert(C);
+ if (C->getZExtValue())
+ return false;
+ Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
+ }
+ Src = DAG.getRegister(ImmReg, MVT::i32);
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+
+/// \brief Fold the instructions after selecting them
+SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
+ SelectionDAG &DAG) const {
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
+ if (!Node->isMachineOpcode())
+ return Node;
+ unsigned Opcode = Node->getMachineOpcode();
+ SDValue FakeOp;
+
+ std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
+
+ if (Opcode == AMDGPU::DOT_4) {
+ int OperandIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+ };
+ int NegIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
+ };
+ int AbsIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
+ };
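+    // Try to fold a modifier or constant into each of the eight DOT_4
+    // sources, and rebuild the machine node as soon as one operand folds.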
+ for (unsigned i = 0; i < 8; i++) {
+ if (OperandIdx[i] < 0)
+ return Node;
+ SDValue &Src = Ops[OperandIdx[i] - 1];
+ SDValue &Neg = Ops[NegIdx[i] - 1];
+ SDValue &Abs = Ops[AbsIdx[i] - 1];
+ bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+ int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
+ if (HasDst)
+ SelIdx--;
+ SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
+ if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
+ return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
+ }
+ } else if (Opcode == AMDGPU::REG_SEQUENCE) {
+ for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
+ SDValue &Src = Ops[i];
+ if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
+ return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
+ }
+ } else if (Opcode == AMDGPU::CLAMP_R600) {
+ SDValue Src = Node->getOperand(0);
+ if (!Src.isMachineOpcode() ||
+ !TII->hasInstrModifiers(Src.getMachineOpcode()))
+ return Node;
+ int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
+ AMDGPU::OpName::clamp);
+ if (ClampIdx < 0)
+ return Node;
+ SDLoc DL(Node);
+ std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
+ Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
+ return DAG.getMachineNode(Src.getMachineOpcode(), DL,
+ Node->getVTList(), Ops);
+ } else {
+ if (!TII->hasInstrModifiers(Opcode))
+ return Node;
+ int OperandIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
+ };
+ int NegIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
+ };
+ int AbsIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
+ -1
+ };
+ for (unsigned i = 0; i < 3; i++) {
+ if (OperandIdx[i] < 0)
+ return Node;
+ SDValue &Src = Ops[OperandIdx[i] - 1];
+ SDValue &Neg = Ops[NegIdx[i] - 1];
+ SDValue FakeAbs;
+ SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
+ bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+ int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
+ int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
+ if (HasDst) {
+ SelIdx--;
+ ImmIdx--;
+ }
+ SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
+ SDValue &Imm = Ops[ImmIdx];
+ if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
+ return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
+ }
+ }
+
+ return Node;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.h
new file mode 100644
index 0000000..c252878
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -0,0 +1,80 @@
+//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 DAG Lowering interface definition
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_R600ISELLOWERING_H
+#define LLVM_LIB_TARGET_R600_R600ISELLOWERING_H
+
+#include "AMDGPUISelLowering.h"
+
+namespace llvm {
+
+class R600InstrInfo;
+
+class R600TargetLowering : public AMDGPUTargetLowering {
+public:
+ R600TargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
+ MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock * BB) const override;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+ void ReplaceNodeResults(SDNode * N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
+ SDValue LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+ EVT getSetCCResultType(LLVMContext &, EVT VT) const override;
+private:
+ unsigned Gen;
+ /// Each OpenCL kernel has nine implicit parameters that are stored in the
+ /// first nine dwords of a Vertex Buffer. These implicit parameters are
+ /// lowered to load instructions which retrieve the values from the Vertex
+ /// Buffer.
+ SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
+ SDLoc DL, unsigned DwordOffset) const;
+
+ void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineRegisterInfo & MRI, unsigned dword_offset) const;
+ SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG,
+ SDLoc DL) const;
+ SDValue vectorToVerticalVector(SelectionDAG &DAG, SDValue Vector) const;
+
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHLParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRXParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
+ unsigned mainop, unsigned ovf) const;
+
+ SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
+ SelectionDAG &DAG) const;
+ void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
+ unsigned &Channel, unsigned &PtrIncr) const;
+ bool isZero(SDValue Op) const;
+ SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrFormats.td b/contrib/llvm/lib/Target/AMDGPU/R600InstrFormats.td
new file mode 100644
index 0000000..0ffd485
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrFormats.td
@@ -0,0 +1,495 @@
+//===-- R600InstrFormats.td - R600 Instruction Encodings ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Instruction format definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin>
+ : AMDGPUInst <outs, ins, asm, pattern> {
+
+ field bits<64> Inst;
+ bit Trig = 0;
+ bit Op3 = 0;
+ bit isVector = 0;
+ bits<2> FlagOperandIdx = 0;
+ bit Op1 = 0;
+ bit Op2 = 0;
+ bit LDS_1A = 0;
+ bit LDS_1A1D = 0;
+ bit HasNativeOperands = 0;
+ bit VTXInst = 0;
+ bit TEXInst = 0;
+ bit ALUInst = 0;
+ bit IsExport = 0;
+ bit LDS_1A2D = 0;
+
+ let Namespace = "AMDGPU";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let Itinerary = itin;
+
+ // No AsmMatcher support.
+ let isCodeGenOnly = 1;
+
+ let TSFlags{4} = Trig;
+ let TSFlags{5} = Op3;
+
+ // Vector instructions are instructions that must fill all slots in an
+ // instruction group
+ let TSFlags{6} = isVector;
+ let TSFlags{8-7} = FlagOperandIdx;
+ let TSFlags{9} = HasNativeOperands;
+ let TSFlags{10} = Op1;
+ let TSFlags{11} = Op2;
+ let TSFlags{12} = VTXInst;
+ let TSFlags{13} = TEXInst;
+ let TSFlags{14} = ALUInst;
+ let TSFlags{15} = LDS_1A;
+ let TSFlags{16} = LDS_1A1D;
+ let TSFlags{17} = IsExport;
+ let TSFlags{18} = LDS_1A2D;
+}
+
+//===----------------------------------------------------------------------===//
+// ALU instructions
+//===----------------------------------------------------------------------===//
+
+class R600_ALU_LDS_Word0 {
+ field bits<32> Word0;
+
+ bits<11> src0;
+ bits<1> src0_rel;
+ bits<11> src1;
+ bits<1> src1_rel;
+ bits<3> index_mode = 0;
+ bits<2> pred_sel;
+ bits<1> last;
+
+ bits<9> src0_sel = src0{8-0};
+ bits<2> src0_chan = src0{10-9};
+ bits<9> src1_sel = src1{8-0};
+ bits<2> src1_chan = src1{10-9};
+
+ let Word0{8-0} = src0_sel;
+ let Word0{9} = src0_rel;
+ let Word0{11-10} = src0_chan;
+ let Word0{21-13} = src1_sel;
+ let Word0{22} = src1_rel;
+ let Word0{24-23} = src1_chan;
+ let Word0{28-26} = index_mode;
+ let Word0{30-29} = pred_sel;
+ let Word0{31} = last;
+}
+
+class R600ALU_Word0 : R600_ALU_LDS_Word0 {
+
+ bits<1> src0_neg;
+ bits<1> src1_neg;
+
+ let Word0{12} = src0_neg;
+ let Word0{25} = src1_neg;
+}
+
+class R600ALU_Word1 {
+ field bits<32> Word1;
+
+ bits<11> dst;
+ bits<3> bank_swizzle;
+ bits<1> dst_rel;
+ bits<1> clamp;
+
+ bits<7> dst_sel = dst{6-0};
+ bits<2> dst_chan = dst{10-9};
+
+ let Word1{20-18} = bank_swizzle;
+ let Word1{27-21} = dst_sel;
+ let Word1{28} = dst_rel;
+ let Word1{30-29} = dst_chan;
+ let Word1{31} = clamp;
+}
+
+class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1{
+
+ bits<1> src0_abs;
+ bits<1> src1_abs;
+ bits<1> update_exec_mask;
+ bits<1> update_pred;
+ bits<1> write;
+ bits<2> omod;
+
+ let Word1{0} = src0_abs;
+ let Word1{1} = src1_abs;
+ let Word1{2} = update_exec_mask;
+ let Word1{3} = update_pred;
+ let Word1{4} = write;
+ let Word1{6-5} = omod;
+ let Word1{17-7} = alu_inst;
+}
+
+class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{
+
+ bits<11> src2;
+ bits<1> src2_rel;
+ bits<1> src2_neg;
+
+ bits<9> src2_sel = src2{8-0};
+ bits<2> src2_chan = src2{10-9};
+
+ let Word1{8-0} = src2_sel;
+ let Word1{9} = src2_rel;
+ let Word1{11-10} = src2_chan;
+ let Word1{12} = src2_neg;
+ let Word1{17-13} = alu_inst;
+}
+
+class R600LDS_Word1 {
+ field bits<32> Word1;
+
+ bits<11> src2;
+ bits<9> src2_sel = src2{8-0};
+ bits<2> src2_chan = src2{10-9};
+ bits<1> src2_rel;
+ // offset specifies the stride offset to the second set of data to be read
+ // from. This is a dword offset.
+ bits<5> alu_inst = 17; // OP3_INST_LDS_IDX_OP
+ bits<3> bank_swizzle;
+ bits<6> lds_op;
+ bits<2> dst_chan = 0;
+
+ let Word1{8-0} = src2_sel;
+ let Word1{9} = src2_rel;
+ let Word1{11-10} = src2_chan;
+ let Word1{17-13} = alu_inst;
+ let Word1{20-18} = bank_swizzle;
+ let Word1{26-21} = lds_op;
+ let Word1{30-29} = dst_chan;
+}
+
+
+/*
+XXX: R600 subtarget uses a slightly different encoding than the other
+subtargets. We currently handle this in R600MCCodeEmitter, but we may
+want to use these instruction classes in the future.
+
+class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 {
+
+ bits<1> fog_merge;
+ bits<10> alu_inst;
+
+ let Inst{37} = fog_merge;
+ let Inst{39-38} = omod;
+ let Inst{49-40} = alu_inst;
+}
+
+class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {
+
+ bits<11> alu_inst;
+
+ let Inst{38-37} = omod;
+ let Inst{49-39} = alu_inst;
+}
+*/
+
+//===----------------------------------------------------------------------===//
+// Vertex Fetch instructions
+//===----------------------------------------------------------------------===//
+
+class VTX_WORD0 {
+ field bits<32> Word0;
+ bits<7> src_gpr;
+ bits<5> VC_INST;
+ bits<2> FETCH_TYPE;
+ bits<1> FETCH_WHOLE_QUAD;
+ bits<8> BUFFER_ID;
+ bits<1> SRC_REL;
+ bits<2> SRC_SEL_X;
+
+ let Word0{4-0} = VC_INST;
+ let Word0{6-5} = FETCH_TYPE;
+ let Word0{7} = FETCH_WHOLE_QUAD;
+ let Word0{15-8} = BUFFER_ID;
+ let Word0{22-16} = src_gpr;
+ let Word0{23} = SRC_REL;
+ let Word0{25-24} = SRC_SEL_X;
+}
+
+class VTX_WORD0_eg : VTX_WORD0 {
+
+ bits<6> MEGA_FETCH_COUNT;
+
+ let Word0{31-26} = MEGA_FETCH_COUNT;
+}
+
+class VTX_WORD0_cm : VTX_WORD0 {
+
+ bits<2> SRC_SEL_Y;
+ bits<2> STRUCTURED_READ;
+ bits<1> LDS_REQ;
+ bits<1> COALESCED_READ;
+
+ let Word0{27-26} = SRC_SEL_Y;
+ let Word0{29-28} = STRUCTURED_READ;
+ let Word0{30} = LDS_REQ;
+ let Word0{31} = COALESCED_READ;
+}
+
+class VTX_WORD1_GPR {
+ field bits<32> Word1;
+ bits<7> dst_gpr;
+ bits<1> DST_REL;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ bits<1> USE_CONST_FIELDS;
+ bits<6> DATA_FORMAT;
+ bits<2> NUM_FORMAT_ALL;
+ bits<1> FORMAT_COMP_ALL;
+ bits<1> SRF_MODE_ALL;
+
+ let Word1{6-0} = dst_gpr;
+ let Word1{7} = DST_REL;
+ let Word1{8} = 0; // Reserved
+ let Word1{11-9} = DST_SEL_X;
+ let Word1{14-12} = DST_SEL_Y;
+ let Word1{17-15} = DST_SEL_Z;
+ let Word1{20-18} = DST_SEL_W;
+ let Word1{21} = USE_CONST_FIELDS;
+ let Word1{27-22} = DATA_FORMAT;
+ let Word1{29-28} = NUM_FORMAT_ALL;
+ let Word1{30} = FORMAT_COMP_ALL;
+ let Word1{31} = SRF_MODE_ALL;
+}
+
+//===----------------------------------------------------------------------===//
+// Texture fetch instructions
+//===----------------------------------------------------------------------===//
+
+class TEX_WORD0 {
+ field bits<32> Word0;
+
+ bits<5> TEX_INST;
+ bits<2> INST_MOD;
+ bits<1> FETCH_WHOLE_QUAD;
+ bits<8> RESOURCE_ID;
+ bits<7> SRC_GPR;
+ bits<1> SRC_REL;
+ bits<1> ALT_CONST;
+ bits<2> RESOURCE_INDEX_MODE;
+ bits<2> SAMPLER_INDEX_MODE;
+
+ let Word0{4-0} = TEX_INST;
+ let Word0{6-5} = INST_MOD;
+ let Word0{7} = FETCH_WHOLE_QUAD;
+ let Word0{15-8} = RESOURCE_ID;
+ let Word0{22-16} = SRC_GPR;
+ let Word0{23} = SRC_REL;
+ let Word0{24} = ALT_CONST;
+ let Word0{26-25} = RESOURCE_INDEX_MODE;
+ let Word0{28-27} = SAMPLER_INDEX_MODE;
+}
+
+class TEX_WORD1 {
+ field bits<32> Word1;
+
+ bits<7> DST_GPR;
+ bits<1> DST_REL;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ bits<7> LOD_BIAS;
+ bits<1> COORD_TYPE_X;
+ bits<1> COORD_TYPE_Y;
+ bits<1> COORD_TYPE_Z;
+ bits<1> COORD_TYPE_W;
+
+ let Word1{6-0} = DST_GPR;
+ let Word1{7} = DST_REL;
+ let Word1{11-9} = DST_SEL_X;
+ let Word1{14-12} = DST_SEL_Y;
+ let Word1{17-15} = DST_SEL_Z;
+ let Word1{20-18} = DST_SEL_W;
+ let Word1{27-21} = LOD_BIAS;
+ let Word1{28} = COORD_TYPE_X;
+ let Word1{29} = COORD_TYPE_Y;
+ let Word1{30} = COORD_TYPE_Z;
+ let Word1{31} = COORD_TYPE_W;
+}
+
+class TEX_WORD2 {
+ field bits<32> Word2;
+
+ bits<5> OFFSET_X;
+ bits<5> OFFSET_Y;
+ bits<5> OFFSET_Z;
+ bits<5> SAMPLER_ID;
+ bits<3> SRC_SEL_X;
+ bits<3> SRC_SEL_Y;
+ bits<3> SRC_SEL_Z;
+ bits<3> SRC_SEL_W;
+
+ let Word2{4-0} = OFFSET_X;
+ let Word2{9-5} = OFFSET_Y;
+ let Word2{14-10} = OFFSET_Z;
+ let Word2{19-15} = SAMPLER_ID;
+ let Word2{22-20} = SRC_SEL_X;
+ let Word2{25-23} = SRC_SEL_Y;
+ let Word2{28-26} = SRC_SEL_Z;
+ let Word2{31-29} = SRC_SEL_W;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions
+//===----------------------------------------------------------------------===//
+
+class CF_WORD1_R600 {
+ field bits<32> Word1;
+
+ bits<3> POP_COUNT;
+ bits<5> CF_CONST;
+ bits<2> COND;
+ bits<3> COUNT;
+ bits<6> CALL_COUNT;
+ bits<1> COUNT_3;
+ bits<1> END_OF_PROGRAM;
+ bits<1> VALID_PIXEL_MODE;
+ bits<7> CF_INST;
+ bits<1> WHOLE_QUAD_MODE;
+ bits<1> BARRIER;
+
+ let Word1{2-0} = POP_COUNT;
+ let Word1{7-3} = CF_CONST;
+ let Word1{9-8} = COND;
+ let Word1{12-10} = COUNT;
+ let Word1{18-13} = CALL_COUNT;
+ let Word1{19} = COUNT_3;
+ let Word1{21} = END_OF_PROGRAM;
+ let Word1{22} = VALID_PIXEL_MODE;
+ let Word1{29-23} = CF_INST;
+ let Word1{30} = WHOLE_QUAD_MODE;
+ let Word1{31} = BARRIER;
+}
+
+class CF_WORD0_EG {
+ field bits<32> Word0;
+
+ bits<24> ADDR;
+ bits<3> JUMPTABLE_SEL;
+
+ let Word0{23-0} = ADDR;
+ let Word0{26-24} = JUMPTABLE_SEL;
+}
+
+class CF_WORD1_EG {
+ field bits<32> Word1;
+
+ bits<3> POP_COUNT;
+ bits<5> CF_CONST;
+ bits<2> COND;
+ bits<6> COUNT;
+ bits<1> VALID_PIXEL_MODE;
+ bits<1> END_OF_PROGRAM;
+ bits<8> CF_INST;
+ bits<1> BARRIER;
+
+ let Word1{2-0} = POP_COUNT;
+ let Word1{7-3} = CF_CONST;
+ let Word1{9-8} = COND;
+ let Word1{15-10} = COUNT;
+ let Word1{20} = VALID_PIXEL_MODE;
+ let Word1{21} = END_OF_PROGRAM;
+ let Word1{29-22} = CF_INST;
+ let Word1{31} = BARRIER;
+}
+
+class CF_ALU_WORD0 {
+ field bits<32> Word0;
+
+ bits<22> ADDR;
+ bits<4> KCACHE_BANK0;
+ bits<4> KCACHE_BANK1;
+ bits<2> KCACHE_MODE0;
+
+ let Word0{21-0} = ADDR;
+ let Word0{25-22} = KCACHE_BANK0;
+ let Word0{29-26} = KCACHE_BANK1;
+ let Word0{31-30} = KCACHE_MODE0;
+}
+
+class CF_ALU_WORD1 {
+ field bits<32> Word1;
+
+ bits<2> KCACHE_MODE1;
+ bits<8> KCACHE_ADDR0;
+ bits<8> KCACHE_ADDR1;
+ bits<7> COUNT;
+ bits<1> ALT_CONST;
+ bits<4> CF_INST;
+ bits<1> WHOLE_QUAD_MODE;
+ bits<1> BARRIER;
+
+ let Word1{1-0} = KCACHE_MODE1;
+ let Word1{9-2} = KCACHE_ADDR0;
+ let Word1{17-10} = KCACHE_ADDR1;
+ let Word1{24-18} = COUNT;
+ let Word1{25} = ALT_CONST;
+ let Word1{29-26} = CF_INST;
+ let Word1{30} = WHOLE_QUAD_MODE;
+ let Word1{31} = BARRIER;
+}
+
+class CF_ALLOC_EXPORT_WORD0_RAT {
+ field bits<32> Word0;
+
+ bits<4> rat_id;
+ bits<6> rat_inst;
+ bits<2> rim;
+ bits<2> type;
+ bits<7> rw_gpr;
+ bits<1> rw_rel;
+ bits<7> index_gpr;
+ bits<2> elem_size;
+
+ let Word0{3-0} = rat_id;
+ let Word0{9-4} = rat_inst;
+ let Word0{10} = 0; // Reserved
+ let Word0{12-11} = rim;
+ let Word0{14-13} = type;
+ let Word0{21-15} = rw_gpr;
+ let Word0{22} = rw_rel;
+ let Word0{29-23} = index_gpr;
+ let Word0{31-30} = elem_size;
+}
+
+class CF_ALLOC_EXPORT_WORD1_BUF {
+ field bits<32> Word1;
+
+ bits<12> array_size;
+ bits<4> comp_mask;
+ bits<4> burst_count;
+ bits<1> vpm;
+ bits<1> eop;
+ bits<8> cf_inst;
+ bits<1> mark;
+ bits<1> barrier;
+
+ let Word1{11-0} = array_size;
+ let Word1{15-12} = comp_mask;
+ let Word1{19-16} = burst_count;
+ let Word1{20} = vpm;
+ let Word1{21} = eop;
+ let Word1{29-22} = cf_inst;
+ let Word1{30} = mark;
+ let Word1{31} = barrier;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
new file mode 100644
index 0000000..5ef883c
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -0,0 +1,1435 @@
+//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Implementation of TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600InstrInfo.h"
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Defines.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define GET_INSTRINFO_CTOR_DTOR
+#include "AMDGPUGenDFAPacketizer.inc"
+
+R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st)
+ : AMDGPUInstrInfo(st), RI() {}
+
+const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
+}
+
+bool R600InstrInfo::isVector(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
+}
+
+void
+R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned VectorComponents = 0;
+ if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
+ AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
+ (AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
+ AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
+ VectorComponents = 4;
+ } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
+ AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
+ (AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
+ AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
+ VectorComponents = 2;
+ }
+
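+  // Vector register copies are expanded to one MOV per 32-bit channel;
+  // scalar copies become a single MOV that keeps the kill flag of the source.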
+ if (VectorComponents > 0) {
+ for (unsigned I = 0; I < VectorComponents; I++) {
+ unsigned SubRegIndex = RI.getSubRegFromChannel(I);
+ buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+ RI.getSubReg(DestReg, SubRegIndex),
+ RI.getSubReg(SrcReg, SubRegIndex))
+ .addReg(DestReg,
+ RegState::Define | RegState::Implicit);
+ }
+ } else {
+ MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+ DestReg, SrcReg);
+ NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
+ .setIsKill(KillSrc);
+ }
+}
+
+/// \returns true if \p MBBI can be moved into a new basic block.
+bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const {
+ for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
+ E = MBBI->operands_end(); I != E; ++I) {
+ if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
+ I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
+ return false;
+ }
+ return true;
+}
+
+bool R600InstrInfo::isMov(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::MOV:
+ case AMDGPU::MOV_IMM_F32:
+ case AMDGPU::MOV_IMM_I32:
+ return true;
+ }
+}
+
+// Some instructions act as placeholders to emulate operations that the GPU
+// hardware does automatically. This function can be used to check if
+// an opcode falls into this category.
+bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return false;
+ case AMDGPU::RETURN:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
+ return false;
+}
+
+bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::CUBE_r600_pseudo:
+ case AMDGPU::CUBE_r600_real:
+ case AMDGPU::CUBE_eg_pseudo:
+ case AMDGPU::CUBE_eg_real:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
+ unsigned TargetFlags = get(Opcode).TSFlags;
+
+ return (TargetFlags & R600_InstFlag::ALU_INST);
+}
+
+bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
+ unsigned TargetFlags = get(Opcode).TSFlags;
+
+ return ((TargetFlags & R600_InstFlag::OP1) |
+ (TargetFlags & R600_InstFlag::OP2) |
+ (TargetFlags & R600_InstFlag::OP3));
+}
+
+bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
+ unsigned TargetFlags = get(Opcode).TSFlags;
+
+ return ((TargetFlags & R600_InstFlag::LDS_1A) |
+ (TargetFlags & R600_InstFlag::LDS_1A1D) |
+ (TargetFlags & R600_InstFlag::LDS_1A2D));
+}
+
+bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
+ return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
+}
+
+bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
+ return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
+}
+
+bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
+ if (isALUInstr(MI->getOpcode()))
+ return true;
+ if (isVector(*MI) || isCubeOp(MI->getOpcode()))
+ return true;
+ switch (MI->getOpcode()) {
+ case AMDGPU::PRED_X:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::COPY:
+ case AMDGPU::DOT_4:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
+ if (ST.hasCaymanISA())
+ return false;
+ return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
+}
+
+bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
+ return isTransOnly(MI->getOpcode());
+}
+
+bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
+ return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
+}
+
+bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
+ return isVectorOnly(MI->getOpcode());
+}
+
+bool R600InstrInfo::isExport(unsigned Opcode) const {
+ return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
+}
+
+bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
+ return ST.hasVertexCache() && IS_VTX(get(Opcode));
+}
+
+bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
+ return MFI->getShaderType() != ShaderType::COMPUTE &&
+ usesVertexCache(MI->getOpcode());
+}
+
+bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
+ return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
+}
+
+bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
+ return (MFI->getShaderType() == ShaderType::COMPUTE &&
+ usesVertexCache(MI->getOpcode())) ||
+ usesTextureCache(MI->getOpcode());
+}
+
+bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
+ switch (Opcode) {
+ case AMDGPU::KILLGT:
+ case AMDGPU::GROUP_BARRIER:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
+ return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
+}
+
+bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
+ return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
+}
+
+bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
+ if (!isALUInstr(MI->getOpcode())) {
+ return false;
+ }
+ for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+ E = MI->operands_end(); I != E; ++I) {
+ if (!I->isReg() || !I->isUse() ||
+ TargetRegisterInfo::isVirtualRegister(I->getReg()))
+ continue;
+
+ if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
+ return true;
+ }
+ return false;
+}
+
+int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
+ static const unsigned OpTable[] = {
+ AMDGPU::OpName::src0,
+ AMDGPU::OpName::src1,
+ AMDGPU::OpName::src2
+ };
+
+ assert (SrcNum < 3);
+ return getOperandIdx(Opcode, OpTable[SrcNum]);
+}
+
+int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
+ static const unsigned SrcSelTable[][2] = {
+ {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
+ {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
+ {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
+ {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
+ {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
+ {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
+ {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
+ {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
+ {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
+ {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
+ {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
+ };
+
+ for (const auto &Row : SrcSelTable) {
+ if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) {
+ return getOperandIdx(Opcode, Row[1]);
+ }
+ }
+ return -1;
+}
+
+SmallVector<std::pair<MachineOperand *, int64_t>, 3>
+R600InstrInfo::getSrcs(MachineInstr *MI) const {
+ SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
+
+ if (MI->getOpcode() == AMDGPU::DOT_4) {
+ static const unsigned OpTable[8][2] = {
+ {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
+ {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
+ {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
+ {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
+ {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
+ {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
+ {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
+ {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
+ };
+
+ for (unsigned j = 0; j < 8; j++) {
+ MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
+ OpTable[j][0]));
+ unsigned Reg = MO.getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
+ unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
+ OpTable[j][1])).getImm();
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
+ continue;
+ }
+
+ }
+ return Result;
+ }
+
+ static const unsigned OpTable[3][2] = {
+ {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
+ {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
+ {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
+ };
+
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+ if (SrcIdx < 0)
+ break;
+ MachineOperand &MO = MI->getOperand(SrcIdx);
+ unsigned Reg = MI->getOperand(SrcIdx).getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
+ unsigned Sel = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
+ continue;
+ }
+ if (Reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned Imm = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
+ continue;
+ }
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
+ }
+ return Result;
+}
+
+std::vector<std::pair<int, unsigned> >
+R600InstrInfo::ExtractSrcs(MachineInstr *MI,
+ const DenseMap<unsigned, unsigned> &PV,
+ unsigned &ConstCount) const {
+ ConstCount = 0;
+ ArrayRef<std::pair<MachineOperand *, int64_t>> Srcs = getSrcs(MI);
+ const std::pair<int, unsigned> DummyPair(-1, 0);
+ std::vector<std::pair<int, unsigned> > Result;
+ unsigned i = 0;
+ for (unsigned n = Srcs.size(); i < n; ++i) {
+ unsigned Reg = Srcs[i].first->getReg();
+ unsigned Index = RI.getEncodingValue(Reg) & 0xff;
+ if (Reg == AMDGPU::OQAP) {
+ Result.push_back(std::pair<int, unsigned>(Index, 0));
+ }
+ if (PV.find(Reg) != PV.end()) {
+      // 255 is used to tell later passes that this is a PS/PV reg.
+ Result.push_back(std::pair<int, unsigned>(255, 0));
+ continue;
+ }
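+    // Encoding values above 127 denote constants: count them and push a dummy
+    // pair so the read-port checks ignore this source.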
+ if (Index > 127) {
+ ConstCount++;
+ Result.push_back(DummyPair);
+ continue;
+ }
+ unsigned Chan = RI.getHWRegChan(Reg);
+ Result.push_back(std::pair<int, unsigned>(Index, Chan));
+ }
+ for (; i < 3; ++i)
+ Result.push_back(DummyPair);
+ return Result;
+}
+
+static std::vector<std::pair<int, unsigned> >
+Swizzle(std::vector<std::pair<int, unsigned> > Src,
+ R600InstrInfo::BankSwizzle Swz) {
+ if (Src[0] == Src[1])
+ Src[1].first = -1;
+ switch (Swz) {
+ case R600InstrInfo::ALU_VEC_012_SCL_210:
+ break;
+ case R600InstrInfo::ALU_VEC_021_SCL_122:
+ std::swap(Src[1], Src[2]);
+ break;
+ case R600InstrInfo::ALU_VEC_102_SCL_221:
+ std::swap(Src[0], Src[1]);
+ break;
+ case R600InstrInfo::ALU_VEC_120_SCL_212:
+ std::swap(Src[0], Src[1]);
+ std::swap(Src[0], Src[2]);
+ break;
+ case R600InstrInfo::ALU_VEC_201:
+ std::swap(Src[0], Src[2]);
+ std::swap(Src[0], Src[1]);
+ break;
+ case R600InstrInfo::ALU_VEC_210:
+ std::swap(Src[0], Src[2]);
+ break;
+ }
+ return Src;
+}
+
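+// Returns the cycle in which the trans-slot operand Op is read for the given
+// bank swizzle; the mapping follows the SCL_* suffix of the swizzle name.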
+static unsigned
+getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
+ switch (Swz) {
+ case R600InstrInfo::ALU_VEC_012_SCL_210: {
+ unsigned Cycles[3] = { 2, 1, 0};
+ return Cycles[Op];
+ }
+ case R600InstrInfo::ALU_VEC_021_SCL_122: {
+ unsigned Cycles[3] = { 1, 2, 2};
+ return Cycles[Op];
+ }
+ case R600InstrInfo::ALU_VEC_120_SCL_212: {
+ unsigned Cycles[3] = { 2, 1, 2};
+ return Cycles[Op];
+ }
+ case R600InstrInfo::ALU_VEC_102_SCL_221: {
+ unsigned Cycles[3] = { 2, 2, 1};
+ return Cycles[Op];
+ }
+ default:
+ llvm_unreachable("Wrong Swizzle for Trans Slot");
+ return 0;
+ }
+}
+
+/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
+/// in the same Instruction Group while meeting read port limitations given a
+/// Swz swizzle sequence.
+unsigned R600InstrInfo::isLegalUpTo(
+ const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ const std::vector<R600InstrInfo::BankSwizzle> &Swz,
+ const std::vector<std::pair<int, unsigned> > &TransSrcs,
+ R600InstrInfo::BankSwizzle TransSwz) const {
+ int Vector[4][3];
+ memset(Vector, -1, sizeof(Vector));
+ for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
+ const std::vector<std::pair<int, unsigned> > &Srcs =
+ Swizzle(IGSrcs[i], Swz[i]);
+ for (unsigned j = 0; j < 3; j++) {
+ const std::pair<int, unsigned> &Src = Srcs[j];
+ if (Src.first < 0 || Src.first == 255)
+ continue;
+ if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
+ if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
+ Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
+ // The value from output queue A (denoted by register OQAP) can
+ // only be fetched during the first cycle.
+ return false;
+ }
+ // OQAP does not count towards the normal read port restrictions
+ continue;
+ }
+ if (Vector[Src.second][j] < 0)
+ Vector[Src.second][j] = Src.first;
+ if (Vector[Src.second][j] != Src.first)
+ return i;
+ }
+ }
+ // Now check Trans Alu
+ for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
+ const std::pair<int, unsigned> &Src = TransSrcs[i];
+ unsigned Cycle = getTransSwizzle(TransSwz, i);
+ if (Src.first < 0)
+ continue;
+ if (Src.first == 255)
+ continue;
+ if (Vector[Src.second][Cycle] < 0)
+ Vector[Src.second][Cycle] = Src.first;
+ if (Vector[Src.second][Cycle] != Src.first)
+ return IGSrcs.size() - 1;
+ }
+ return IGSrcs.size();
+}
+
+/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
+/// (in lexicographic order) swizzle sequence, assuming that all swizzles after
+/// Idx can be skipped.
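+/// For example, with SwzCandidate = {VEC_012, VEC_210, VEC_021} and Idx = 1,
+/// entry 1 is already maximal, so entry 0 is bumped to VEC_021 and every entry
+/// after it is reset to VEC_012, giving {VEC_021, VEC_012, VEC_012}.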
+static bool
+NextPossibleSolution(
+ std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+ unsigned Idx) {
+ assert(Idx < SwzCandidate.size());
+ int ResetIdx = Idx;
+ while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
+ ResetIdx --;
+ for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
+ SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
+ }
+ if (ResetIdx == -1)
+ return false;
+ int NextSwizzle = SwzCandidate[ResetIdx] + 1;
+ SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
+ return true;
+}
+
+/// Enumerate all possible swizzle sequences to find one that can meet all
+/// read port requirements.
+bool R600InstrInfo::FindSwizzleForVectorSlot(
+ const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+ const std::vector<std::pair<int, unsigned> > &TransSrcs,
+ R600InstrInfo::BankSwizzle TransSwz) const {
+ unsigned ValidUpTo = 0;
+ do {
+ ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
+ if (ValidUpTo == IGSrcs.size())
+ return true;
+ } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
+ return false;
+}
+
+/// Instructions in the Trans slot can't read a GPR at cycle 0 if they also
+/// read a constant, and can't read a GPR at cycle 1 if they read two constants.
+static bool
+isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
+ const std::vector<std::pair<int, unsigned> > &TransOps,
+ unsigned ConstCount) {
+ // TransALU can't read 3 constants
+ if (ConstCount > 2)
+ return false;
+ for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
+ const std::pair<int, unsigned> &Src = TransOps[i];
+ unsigned Cycle = getTransSwizzle(TransSwz, i);
+ if (Src.first < 0)
+ continue;
+ if (ConstCount > 0 && Cycle == 0)
+ return false;
+ if (ConstCount > 1 && Cycle == 1)
+ return false;
+ }
+ return true;
+}
+
+bool
+R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
+ const DenseMap<unsigned, unsigned> &PV,
+ std::vector<BankSwizzle> &ValidSwizzle,
+ bool isLastAluTrans)
+ const {
+  // TODO: Support shared src0 - src1 operand.
+
+ std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
+ ValidSwizzle.clear();
+ unsigned ConstCount;
+ BankSwizzle TransBS = ALU_VEC_012_SCL_210;
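+  // Collect the sources of every instruction in the group together with the
+  // bank_swizzle value each one currently carries.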
+ for (unsigned i = 0, e = IG.size(); i < e; ++i) {
+ IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
+ unsigned Op = getOperandIdx(IG[i]->getOpcode(),
+ AMDGPU::OpName::bank_swizzle);
+ ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
+ IG[i]->getOperand(Op).getImm());
+ }
+ std::vector<std::pair<int, unsigned> > TransOps;
+ if (!isLastAluTrans)
+ return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
+
+ TransOps = std::move(IGSrcs.back());
+ IGSrcs.pop_back();
+ ValidSwizzle.pop_back();
+
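+  // Only the four swizzles with an SCL_ variant are valid when the last ALU
+  // of the group runs in the Trans slot; try each until the vector slots fit.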
+ static const R600InstrInfo::BankSwizzle TransSwz[] = {
+ ALU_VEC_012_SCL_210,
+ ALU_VEC_021_SCL_122,
+ ALU_VEC_120_SCL_212,
+ ALU_VEC_102_SCL_221
+ };
+ for (unsigned i = 0; i < 4; i++) {
+ TransBS = TransSwz[i];
+ if (!isConstCompatible(TransBS, TransOps, ConstCount))
+ continue;
+ bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
+ TransBS);
+ if (Result) {
+ ValidSwizzle.push_back(TransBS);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+bool
+R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
+ const {
+ assert (Consts.size() <= 12 && "Too many operands in instructions group");
+ unsigned Pair1 = 0, Pair2 = 0;
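+  // An instruction group may read at most two distinct (index, [XY]/[ZW])
+  // constant pairs; Pair1 and Pair2 record the pairs seen so far, with 0
+  // meaning unused.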
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ unsigned ReadConstHalf = Consts[i] & 2;
+ unsigned ReadConstIndex = Consts[i] & (~3);
+ unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
+ if (!Pair1) {
+ Pair1 = ReadHalfConst;
+ continue;
+ }
+ if (Pair1 == ReadHalfConst)
+ continue;
+ if (!Pair2) {
+ Pair2 = ReadHalfConst;
+ continue;
+ }
+ if (Pair2 != ReadHalfConst)
+ return false;
+ }
+ return true;
+}
+
+bool
+R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
+ const {
+ std::vector<unsigned> Consts;
+ SmallSet<int64_t, 4> Literals;
+ for (unsigned i = 0, n = MIs.size(); i < n; i++) {
+ MachineInstr *MI = MIs[i];
+ if (!isALUInstr(MI->getOpcode()))
+ continue;
+
+ ArrayRef<std::pair<MachineOperand *, int64_t>> Srcs = getSrcs(MI);
+
+ for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
+ std::pair<MachineOperand *, unsigned> Src = Srcs[j];
+ if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
+ Literals.insert(Src.second);
+ if (Literals.size() > 4)
+ return false;
+ if (Src.first->getReg() == AMDGPU::ALU_CONST)
+ Consts.push_back(Src.second);
+ if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
+ AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
+ unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
+ unsigned Chan = RI.getHWRegChan(Src.first->getReg());
+ Consts.push_back((Index << 2) | Chan);
+ }
+ }
+ }
+ return fitsConstReadLimitations(Consts);
+}
+
+DFAPacketizer *
+R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
+ const InstrItineraryData *II = STI.getInstrItineraryData();
+ return static_cast<const AMDGPUSubtarget &>(STI).createDFAPacketizer(II);
+}
+
+static bool
+isPredicateSetter(unsigned Opcode) {
+ switch (Opcode) {
+ case AMDGPU::PRED_X:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static MachineInstr *
+findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ while (I != MBB.begin()) {
+ --I;
+ MachineInstr *MI = I;
+ if (isPredicateSetter(MI->getOpcode()))
+ return MI;
+ }
+
+ return nullptr;
+}
+
+static
+bool isJump(unsigned Opcode) {
+ return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
+}
+
+static bool isBranch(unsigned Opcode) {
+ return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
+ Opcode == AMDGPU::BRANCH_COND_f32;
+}
+
+bool
+R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Most of the following comes from the ARM implementation of AnalyzeBranch
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ // AMDGPU::BRANCH* instructions are only available after isel and are not
+ // handled
+ if (isBranch(I->getOpcode()))
+ return true;
+ if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
+ return false;
+ }
+
+ // Remove successive JUMP
+ while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
+ MachineBasicBlock::iterator PriorI = std::prev(I);
+ if (AllowModify)
+ I->removeFromParent();
+ I = PriorI;
+ }
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() ||
+ !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
+ if (LastOpc == AMDGPU::JUMP) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastOpc == AMDGPU::JUMP_COND) {
+ MachineInstr *predSet = I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
+ MachineInstr *predSet = --I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ FBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+static
+MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
+ for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
+ It != E; ++It) {
+ if (It->getOpcode() == AMDGPU::CF_ALU ||
+ It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
+ return std::prev(It.base());
+ }
+ return MBB.end();
+}
+
+unsigned
+R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond,
+ DebugLoc DL) const {
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ if (!FBB) {
+ if (Cond.empty()) {
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
+ return 1;
+ } else {
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+ assert(PredSet && "No previous predicate !");
+ addFlag(PredSet, 0, MO_FLAG_PUSH);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+ if (CfAlu == MBB.end())
+ return 1;
+ assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
+ CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
+ return 1;
+ }
+ } else {
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+ assert(PredSet && "No previous predicate !");
+ addFlag(PredSet, 0, MO_FLAG_PUSH);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
+ MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+ if (CfAlu == MBB.end())
+ return 2;
+ assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
+ CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
+ return 2;
+ }
+}
+
+unsigned
+R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+
+ // Note : we leave PRED* instructions there.
+ // They may be needed when predicating instructions.
+
+ MachineBasicBlock::iterator I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 0;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ default:
+ return 0;
+ case AMDGPU::JUMP_COND: {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ clearFlag(predSet, 0, MO_FLAG_PUSH);
+ I->eraseFromParent();
+ MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+ if (CfAlu == MBB.end())
+ break;
+ assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
+ CfAlu->setDesc(get(AMDGPU::CF_ALU));
+ break;
+ }
+ case AMDGPU::JUMP:
+ I->eraseFromParent();
+ break;
+ }
+ I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 1;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ // FIXME: only one case??
+ default:
+ return 1;
+ case AMDGPU::JUMP_COND: {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ clearFlag(predSet, 0, MO_FLAG_PUSH);
+ I->eraseFromParent();
+ MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+ if (CfAlu == MBB.end())
+ break;
+ assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
+ CfAlu->setDesc(get(AMDGPU::CF_ALU));
+ break;
+ }
+ case AMDGPU::JUMP:
+ I->eraseFromParent();
+ break;
+ }
+ return 2;
+}
+
+bool
+R600InstrInfo::isPredicated(const MachineInstr *MI) const {
+ int idx = MI->findFirstPredOperandIdx();
+ if (idx < 0)
+ return false;
+
+ unsigned Reg = MI->getOperand(idx).getReg();
+ switch (Reg) {
+ default: return false;
+ case AMDGPU::PRED_SEL_ONE:
+ case AMDGPU::PRED_SEL_ZERO:
+ case AMDGPU::PREDICATE_BIT:
+ return true;
+ }
+}
+
+bool
+R600InstrInfo::isPredicable(MachineInstr *MI) const {
+ // XXX: KILL* instructions can be predicated, but they must be the last
+ // instruction in a clause, so this means any instructions after them cannot
+ // be predicated. Until we have proper support for instruction clauses in the
+ // backend, we will mark KILL* instructions as unpredicable.
+
+ if (MI->getOpcode() == AMDGPU::KILLGT) {
+ return false;
+ } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
+    // If the clause starts in the middle of the MBB then the MBB contains
+    // more than a single clause; we are unable to predicate several clauses.
+ if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
+ return false;
+    // TODO: We don't support KC merging at the moment.
+ if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
+ return false;
+ return true;
+ } else if (isVector(*MI)) {
+ return false;
+ } else {
+ return AMDGPUInstrInfo::isPredicable(MI);
+ }
+}
+
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const{
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles,
+ unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles,
+ unsigned ExtraFCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles,
+ const BranchProbability &Probability)
+ const {
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const {
+ return false;
+}
+
+
+bool
+R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ MachineOperand &MO = Cond[1];
+ switch (MO.getImm()) {
+ case OPCODE_IS_ZERO_INT:
+ MO.setImm(OPCODE_IS_NOT_ZERO_INT);
+ break;
+ case OPCODE_IS_NOT_ZERO_INT:
+ MO.setImm(OPCODE_IS_ZERO_INT);
+ break;
+ case OPCODE_IS_ZERO:
+ MO.setImm(OPCODE_IS_NOT_ZERO);
+ break;
+ case OPCODE_IS_NOT_ZERO:
+ MO.setImm(OPCODE_IS_ZERO);
+ break;
+ default:
+ return true;
+ }
+
+ MachineOperand &MO2 = Cond[2];
+ switch (MO2.getReg()) {
+ case AMDGPU::PRED_SEL_ZERO:
+ MO2.setReg(AMDGPU::PRED_SEL_ONE);
+ break;
+ case AMDGPU::PRED_SEL_ONE:
+ MO2.setReg(AMDGPU::PRED_SEL_ZERO);
+ break;
+ default:
+ return true;
+ }
+ return false;
+}
+
+bool
+R600InstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ return isPredicateSetter(MI->getOpcode());
+}
+
+
+bool
+R600InstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const {
+ return false;
+}
+
+
+bool
+R600InstrInfo::PredicateInstruction(MachineInstr *MI,
+ ArrayRef<MachineOperand> Pred) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+
+ if (MI->getOpcode() == AMDGPU::CF_ALU) {
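+    // Operand 8 of a CF_ALU clause is its $Enabled flag (see ALU_CLAUSE in
+    // R600Instructions.td); clear it when predicating the clause.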
+ MI->getOperand(8).setImm(0);
+ return true;
+ }
+
+ if (MI->getOpcode() == AMDGPU::DOT_4) {
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
+ .setReg(Pred[2].getReg());
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
+ .setReg(Pred[2].getReg());
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
+ .setReg(Pred[2].getReg());
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
+ .setReg(Pred[2].getReg());
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+ return true;
+ }
+
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setReg(Pred[2].getReg());
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+ return true;
+ }
+
+ return false;
+}
+
+unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
+ return 2;
+}
+
+unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (PredCost)
+ *PredCost = 2;
+ return 2;
+}
+
+bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+
+ switch(MI->getOpcode()) {
+ default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
+ case AMDGPU::R600_EXTRACT_ELT_V2:
+ case AMDGPU::R600_EXTRACT_ELT_V4:
+ buildIndirectRead(MI->getParent(), MI, MI->getOperand(0).getReg(),
+ RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address
+ MI->getOperand(2).getReg(),
+ RI.getHWRegChan(MI->getOperand(1).getReg()));
+ break;
+ case AMDGPU::R600_INSERT_ELT_V2:
+ case AMDGPU::R600_INSERT_ELT_V4:
+ buildIndirectWrite(MI->getParent(), MI, MI->getOperand(2).getReg(), // Value
+ RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address
+ MI->getOperand(3).getReg(), // Offset
+ RI.getHWRegChan(MI->getOperand(1).getReg())); // Channel
+ break;
+ }
+ MI->eraseFromParent();
+ return true;
+}
+
+void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
+ const MachineFunction &MF) const {
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
+ MF.getSubtarget().getFrameLowering());
+
+ unsigned StackWidth = TFL->getStackWidth(MF);
+ int End = getIndirectIndexEnd(MF);
+
+ if (End == -1)
+ return;
+
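+  // For each index in the indirectly addressed range, reserve the 128-bit
+  // super-register and its first StackWidth 32-bit channel registers.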
+ for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
+ unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
+ Reserved.set(SuperReg);
+ for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
+ Reserved.set(Reg);
+ }
+ }
+}
+
+unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const {
+ // XXX: Remove when we support a stack width > 2
+ assert(Channel == 0);
+ return RegIndex;
+}
+
+const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
+ return &AMDGPU::R600_TReg32_XRegClass;
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const {
+ return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0);
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg,
+ unsigned AddrChan) const {
+ unsigned AddrReg;
+ switch (AddrChan) {
+ default: llvm_unreachable("Invalid Channel");
+ case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
+ case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
+ case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
+ case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
+ }
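+  // MOVA_INT loads the offset into the address register AR_X; the MOV below
+  // then writes ValueReg to AddrReg relative to AR_X (dst_rel = 1).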
+ MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
+ AMDGPU::AR_X, OffsetReg);
+ setImmOperand(MOVA, AMDGPU::OpName::write, 0);
+
+ MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+ AddrReg, ValueReg)
+ .addReg(AMDGPU::AR_X,
+ RegState::Implicit | RegState::Kill);
+ setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
+ return Mov;
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const {
+ return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0);
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg,
+ unsigned AddrChan) const {
+ unsigned AddrReg;
+ switch (AddrChan) {
+ default: llvm_unreachable("Invalid Channel");
+ case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
+ case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
+ case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
+ case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
+ }
+ MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
+ AMDGPU::AR_X,
+ OffsetReg);
+ setImmOperand(MOVA, AMDGPU::OpName::write, 0);
+ MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+ ValueReg,
+ AddrReg)
+ .addReg(AMDGPU::AR_X,
+ RegState::Implicit | RegState::Kill);
+ setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);
+
+ return Mov;
+}
+
+unsigned R600InstrInfo::getMaxAlusPerClause() const {
+ return 115;
+}
+
+MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opcode,
+ unsigned DstReg,
+ unsigned Src0Reg,
+ unsigned Src1Reg) const {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
+ DstReg); // $dst
+
+ if (Src1Reg) {
+ MIB.addImm(0) // $update_exec_mask
+ .addImm(0); // $update_predicate
+ }
+ MIB.addImm(1) // $write
+ .addImm(0) // $omod
+ .addImm(0) // $dst_rel
+ .addImm(0) // $dst_clamp
+ .addReg(Src0Reg) // $src0
+ .addImm(0) // $src0_neg
+ .addImm(0) // $src0_rel
+ .addImm(0) // $src0_abs
+ .addImm(-1); // $src0_sel
+
+ if (Src1Reg) {
+ MIB.addReg(Src1Reg) // $src1
+ .addImm(0) // $src1_neg
+ .addImm(0) // $src1_rel
+ .addImm(0) // $src1_abs
+ .addImm(-1); // $src1_sel
+ }
+
+  // XXX: The r600g finalizer expects this to be 1. Once we've moved the
+  // scheduling to the backend, we can change the default to 0.
+ MIB.addImm(1) // $last
+ .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
+ .addImm(0) // $literal
+ .addImm(0); // $bank_swizzle
+
+ return MIB;
+}
+
+#define OPERAND_CASE(Label) \
+ case Label: { \
+ static const unsigned Ops[] = \
+ { \
+ Label##_X, \
+ Label##_Y, \
+ Label##_Z, \
+ Label##_W \
+ }; \
+ return Ops[Slot]; \
+ }
+
+static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
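+  // Map a DOT_4 operand name (e.g. src0) to its per-slot _X/_Y/_Z/_W variant
+  // for the given Slot.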
+ switch (Op) {
+ OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
+ OPERAND_CASE(AMDGPU::OpName::update_pred)
+ OPERAND_CASE(AMDGPU::OpName::write)
+ OPERAND_CASE(AMDGPU::OpName::omod)
+ OPERAND_CASE(AMDGPU::OpName::dst_rel)
+ OPERAND_CASE(AMDGPU::OpName::clamp)
+ OPERAND_CASE(AMDGPU::OpName::src0)
+ OPERAND_CASE(AMDGPU::OpName::src0_neg)
+ OPERAND_CASE(AMDGPU::OpName::src0_rel)
+ OPERAND_CASE(AMDGPU::OpName::src0_abs)
+ OPERAND_CASE(AMDGPU::OpName::src0_sel)
+ OPERAND_CASE(AMDGPU::OpName::src1)
+ OPERAND_CASE(AMDGPU::OpName::src1_neg)
+ OPERAND_CASE(AMDGPU::OpName::src1_rel)
+ OPERAND_CASE(AMDGPU::OpName::src1_abs)
+ OPERAND_CASE(AMDGPU::OpName::src1_sel)
+ OPERAND_CASE(AMDGPU::OpName::pred_sel)
+ default:
+ llvm_unreachable("Wrong Operand");
+ }
+}
+
+#undef OPERAND_CASE
+
+MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
+ MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
+ const {
+ assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
+ unsigned Opcode;
+ if (ST.getGeneration() <= AMDGPUSubtarget::R700)
+ Opcode = AMDGPU::DOT4_r600;
+ else
+ Opcode = AMDGPU::DOT4_eg;
+ MachineBasicBlock::iterator I = MI;
+ MachineOperand &Src0 = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
+ MachineOperand &Src1 = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
+ MachineInstr *MIB = buildDefaultInstruction(
+ MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
+ static const unsigned Operands[14] = {
+ AMDGPU::OpName::update_exec_mask,
+ AMDGPU::OpName::update_pred,
+ AMDGPU::OpName::write,
+ AMDGPU::OpName::omod,
+ AMDGPU::OpName::dst_rel,
+ AMDGPU::OpName::clamp,
+ AMDGPU::OpName::src0_neg,
+ AMDGPU::OpName::src0_rel,
+ AMDGPU::OpName::src0_abs,
+ AMDGPU::OpName::src0_sel,
+ AMDGPU::OpName::src1_neg,
+ AMDGPU::OpName::src1_rel,
+ AMDGPU::OpName::src1_abs,
+ AMDGPU::OpName::src1_sel,
+ };
+
+ MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
+ getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
+ MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
+ .setReg(MO.getReg());
+
+ for (unsigned i = 0; i < 14; i++) {
+ MachineOperand &MO = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
+ assert (MO.isImm());
+ setImmOperand(MIB, Operands[i], MO.getImm());
+ }
+ MIB->getOperand(20).setImm(0);
+ return MIB;
+}
+
+MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ uint64_t Imm) const {
+ MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
+ AMDGPU::ALU_LITERAL_X);
+ setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
+ return MovImm;
+}
+
+MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg) const {
+ return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
+}
+
+int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
+ return getOperandIdx(MI.getOpcode(), Op);
+}
+
+int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
+ return AMDGPU::getNamedOperandIdx(Opcode, Op);
+}
+
+void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
+ int64_t Imm) const {
+ int Idx = getOperandIdx(*MI, Op);
+ assert(Idx != -1 && "Operand not supported for this instruction.");
+ assert(MI->getOperand(Idx).isImm());
+ MI->getOperand(Idx).setImm(Imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction flag getters/setters
+//===----------------------------------------------------------------------===//
+
+bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
+ return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
+}
+
+MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
+ unsigned Flag) const {
+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
+ int FlagIndex = 0;
+ if (Flag != 0) {
+ // If we pass something other than the default value of Flag to this
+    // function, it means we want to set a flag on an instruction
+ // that uses native encoding.
+ assert(HAS_NATIVE_OPERANDS(TargetFlags));
+ bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
+ switch (Flag) {
+ case MO_FLAG_CLAMP:
+ FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
+ break;
+ case MO_FLAG_MASK:
+ FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
+ break;
+ case MO_FLAG_NOT_LAST:
+ case MO_FLAG_LAST:
+ FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
+ break;
+ case MO_FLAG_NEG:
+ switch (SrcIdx) {
+ case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
+ case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
+ case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
+ }
+ break;
+
+ case MO_FLAG_ABS:
+ assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
+ "instructions.");
+ (void)IsOP3;
+ switch (SrcIdx) {
+ case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
+ case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
+ }
+ break;
+
+ default:
+ FlagIndex = -1;
+ break;
+ }
+ assert(FlagIndex != -1 && "Flag not supported for this instruction");
+ } else {
+ FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
+ assert(FlagIndex != 0 &&
+ "Instruction flags not supported for this instruction");
+ }
+
+ MachineOperand &FlagOp = MI->getOperand(FlagIndex);
+ assert(FlagOp.isImm());
+ return FlagOp;
+}
+
+void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
+ unsigned Flag) const {
+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
+ if (Flag == 0) {
+ return;
+ }
+ if (HAS_NATIVE_OPERANDS(TargetFlags)) {
+ MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
+ if (Flag == MO_FLAG_NOT_LAST) {
+ clearFlag(MI, Operand, MO_FLAG_LAST);
+ } else if (Flag == MO_FLAG_MASK) {
+ clearFlag(MI, Operand, Flag);
+ } else {
+ FlagOp.setImm(1);
+ }
+ } else {
+ MachineOperand &FlagOp = getFlagOp(MI, Operand);
+ FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
+ }
+}
+
+void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
+ unsigned Flag) const {
+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
+ if (HAS_NATIVE_OPERANDS(TargetFlags)) {
+ MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
+ FlagOp.setImm(0);
+ } else {
+ MachineOperand &FlagOp = getFlagOp(MI);
+ unsigned InstFlags = FlagOp.getImm();
+ InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
+ FlagOp.setImm(InstFlags);
+ }
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
new file mode 100644
index 0000000..9c5f76c
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@@ -0,0 +1,303 @@
+//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for R600InstrInfo
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_R600INSTRINFO_H
+#define LLVM_LIB_TARGET_R600_R600INSTRINFO_H
+
+#include "AMDGPUInstrInfo.h"
+#include "R600Defines.h"
+#include "R600RegisterInfo.h"
+#include <map>
+
+namespace llvm {
+
+ class AMDGPUTargetMachine;
+ class DFAPacketizer;
+ class ScheduleDAG;
+ class MachineFunction;
+ class MachineInstr;
+ class MachineInstrBuilder;
+
+ class R600InstrInfo : public AMDGPUInstrInfo {
+ private:
+ const R600RegisterInfo RI;
+
+ std::vector<std::pair<int, unsigned> >
+ ExtractSrcs(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PV, unsigned &ConstCount) const;
+
+
+ MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg,
+ unsigned AddrChan) const;
+
+ MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg,
+ unsigned AddrChan) const;
+ public:
+ enum BankSwizzle {
+ ALU_VEC_012_SCL_210 = 0,
+ ALU_VEC_021_SCL_122,
+ ALU_VEC_120_SCL_212,
+ ALU_VEC_102_SCL_221,
+ ALU_VEC_201,
+ ALU_VEC_210
+ };
+
+ explicit R600InstrInfo(const AMDGPUSubtarget &st);
+
+ const R600RegisterInfo &getRegisterInfo() const override;
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+ bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const override;
+
+ bool isTrig(const MachineInstr &MI) const;
+ bool isPlaceHolderOpcode(unsigned opcode) const;
+ bool isReductionOp(unsigned opcode) const;
+ bool isCubeOp(unsigned opcode) const;
+
+ /// \returns true if this \p Opcode represents an ALU instruction.
+ bool isALUInstr(unsigned Opcode) const;
+ bool hasInstrModifiers(unsigned Opcode) const;
+ bool isLDSInstr(unsigned Opcode) const;
+ bool isLDSNoRetInstr(unsigned Opcode) const;
+ bool isLDSRetInstr(unsigned Opcode) const;
+
+ /// \returns true if this \p Opcode represents an ALU instruction or an
+ /// instruction that will be lowered in ExpandSpecialInstrs Pass.
+ bool canBeConsideredALU(const MachineInstr *MI) const;
+
+ bool isTransOnly(unsigned Opcode) const;
+ bool isTransOnly(const MachineInstr *MI) const;
+ bool isVectorOnly(unsigned Opcode) const;
+ bool isVectorOnly(const MachineInstr *MI) const;
+ bool isExport(unsigned Opcode) const;
+
+ bool usesVertexCache(unsigned Opcode) const;
+ bool usesVertexCache(const MachineInstr *MI) const;
+ bool usesTextureCache(unsigned Opcode) const;
+ bool usesTextureCache(const MachineInstr *MI) const;
+
+ bool mustBeLastInClause(unsigned Opcode) const;
+ bool usesAddressRegister(MachineInstr *MI) const;
+ bool definesAddressRegister(MachineInstr *MI) const;
+ bool readsLDSSrcReg(const MachineInstr *MI) const;
+
+ /// \returns The operand index for the given source number. Legal values
+ /// for SrcNum are 0, 1, and 2.
+ int getSrcIdx(unsigned Opcode, unsigned SrcNum) const;
+ /// \returns The operand Index for the Sel operand given an index to one
+ /// of the instruction's src operands.
+ int getSelIdx(unsigned Opcode, unsigned SrcIdx) const;
+
+  /// \returns a pair for each src of an ALU instruction.
+ /// The first member of a pair is the register id.
+ /// If register is ALU_CONST, second member is SEL.
+ /// If register is ALU_LITERAL, second member is IMM.
+ /// Otherwise, second member value is undefined.
+ SmallVector<std::pair<MachineOperand *, int64_t>, 3>
+ getSrcs(MachineInstr *MI) const;
+
+ unsigned isLegalUpTo(
+ const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ const std::vector<R600InstrInfo::BankSwizzle> &Swz,
+ const std::vector<std::pair<int, unsigned> > &TransSrcs,
+ R600InstrInfo::BankSwizzle TransSwz) const;
+
+ bool FindSwizzleForVectorSlot(
+ const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+ const std::vector<std::pair<int, unsigned> > &TransSrcs,
+ R600InstrInfo::BankSwizzle TransSwz) const;
+
+  /// Given the order VEC_012 < VEC_021 < VEC_120 < VEC_102 < VEC_201 < VEC_210,
+  /// returns true and stores in BS the first (in lexicographic order) BankSwizzle
+  /// assignment, starting from the one already provided in the Instruction Group
+  /// MIs, that fits the read port limitations, if such an assignment exists.
+  /// Otherwise returns false and leaves BS with undefined content.
+  /// isLastAluTrans should be set if the last ALU of MIs will be executed in the
+  /// Trans slot. In this case the last entry of BS holds the BankSwizzle value to
+  /// apply to that instruction.
+  /// PV maps GPRs to the PV registers used in the Instruction Group MIs.
+ bool fitsReadPortLimitations(const std::vector<MachineInstr *> &MIs,
+ const DenseMap<unsigned, unsigned> &PV,
+ std::vector<BankSwizzle> &BS,
+ bool isLastAluTrans) const;
+
+  /// An instruction group can only access two channel pairs (either [XY] or
+  /// [ZW]) from a KCache bank on R700+. This function checks whether the set
+  /// of MIs given as input meets this limitation.
+ bool fitsConstReadLimitations(const std::vector<MachineInstr *> &) const;
+  /// Same check, but taking a set of const indices instead of a set of MIs.
+ bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
+
+ /// \brief Vector instructions are instructions that must fill all
+ /// instruction slots within an instruction group.
+ bool isVector(const MachineInstr &MI) const;
+
+ bool isMov(unsigned Opcode) const override;
+
+ DFAPacketizer *
+ CreateTargetScheduleState(const TargetSubtargetInfo &) const override;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const override;
+
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+ DebugLoc DL) const override;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+
+ bool isPredicated(const MachineInstr *MI) const override;
+
+ bool isPredicable(MachineInstr *MI) const override;
+
+ bool
+ isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ const BranchProbability &Probability) const override;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const override ;
+
+ bool
+ isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ const BranchProbability &Probability) const override;
+
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const override;
+
+ bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const override;
+
+ bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const override;
+
+ bool PredicateInstruction(MachineInstr *MI,
+ ArrayRef<MachineOperand> Pred) const override;
+
+ unsigned int getPredicationCost(const MachineInstr *) const override;
+
+ unsigned int getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = nullptr) const override;
+
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const override { return 1;}
+
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+
+  /// \brief Reserve the registers that may be accessed using indirect addressing.
+ void reserveIndirectRegisters(BitVector &Reserved,
+ const MachineFunction &MF) const;
+
+ unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const override;
+
+ const TargetRegisterClass *getIndirectAddrRegClass() const override;
+
+ MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const override;
+
+ MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const override;
+
+ unsigned getMaxAlusPerClause() const;
+
+  /// buildDefaultInstruction - This function returns a MachineInstr with
+ /// all the instruction modifiers initialized to their default values.
+ /// You can use this function to avoid manually specifying each instruction
+ /// modifier operand when building a new instruction.
+ ///
+ /// \returns a MachineInstr with all the instruction modifiers initialized
+ /// to their default values.
+ MachineInstrBuilder buildDefaultInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opcode,
+ unsigned DstReg,
+ unsigned Src0Reg,
+ unsigned Src1Reg = 0) const;
+
+ MachineInstr *buildSlotOfVectorInstruction(MachineBasicBlock &MBB,
+ MachineInstr *MI,
+ unsigned Slot,
+ unsigned DstReg) const;
+
+ MachineInstr *buildMovImm(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ uint64_t Imm) const;
+
+ MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg) const override;
+
+ /// \brief Get the index of Op in the MachineInstr.
+ ///
+ /// \returns -1 if the Instruction does not contain the specified \p Op.
+ int getOperandIdx(const MachineInstr &MI, unsigned Op) const;
+
+ /// \brief Get the index of \p Op for the given Opcode.
+ ///
+ /// \returns -1 if the Instruction does not contain the specified \p Op.
+ int getOperandIdx(unsigned Opcode, unsigned Op) const;
+
+ /// \brief Helper function for setting instruction flag values.
+ void setImmOperand(MachineInstr *MI, unsigned Op, int64_t Imm) const;
+
+ /// \returns true if this instruction has an operand for storing target flags.
+ bool hasFlagOperand(const MachineInstr &MI) const;
+
+  /// \brief Add one of the MO_FLAG* flags to the specified \p Operand.
+ void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
+
+  /// \brief Determine if the specified \p Flag is set on this \p Operand.
+ bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;
+
+  /// \param SrcIdx The register source to set the flag on (e.g. src0, src1, src2)
+ /// \param Flag The flag being set.
+ ///
+ /// \returns the operand containing the flags for this instruction.
+ MachineOperand &getFlagOp(MachineInstr *MI, unsigned SrcIdx = 0,
+ unsigned Flag = 0) const;
+
+ /// \brief Clear the specified flag on the instruction.
+ void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
+};
+
+namespace AMDGPU {
+
+int getLDSNoRetOp(uint16_t Opcode);
+
+} //End namespace AMDGPU
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
new file mode 100644
index 0000000..7beed09
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -0,0 +1,1744 @@
+//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TableGen definitions for instructions which are available on R600 family
+// GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+include "R600Intrinsics.td"
+include "R600InstrFormats.td"
+
+class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstR600 <outs, ins, asm, pattern, NullALU> {
+
+ let Namespace = "AMDGPU";
+}
+
+def MEMxi : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
+ let PrintMethod = "printMemOperand";
+}
+
+def MEMrr : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
+}
+
+// Operands for non-registers
+
+class InstFlag<string PM = "printOperand", int Default = 0>
+ : OperandWithDefaultOps <i32, (ops (i32 Default))> {
+ let PrintMethod = PM;
+}
+
+// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
+def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
+ let PrintMethod = "printSel";
+}
+def BANK_SWIZZLE : OperandWithDefaultOps <i32, (ops (i32 0))> {
+ let PrintMethod = "printBankSwizzle";
+}
+
+def LITERAL : InstFlag<"printLiteral">;
+
+def WRITE : InstFlag <"printWrite", 1>;
+def OMOD : InstFlag <"printOMOD">;
+def REL : InstFlag <"printRel">;
+def CLAMP : InstFlag <"printClamp">;
+def NEG : InstFlag <"printNeg">;
+def ABS : InstFlag <"printAbs">;
+def UEM : InstFlag <"printUpdateExecMask">;
+def UP : InstFlag <"printUpdatePred">;
+
+// XXX: The r600g finalizer in Mesa expects last to be one in most cases.
+// Once we start using the packetizer in this backend we should have this
+// default to 0.
+def LAST : InstFlag<"printLast", 1>;
+def RSel : Operand<i32> {
+ let PrintMethod = "printRSel";
+}
+def CT: Operand<i32> {
+ let PrintMethod = "printCT";
+}
+
+def FRAMEri : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
+}
+
+def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
+def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
+def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
+def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
+def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
+
+
+def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
+ (ops PRED_SEL_OFF)>;
+
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+
+// Class for instructions with only one source register.
+// If you add new ins to this instruction, make sure they are listed before
+// $literal, because the backend currently assumes that the last operand is
+// a literal. Also be sure to update the enum R600Op1OperandIndex::ROI in
+// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
+// and R600InstrInfo::getOperandIdx().
+class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <(outs R600_Reg32:$dst),
+ (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
+ BANK_SWIZZLE:$bank_swizzle),
+ !strconcat(" ", opName,
+ "$clamp $last $dst$write$dst_rel$omod, "
+ "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+ "$pred_sel $bank_swizzle"),
+ pattern,
+ itin>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP2 <inst> {
+
+ let src1 = 0;
+ let src1_rel = 0;
+ let src1_neg = 0;
+ let src1_abs = 0;
+ let update_exec_mask = 0;
+ let update_pred = 0;
+ let HasNativeOperands = 1;
+ let Op1 = 1;
+ let ALUInst = 1;
+ let DisableEncoding = "$literal";
+ let UseNamedOperandTable = 1;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
+ InstrItinClass itin = AnyALU> :
+ R600_1OP <inst, opName,
+ [(set R600_Reg32:$dst, (node R600_Reg32:$src0))], itin
+>;
+
+// If you add or change the operands for R600_2OP instructions, you must
+// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
+// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
+class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <(outs R600_Reg32:$dst),
+ (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
+ OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
+ BANK_SWIZZLE:$bank_swizzle),
+ !strconcat(" ", opName,
+ "$clamp $last $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
+ "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+ "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
+ "$pred_sel $bank_swizzle"),
+ pattern,
+ itin>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP2 <inst> {
+
+ let HasNativeOperands = 1;
+ let Op2 = 1;
+ let ALUInst = 1;
+ let DisableEncoding = "$literal";
+ let UseNamedOperandTable = 1;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
+ InstrItinClass itin = AnyALU> :
+ R600_2OP <inst, opName,
+ [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
+ R600_Reg32:$src1))], itin
+>;
+
+// If you add or change the operands for R600_3OP instructions, you must
+// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
+// R600InstrInfo::buildDefaultInstruction(), and
+// R600InstrInfo::getOperandIdx().
+class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <(outs R600_Reg32:$dst),
+ (ins REL:$dst_rel, CLAMP:$clamp,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
+ R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
+ BANK_SWIZZLE:$bank_swizzle),
+ !strconcat(" ", opName, "$clamp $last $dst$dst_rel, "
+ "$src0_neg$src0$src0_rel, "
+ "$src1_neg$src1$src1_rel, "
+ "$src2_neg$src2$src2_rel, "
+ "$pred_sel"
+ "$bank_swizzle"),
+ pattern,
+ itin>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP3<inst>{
+
+ let HasNativeOperands = 1;
+ let DisableEncoding = "$literal";
+ let Op3 = 1;
+ let UseNamedOperandTable = 1;
+ let ALUInst = 1;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin = VecALU> :
+ InstR600 <(outs R600_Reg32:$dst),
+ ins,
+ asm,
+ pattern,
+ itin>;
+
+
+
+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
+
+def TEX_SHADOW : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return (TType >= 6 && TType <= 8) || TType == 13;
+ }]
+>;
+
+def TEX_RECT : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 5;
+ }]
+>;
+
+def TEX_ARRAY : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 9 || TType == 10 || TType == 16;
+ }]
+>;
+
+def TEX_SHADOW_ARRAY : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 11 || TType == 12 || TType == 17;
+ }]
+>;
+
+def TEX_MSAA : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 14;
+ }]
+>;
+
+def TEX_ARRAY_MSAA : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 15;
+ }]
+>;
+
+class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
+ dag outs, dag ins, string asm, list<dag> pattern> :
+ InstR600ISA <outs, ins, asm, pattern>,
+ CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF {
+
+ let rat_id = ratid;
+ let rat_inst = ratinst;
+ let rim = 0;
+ // XXX: Have a separate instruction for non-indexed writes.
+ let type = 1;
+ let rw_rel = 0;
+ let elem_size = 0;
+
+ let array_size = 0;
+ let comp_mask = mask;
+ let burst_count = 0;
+ let vpm = 0;
+ let cf_inst = cfinst;
+ let mark = 0;
+ let barrier = 1;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+ let IsExport = 1;
+
+}
+
+class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
+ : InstR600ISA <outs, (ins MEMxi:$src_gpr), name, pattern>,
+ VTX_WORD1_GPR {
+
+ // Static fields
+ let DST_REL = 0;
+  // The docs say that if this bit is set, then the DATA_FORMAT, NUM_FORMAT_ALL,
+  // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored.
+  // However, based on my testing, if USE_CONST_FIELDS is set, then all of
+  // these fields need to be set to 0.
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 1;
+ let FORMAT_COMP_ALL = 0;
+ let SRF_MODE_ALL = 0;
+
+ let Inst{63-32} = Word1;
+ // LLVM can only encode 64-bit instructions, so these fields are manually
+ // encoded in R600CodeEmitter
+ //
+ // bits<16> OFFSET;
+ // bits<2> ENDIAN_SWAP = 0;
+ // bits<1> CONST_BUF_NO_STRIDE = 0;
+ // bits<1> MEGA_FETCH = 0;
+ // bits<1> ALT_CONST = 0;
+ // bits<2> BUFFER_INDEX_MODE = 0;
+
+ // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+  // is done in R600CodeEmitter)
+ //
+ // Inst{79-64} = OFFSET;
+ // Inst{81-80} = ENDIAN_SWAP;
+ // Inst{82} = CONST_BUF_NO_STRIDE;
+ // Inst{83} = MEGA_FETCH;
+ // Inst{84} = ALT_CONST;
+ // Inst{86-85} = BUFFER_INDEX_MODE;
+ // Inst{95-86} = 0; Reserved
+
+ // VTX_WORD3 (Padding)
+ //
+ // Inst{127-96} = 0;
+
+ let VTXInst = 1;
+}
+
+class LoadParamFrag <PatFrag load_type> : PatFrag <
+ (ops node:$ptr), (load_type node:$ptr),
+ [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), 0); }]
+>;
+
+def load_param : LoadParamFrag<load>;
+def load_param_exti8 : LoadParamFrag<az_extloadi8>;
+def load_param_exti16 : LoadParamFrag<az_extloadi16>;
+
+def isR600 : Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::R700">;
+
+def isR600toCayman
+ : Predicate<
+ "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;
+
+//===----------------------------------------------------------------------===//
+// R600 SDNodes
+//===----------------------------------------------------------------------===//
+
+def INTERP_PAIR_XY : AMDGPUShaderInst <
+ (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
+ (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
+ "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 dst1",
+ []>;
+
+def INTERP_PAIR_ZW : AMDGPUShaderInst <
+ (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
+ (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
+ "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1",
+ []>;
+
+def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
+ SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
+ [SDNPVariadic]
+>;
+
+def DOT4 : SDNode<"AMDGPUISD::DOT4",
+ SDTypeProfile<1, 8, [SDTCisFP<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>,
+ SDTCisVT<3, f32>, SDTCisVT<4, f32>, SDTCisVT<5, f32>,
+ SDTCisVT<6, f32>, SDTCisVT<7, f32>, SDTCisVT<8, f32>]>,
+ []
+>;
+
+def COS_HW : SDNode<"AMDGPUISD::COS_HW",
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
+>;
+
+def SIN_HW : SDNode<"AMDGPUISD::SIN_HW",
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
+>;
+
+def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>;
+
+def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>;
+
+multiclass TexPattern<bits<32> TextureOp, Instruction inst, ValueType vt = v4f32> {
+def : Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR,
+ (i32 imm:$srcx), (i32 imm:$srcy), (i32 imm:$srcz), (i32 imm:$srcw),
+ (i32 imm:$offsetx), (i32 imm:$offsety), (i32 imm:$offsetz),
+ (i32 imm:$DST_SEL_X), (i32 imm:$DST_SEL_Y), (i32 imm:$DST_SEL_Z),
+ (i32 imm:$DST_SEL_W),
+ (i32 imm:$RESOURCE_ID), (i32 imm:$SAMPLER_ID),
+ (i32 imm:$COORD_TYPE_X), (i32 imm:$COORD_TYPE_Y), (i32 imm:$COORD_TYPE_Z),
+ (i32 imm:$COORD_TYPE_W)),
+ (inst R600_Reg128:$SRC_GPR,
+ imm:$srcx, imm:$srcy, imm:$srcz, imm:$srcw,
+ imm:$offsetx, imm:$offsety, imm:$offsetz,
+ imm:$DST_SEL_X, imm:$DST_SEL_Y, imm:$DST_SEL_Z,
+ imm:$DST_SEL_W,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID,
+ imm:$COORD_TYPE_X, imm:$COORD_TYPE_Y, imm:$COORD_TYPE_Z,
+ imm:$COORD_TYPE_W)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Interpolation Instructions
+//===----------------------------------------------------------------------===//
+
+def INTERP_VEC_LOAD : AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins i32imm:$src0),
+ "INTERP_LOAD $src0 : $dst",
+ [(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>;
+
+def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
+ let bank_swizzle = 5;
+}
+
+def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
+ let bank_swizzle = 5;
+}
+
+def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
+
+//===----------------------------------------------------------------------===//
+// Export Instructions
+//===----------------------------------------------------------------------===//
+
+def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
+
+def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+class ExportWord0 {
+ field bits<32> Word0;
+
+ bits<13> arraybase;
+ bits<2> type;
+ bits<7> gpr;
+ bits<2> elem_size;
+
+ let Word0{12-0} = arraybase;
+ let Word0{14-13} = type;
+ let Word0{21-15} = gpr;
+ let Word0{22} = 0; // RW_REL
+ let Word0{29-23} = 0; // INDEX_GPR
+ let Word0{31-30} = elem_size;
+}
+
+class ExportSwzWord1 {
+ field bits<32> Word1;
+
+ bits<3> sw_x;
+ bits<3> sw_y;
+ bits<3> sw_z;
+ bits<3> sw_w;
+ bits<1> eop;
+ bits<8> inst;
+
+ let Word1{2-0} = sw_x;
+ let Word1{5-3} = sw_y;
+ let Word1{8-6} = sw_z;
+ let Word1{11-9} = sw_w;
+}
+
+class ExportBufWord1 {
+ field bits<32> Word1;
+
+ bits<12> arraySize;
+ bits<4> compMask;
+ bits<1> eop;
+ bits<8> inst;
+
+ let Word1{11-0} = arraySize;
+ let Word1{15-12} = compMask;
+}
+
+multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
+ def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
+ (ExportInst
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $reg, sub0),
+ 0, 61, 0, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
+ (ExportInst
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $reg, sub0),
+ 0, 61, 7, 0, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_dummy (i32 imm:$type)),
+ (ExportInst
+ (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_dummy 1),
+ (ExportInst
+ (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
+ (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
+ imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
+ >;
+
+}
+
+multiclass SteamOutputExportPattern<Instruction ExportInst,
+ bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
+// Stream0
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf0inst, 0)>;
+// Stream1
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
+ (ExportInst $src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf1inst, 0)>;
+// Stream2
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
+ (ExportInst $src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf2inst, 0)>;
+// Stream3
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
+ (ExportInst $src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf3inst, 0)>;
+}
+
+// Export instructions should not be duplicated by the TailDuplication pass
+// (which assumes that duplicable instructions are affected by the exec mask).
+let usesCustomInserter = 1, isNotDuplicable = 1 in {
+
+class ExportSwzInst : InstR600ISA<(
+ outs),
+ (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
+ RSel:$sw_x, RSel:$sw_y, RSel:$sw_z, RSel:$sw_w, i32imm:$inst,
+ i32imm:$eop),
+ !strconcat("EXPORT", " $gpr.$sw_x$sw_y$sw_z$sw_w"),
+ []>, ExportWord0, ExportSwzWord1 {
+ let elem_size = 3;
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+ let IsExport = 1;
+}
+
+} // End usesCustomInserter = 1, isNotDuplicable = 1
+
+class ExportBufInst : InstR600ISA<(
+ outs),
+ (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
+ i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
+ !strconcat("EXPORT", " $gpr"),
+ []>, ExportWord0, ExportBufWord1 {
+ let elem_size = 0;
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+ let IsExport = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions
+//===----------------------------------------------------------------------===//
+
+
+def KCACHE : InstFlag<"printKCache">;
+
+class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
+(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
+KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
+i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
+i32imm:$COUNT, i32imm:$Enabled),
+!strconcat(OpName, " $COUNT, @$ADDR, "
+"KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
+[] >, CF_ALU_WORD0, CF_ALU_WORD1 {
+ field bits<64> Inst;
+
+ let CF_INST = inst;
+ let ALT_CONST = 0;
+ let WHOLE_QUAD_MODE = 0;
+ let BARRIER = 1;
+ let isCodeGenOnly = 1;
+ let UseNamedOperandTable = 1;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class CF_WORD0_R600 {
+ field bits<32> Word0;
+
+ bits<32> ADDR;
+
+ let Word0 = ADDR;
+}
+
+class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
+ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
+ field bits<64> Inst;
+ bits<4> CNT;
+
+ let CF_INST = inst;
+ let BARRIER = 1;
+ let CF_CONST = 0;
+ let VALID_PIXEL_MODE = 0;
+ let COND = 0;
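+ // The 4-bit clause count is split across two encoding fields: the low three
+ // bits go in COUNT and the top bit in COUNT_3.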
+ let COUNT = CNT{2-0};
+ let CALL_COUNT = 0;
+ let COUNT_3 = CNT{3};
+ let END_OF_PROGRAM = 0;
+ let WHOLE_QUAD_MODE = 0;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
+ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
+ field bits<64> Inst;
+
+ let CF_INST = inst;
+ let BARRIER = 1;
+ let JUMPTABLE_SEL = 0;
+ let CF_CONST = 0;
+ let VALID_PIXEL_MODE = 0;
+ let COND = 0;
+ let END_OF_PROGRAM = 0;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+def CF_ALU : ALU_CLAUSE<8, "ALU">;
+def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
+def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">;
+def CF_ALU_CONTINUE : ALU_CLAUSE<13, "ALU_CONTINUE">;
+def CF_ALU_BREAK : ALU_CLAUSE<14, "ALU_BREAK">;
+def CF_ALU_ELSE_AFTER : ALU_CLAUSE<15, "ALU_ELSE_AFTER">;
+
+def FETCH_CLAUSE : AMDGPUInst <(outs),
+(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {
+ field bits<8> Inst;
+ bits<8> num;
+ let Inst = num;
+ let isCodeGenOnly = 1;
+}
+
+def ALU_CLAUSE : AMDGPUInst <(outs),
+(ins i32imm:$addr), "ALU clause starting at $addr:", [] > {
+ field bits<8> Inst;
+ bits<8> num;
+ let Inst = num;
+ let isCodeGenOnly = 1;
+}
+
+def LITERALS : AMDGPUInst <(outs),
+(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > {
+ let isCodeGenOnly = 1;
+
+ field bits<64> Inst;
+ bits<32> literal1;
+ bits<32> literal2;
+
+ let Inst{31-0} = literal1;
+ let Inst{63-32} = literal2;
+}
+
+def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
+ field bits<64> Inst;
+}
+
+let Predicates = [isR600toCayman] in {
+
+//===----------------------------------------------------------------------===//
+// Common Instructions R600, R700, Evergreen, Cayman
+//===----------------------------------------------------------------------===//
+
+def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
+// Non-IEEE MUL: 0 * anything = 0
+def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
+def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
+// TODO: Do these actually match the regular fmin/fmax behavior?
+def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>;
+def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>;
+// According to https://msdn.microsoft.com/en-us/library/windows/desktop/cc308050%28v=vs.85%29.aspx
+// DX10 min/max returns the other operand if one is NaN;
+// this matches http://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic
+def MAX_DX10 : R600_2OP_Helper <0x5, "MAX_DX10", fmaxnum>;
+def MIN_DX10 : R600_2OP_Helper <0x6, "MIN_DX10", fminnum>;
+
+// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
+// so some of the instruction names don't match the asm string.
+// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
+def SETE : R600_2OP <
+ 0x08, "SETE",
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))]
+>;
+
+def SGT : R600_2OP <
+ 0x09, "SETGT",
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))]
+>;
+
+def SGE : R600_2OP <
+ 0xA, "SETGE",
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))]
+>;
+
+def SNE : R600_2OP <
+ 0xB, "SETNE",
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE_NE))]
+>;
+
+def SETE_DX10 : R600_2OP <
+ 0xC, "SETE_DX10",
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))]
+>;
+
+def SETGT_DX10 : R600_2OP <
+ 0xD, "SETGT_DX10",
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))]
+>;
+
+def SETGE_DX10 : R600_2OP <
+ 0xE, "SETGE_DX10",
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))]
+>;
+
+// FIXME: This should probably be COND_ONE
+def SETNE_DX10 : R600_2OP <
+ 0xF, "SETNE_DX10",
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE_NE))]
+>;
+
+def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
+def TRUNC : R600_1OP_Helper <0x11, "TRUNC", ftrunc>;
+def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
+def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
+def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
+
+def MOV : R600_1OP <0x19, "MOV", []>;
+
+let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
+
+class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
+ (outs R600_Reg32:$dst),
+ (ins immType:$imm),
+ "",
+ []
+>;
+
+} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
+
+def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
+def : Pat <
+ (imm:$val),
+ (MOV_IMM_I32 imm:$val)
+>;
+
+def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
+def : Pat <
+ (fpimm:$val),
+ (MOV_IMM_F32 fpimm:$val)
+>;
+
+def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>;
+def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>;
+def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>;
+def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>;
+
+let hasSideEffects = 1 in {
+
+def KILLGT : R600_2OP <0x2D, "KILLGT", []>;
+
+} // end hasSideEffects
+
+def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>;
+def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>;
+def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
+def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
+def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
+def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
+def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", smax>;
+def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", smin>;
+def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", umax>;
+def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", umin>;
+
+def SETE_INT : R600_2OP <
+ 0x3A, "SETE_INT",
+ [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))]
+>;
+
+def SETGT_INT : R600_2OP <
+ 0x3B, "SETGT_INT",
+ [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))]
+>;
+
+def SETGE_INT : R600_2OP <
+ 0x3C, "SETGE_INT",
+ [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))]
+>;
+
+def SETNE_INT : R600_2OP <
+ 0x3D, "SETNE_INT",
+ [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETNE))]
+>;
+
+def SETGT_UINT : R600_2OP <
+ 0x3E, "SETGT_UINT",
+ [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))]
+>;
+
+def SETGE_UINT : R600_2OP <
+ 0x3F, "SETGE_UINT",
+ [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))]
+>;
+
+def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
+def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGT_INT", []>;
+def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
+def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
+
+def CNDE_INT : R600_3OP <
+ 0x1C, "CNDE_INT",
+ [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))]
+>;
+
+def CNDGE_INT : R600_3OP <
+ 0x1E, "CNDGE_INT",
+ [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGE))]
+>;
+
+def CNDGT_INT : R600_3OP <
+ 0x1D, "CNDGT_INT",
+ [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGT))]
+>;
+
+//===----------------------------------------------------------------------===//
+// Texture instructions
+//===----------------------------------------------------------------------===//
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+
+class R600_TEX <bits<11> inst, string opName> :
+ InstR600 <(outs R600_Reg128:$DST_GPR),
+ (ins R600_Reg128:$SRC_GPR,
+ RSel:$srcx, RSel:$srcy, RSel:$srcz, RSel:$srcw,
+ i32imm:$offsetx, i32imm:$offsety, i32imm:$offsetz,
+ RSel:$DST_SEL_X, RSel:$DST_SEL_Y, RSel:$DST_SEL_Z, RSel:$DST_SEL_W,
+ i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
+ CT:$COORD_TYPE_X, CT:$COORD_TYPE_Y, CT:$COORD_TYPE_Z,
+ CT:$COORD_TYPE_W),
+ !strconcat(opName,
+ " $DST_GPR.$DST_SEL_X$DST_SEL_Y$DST_SEL_Z$DST_SEL_W, "
+ "$SRC_GPR.$srcx$srcy$srcz$srcw "
+ "RID:$RESOURCE_ID SID:$SAMPLER_ID "
+ "CT:$COORD_TYPE_X$COORD_TYPE_Y$COORD_TYPE_Z$COORD_TYPE_W"),
+ [],
+ NullALU>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+ let TEX_INST = inst{4-0};
+ let SRC_REL = 0;
+ let DST_REL = 0;
+ let LOD_BIAS = 0;
+
+ let INST_MOD = 0;
+ let FETCH_WHOLE_QUAD = 0;
+ let ALT_CONST = 0;
+ let SAMPLER_INDEX_MODE = 0;
+ let RESOURCE_INDEX_MODE = 0;
+
+ let TEXInst = 1;
+}
+
+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
+
+
+
+def TEX_SAMPLE : R600_TEX <0x10, "TEX_SAMPLE">;
+def TEX_SAMPLE_C : R600_TEX <0x18, "TEX_SAMPLE_C">;
+def TEX_SAMPLE_L : R600_TEX <0x11, "TEX_SAMPLE_L">;
+def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">;
+def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">;
+def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">;
+def TEX_LD : R600_TEX <0x03, "TEX_LD">;
+def TEX_LDPTR : R600_TEX <0x03, "TEX_LDPTR"> {
+ let INST_MOD = 1;
+}
+def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">;
+def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">;
+def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">;
+def TEX_SET_GRADIENTS_H : R600_TEX <0x0B, "TEX_SET_GRADIENTS_H">;
+def TEX_SET_GRADIENTS_V : R600_TEX <0x0C, "TEX_SET_GRADIENTS_V">;
+def TEX_SAMPLE_G : R600_TEX <0x14, "TEX_SAMPLE_G">;
+def TEX_SAMPLE_C_G : R600_TEX <0x1C, "TEX_SAMPLE_C_G">;
+
+defm : TexPattern<0, TEX_SAMPLE>;
+defm : TexPattern<1, TEX_SAMPLE_C>;
+defm : TexPattern<2, TEX_SAMPLE_L>;
+defm : TexPattern<3, TEX_SAMPLE_C_L>;
+defm : TexPattern<4, TEX_SAMPLE_LB>;
+defm : TexPattern<5, TEX_SAMPLE_C_LB>;
+defm : TexPattern<6, TEX_LD, v4i32>;
+defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>;
+defm : TexPattern<8, TEX_GET_GRADIENTS_H>;
+defm : TexPattern<9, TEX_GET_GRADIENTS_V>;
+defm : TexPattern<10, TEX_LDPTR, v4i32>;
+
+//===----------------------------------------------------------------------===//
+// Helper classes for common instructions
+//===----------------------------------------------------------------------===//
+
+class MUL_LIT_Common <bits<5> inst> : R600_3OP <
+ inst, "MUL_LIT",
+ []
+>;
+
+class MULADD_Common <bits<5> inst> : R600_3OP <
+ inst, "MULADD",
+ []
+>;
+
+class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
+ inst, "MULADD_IEEE",
+ [(set f32:$dst, (fmad f32:$src0, f32:$src1, f32:$src2))]
+>;
+
+class FMA_Common <bits<5> inst> : R600_3OP <
+ inst, "FMA",
+ [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))], VecALU
+>;
+
+class CNDE_Common <bits<5> inst> : R600_3OP <
+ inst, "CNDE",
+ [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
+>;
+
+class CNDGT_Common <bits<5> inst> : R600_3OP <
+ inst, "CNDGT",
+ [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))]
+> {
+ let Itinerary = VecALU;
+}
+
+class CNDGE_Common <bits<5> inst> : R600_3OP <
+ inst, "CNDGE",
+ [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))]
+> {
+ let Itinerary = VecALU;
+}
+
+
+let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+class R600_VEC2OP<list<dag> pattern> : InstR600 <(outs R600_Reg32:$dst), (ins
+// Slot X
+ UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X,
+ OMOD:$omod_X, REL:$dst_rel_X, CLAMP:$clamp_X,
+ R600_TReg32_X:$src0_X, NEG:$src0_neg_X, REL:$src0_rel_X, ABS:$src0_abs_X, SEL:$src0_sel_X,
+ R600_TReg32_X:$src1_X, NEG:$src1_neg_X, REL:$src1_rel_X, ABS:$src1_abs_X, SEL:$src1_sel_X,
+ R600_Pred:$pred_sel_X,
+// Slot Y
+ UEM:$update_exec_mask_Y, UP:$update_pred_Y, WRITE:$write_Y,
+ OMOD:$omod_Y, REL:$dst_rel_Y, CLAMP:$clamp_Y,
+ R600_TReg32_Y:$src0_Y, NEG:$src0_neg_Y, REL:$src0_rel_Y, ABS:$src0_abs_Y, SEL:$src0_sel_Y,
+ R600_TReg32_Y:$src1_Y, NEG:$src1_neg_Y, REL:$src1_rel_Y, ABS:$src1_abs_Y, SEL:$src1_sel_Y,
+ R600_Pred:$pred_sel_Y,
+// Slot Z
+ UEM:$update_exec_mask_Z, UP:$update_pred_Z, WRITE:$write_Z,
+ OMOD:$omod_Z, REL:$dst_rel_Z, CLAMP:$clamp_Z,
+ R600_TReg32_Z:$src0_Z, NEG:$src0_neg_Z, REL:$src0_rel_Z, ABS:$src0_abs_Z, SEL:$src0_sel_Z,
+ R600_TReg32_Z:$src1_Z, NEG:$src1_neg_Z, REL:$src1_rel_Z, ABS:$src1_abs_Z, SEL:$src1_sel_Z,
+ R600_Pred:$pred_sel_Z,
+// Slot W
+ UEM:$update_exec_mask_W, UP:$update_pred_W, WRITE:$write_W,
+ OMOD:$omod_W, REL:$dst_rel_W, CLAMP:$clamp_W,
+ R600_TReg32_W:$src0_W, NEG:$src0_neg_W, REL:$src0_rel_W, ABS:$src0_abs_W, SEL:$src0_sel_W,
+ R600_TReg32_W:$src1_W, NEG:$src1_neg_W, REL:$src1_rel_W, ABS:$src1_abs_W, SEL:$src1_sel_W,
+ R600_Pred:$pred_sel_W,
+ LITERAL:$literal0, LITERAL:$literal1),
+ "",
+ pattern,
+ AnyALU> {
+
+ let UseNamedOperandTable = 1;
+
+}
+}
+
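+// DOT_4 consumes one source pair per X/Y/Z/W slot of the instruction group
+// and produces a single scalar result in $dst.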
+def DOT_4 : R600_VEC2OP<[(set R600_Reg32:$dst, (DOT4
+ R600_TReg32_X:$src0_X, R600_TReg32_X:$src1_X,
+ R600_TReg32_Y:$src0_Y, R600_TReg32_Y:$src1_Y,
+ R600_TReg32_Z:$src0_Z, R600_TReg32_Z:$src1_Z,
+ R600_TReg32_W:$src0_W, R600_TReg32_W:$src1_W))]>;
+
+
+class DOT4_Common <bits<11> inst> : R600_2OP <inst, "DOT4", []>;
+
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+multiclass CUBE_Common <bits<11> inst> {
+
+ def _pseudo : InstR600 <
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src0),
+ "CUBE $dst $src0",
+ [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src0))],
+ VecALU
+ > {
+ let isPseudo = 1;
+ let UseNamedOperandTable = 1;
+ }
+
+ def _real : R600_2OP <inst, "CUBE", []>;
+}
+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
+
+class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "EXP_IEEE", fexp2
+> {
+ let Itinerary = TransALU;
+}
+
+class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "FLT_TO_INT", fp_to_sint
+> {
+ let Itinerary = TransALU;
+}
+
+class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "INT_TO_FLT", sint_to_fp
+> {
+ let Itinerary = TransALU;
+}
+
+class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "FLT_TO_UINT", fp_to_uint
+> {
+ let Itinerary = TransALU;
+}
+
+class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "UINT_TO_FLT", uint_to_fp
+> {
+ let Itinerary = TransALU;
+}
+
+class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
+ inst, "LOG_CLAMPED", []
+>;
+
+class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "LOG_IEEE", flog2
+> {
+ let Itinerary = TransALU;
+}
+
+class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
+class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
+class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;
+class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULHI_INT", mulhs
+> {
+ let Itinerary = TransALU;
+}
+class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULHI", mulhu
+> {
+ let Itinerary = TransALU;
+}
+class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULLO_INT", mul
+> {
+ let Itinerary = TransALU;
+}
+class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> {
+ let Itinerary = TransALU;
+}
+
+class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
+ inst, "RECIP_CLAMPED", []
+> {
+ let Itinerary = TransALU;
+}
+
+class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
+ inst, "RECIP_IEEE", [(set f32:$dst, (AMDGPUrcp f32:$src0))]
+> {
+ let Itinerary = TransALU;
+}
+
+class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "RECIP_UINT", AMDGPUurecip
+> {
+ let Itinerary = TransALU;
+}
+
+// Clamped to maximum.
+class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "RECIPSQRT_CLAMPED", AMDGPUrsq_clamped
+> {
+ let Itinerary = TransALU;
+}
+
+class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "RECIPSQRT_IEEE", AMDGPUrsq_legacy
+> {
+ let Itinerary = TransALU;
+}
+
+// TODO: There is also RECIPSQRT_FF which clamps to zero.
+
+class SIN_Common <bits<11> inst> : R600_1OP <
+ inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{
+ let Trig = 1;
+ let Itinerary = TransALU;
+}
+
+class COS_Common <bits<11> inst> : R600_1OP <
+ inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> {
+ let Trig = 1;
+ let Itinerary = TransALU;
+}
+
+def CLAMP_R600 : CLAMP <R600_Reg32>;
+def FABS_R600 : FABS<R600_Reg32>;
+def FNEG_R600 : FNEG<R600_Reg32>;
+
+//===----------------------------------------------------------------------===//
+// Helper patterns for complex intrinsics
+//===----------------------------------------------------------------------===//
+
+// FIXME: Should be predicated on unsafe fp math.
+multiclass DIV_Common <InstR600 recip_ieee> {
+def : Pat<
+ (int_AMDGPU_div f32:$src0, f32:$src1),
+ (MUL_IEEE $src0, (recip_ieee $src1))
+>;
+
+def : Pat<
+ (fdiv f32:$src0, f32:$src1),
+ (MUL_IEEE $src0, (recip_ieee $src1))
+>;
+
+def : RcpPat<recip_ieee, f32>;
+}
+
+class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee>
+ : Pat <
+ (int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w),
+ (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
+>;
+
+//===----------------------------------------------------------------------===//
+// R600 / R700 Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isR600] in {
+
+ def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
+ def MULADD_r600 : MULADD_Common<0x10>;
+ def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
+ def CNDE_r600 : CNDE_Common<0x18>;
+ def CNDGT_r600 : CNDGT_Common<0x19>;
+ def CNDGE_r600 : CNDGE_Common<0x1A>;
+ def DOT4_r600 : DOT4_Common<0x50>;
+ defm CUBE_r600 : CUBE_Common<0x52>;
+ def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
+ def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
+ def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
+ def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
+ def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
+ def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
+ def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
+ def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
+ def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
+ def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
+ def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
+ def SIN_r600 : SIN_Common<0x6E>;
+ def COS_r600 : COS_Common<0x6F>;
+ def ASHR_r600 : ASHR_Common<0x70>;
+ def LSHR_r600 : LSHR_Common<0x71>;
+ def LSHL_r600 : LSHL_Common<0x72>;
+ def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
+ def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
+ def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
+ def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
+ def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
+
+ defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
+ def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
+ def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
+
+ def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
+ def : RsqPat<RECIPSQRT_IEEE_r600, f32>;
+
+ def R600_ExportSwz : ExportSwzInst {
+ let Word1{20-17} = 0; // BURST_COUNT
+ let Word1{21} = eop;
+ let Word1{22} = 0; // VALID_PIXEL_MODE
+ let Word1{30-23} = inst;
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : ExportPattern<R600_ExportSwz, 39>;
+
+ def R600_ExportBuf : ExportBufInst {
+ let Word1{20-17} = 0; // BURST_COUNT
+ let Word1{21} = eop;
+ let Word1{22} = 0; // VALID_PIXEL_MODE
+ let Word1{30-23} = inst;
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
+
+ def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$CNT),
+ "TEX $CNT @$ADDR"> {
+ let POP_COUNT = 0;
+ }
+ def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$CNT),
+ "VTX $CNT @$ADDR"> {
+ let POP_COUNT = 0;
+ }
+ def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
+ "LOOP_START_DX10 @$ADDR"> {
+ let POP_COUNT = 0;
+ let CNT = 0;
+ }
+ def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
+ let POP_COUNT = 0;
+ let CNT = 0;
+ }
+ def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
+ "LOOP_BREAK @$ADDR"> {
+ let POP_COUNT = 0;
+ let CNT = 0;
+ }
+ def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
+ "CONTINUE @$ADDR"> {
+ let POP_COUNT = 0;
+ let CNT = 0;
+ }
+ def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "JUMP @$ADDR POP:$POP_COUNT"> {
+ let CNT = 0;
+ }
+ def CF_PUSH_ELSE_R600 : CF_CLAUSE_R600<12, (ins i32imm:$ADDR),
+ "PUSH_ELSE @$ADDR"> {
+ let CNT = 0;
+ let POP_COUNT = 0; // FIXME?
+ }
+ def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "ELSE @$ADDR POP:$POP_COUNT"> {
+ let CNT = 0;
+ }
+ def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
+ let ADDR = 0;
+ let CNT = 0;
+ let POP_COUNT = 0;
+ }
+ def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "POP @$ADDR POP:$POP_COUNT"> {
+ let CNT = 0;
+ }
+ def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
+ let CNT = 0;
+ let POP_COUNT = 0;
+ let ADDR = 0;
+ let END_OF_PROGRAM = 1;
+ }
+
+}
+
+
+//===----------------------------------------------------------------------===//
+// Register loads and stores - for indirect addressing
+//===----------------------------------------------------------------------===//
+
+defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
+
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+let isPseudo = 1 in {
+
+def PRED_X : InstR600 <
+ (outs R600_Predicate_Bit:$dst),
+ (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
+ "", [], NullALU> {
+ let FlagOperandIdx = 3;
+}
+
+let isTerminator = 1, isBranch = 1 in {
+def JUMP_COND : InstR600 <
+ (outs),
+ (ins brtarget:$target, R600_Predicate_Bit:$p),
+ "JUMP $target ($p)",
+ [], AnyALU
+ >;
+
+def JUMP : InstR600 <
+ (outs),
+ (ins brtarget:$target),
+ "JUMP $target",
+ [], AnyALU
+ >
+{
+ let isPredicable = 1;
+ let isBarrier = 1;
+}
+
+} // End isTerminator = 1, isBranch = 1
+
+let usesCustomInserter = 1 in {
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {
+
+def MASK_WRITE : AMDGPUShaderInst <
+ (outs),
+ (ins R600_Reg32:$src),
+ "MASK_WRITE $src",
+ []
+>;
+
+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
+
+
+def TXD: InstR600 <
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
+ i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
+ "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
+ [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
+ imm:$resourceId, imm:$samplerId, imm:$textureTarget))],
+ NullALU > {
+ let TEXInst = 1;
+}
+
+def TXD_SHADOW: InstR600 <
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
+ i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
+ "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
+ [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
+ imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))],
+ NullALU
+> {
+ let TEXInst = 1;
+}
+} // End isPseudo = 1
+} // End usesCustomInserter = 1
+
+
+//===----------------------------------------------------------------------===//
+// Constant Buffer Addressing Support
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+def CONST_COPY : Instruction {
+ let OutOperandList = (outs R600_Reg32:$dst);
+ let InOperandList = (ins i32imm:$src);
+ let Pattern =
+ [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
+ let AsmString = "CONST_COPY";
+ let hasSideEffects = 0;
+ let isAsCheapAsAMove = 1;
+ let Itinerary = NullALU;
+}
+} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
+
+def TEX_VTX_CONSTBUF :
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
+ [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
+ VTX_WORD1_GPR, VTX_WORD0_eg {
+
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let SRC_REL = 0;
+ let SRC_SEL_X = 0;
+ let DST_REL = 0;
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 2;
+ let FORMAT_COMP_ALL = 1;
+ let SRF_MODE_ALL = 1;
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 35;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter)
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-87} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+ let VTXInst = 1;
+}
+
+def TEX_VTX_TEXBUF:
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
+ [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
+VTX_WORD1_GPR, VTX_WORD0_eg {
+
+let VC_INST = 0;
+let FETCH_TYPE = 2;
+let FETCH_WHOLE_QUAD = 0;
+let SRC_REL = 0;
+let SRC_SEL_X = 0;
+let DST_REL = 0;
+let USE_CONST_FIELDS = 1;
+let NUM_FORMAT_ALL = 0;
+let FORMAT_COMP_ALL = 0;
+let SRF_MODE_ALL = 1;
+let MEGA_FETCH_COUNT = 16;
+let DST_SEL_X = 0;
+let DST_SEL_Y = 1;
+let DST_SEL_Z = 2;
+let DST_SEL_W = 3;
+let DATA_FORMAT = 0;
+
+let Inst{31-0} = Word0;
+let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter)
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-87} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+ let VTXInst = 1;
+}
+
+//===---------------------------------------------------------------------===//
+// Flow and Program control Instructions
+//===---------------------------------------------------------------------===//
+class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+
+ let Namespace = "AMDGPU";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let Pattern = pattern;
+ let AsmString = !strconcat(asmstr, "\n");
+ let isPseudo = 1;
+ let Itinerary = NullALU;
+ bit hasIEEEFlag = 0;
+ bit hasZeroOpFlag = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let isCodeGenOnly = 1;
+}
+
+multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {
+ def _i32 : ILFormat<(outs),
+ (ins brtarget:$target, rci:$src0),
+ "; i32 Pseudo branch instruction",
+ [(Op bb:$target, (i32 rci:$src0))]>;
+ def _f32 : ILFormat<(outs),
+ (ins brtarget:$target, rcf:$src0),
+ "; f32 Pseudo branch instruction",
+ [(Op bb:$target, (f32 rcf:$src0))]>;
+}
+
+// Only scalar types should generate flow control
+multiclass BranchInstr<string name> {
+ def _i32 : ILFormat<(outs), (ins R600_Reg32:$src),
+ !strconcat(name, " $src"), []>;
+ def _f32 : ILFormat<(outs), (ins R600_Reg32:$src),
+ !strconcat(name, " $src"), []>;
+}
+// Only scalar types should generate flow control
+multiclass BranchInstr2<string name> {
+ def _i32 : ILFormat<(outs), (ins R600_Reg32:$src0, R600_Reg32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+ def _f32 : ILFormat<(outs), (ins R600_Reg32:$src0, R600_Reg32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+}
+
+//===---------------------------------------------------------------------===//
+// Custom Inserter for Branches and returns; this will eventually be a
+// separate pass
+//===---------------------------------------------------------------------===//
+let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
+ def BRANCH : ILFormat<(outs), (ins brtarget:$target),
+ "; Pseudo unconditional branch instruction",
+ [(br bb:$target)]>;
+ defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>;
+}
+
+//===---------------------------------------------------------------------===//
+// Return instruction
+//===---------------------------------------------------------------------===//
+let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
+ usesCustomInserter = 1 in {
+ def RETURN : ILFormat<(outs), (ins variable_ops),
+ "RETURN", [(IL_retflag)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Instructions
+//===----------------------------------------------------------------------===//
+
+def IF_PREDICATE_SET : ILFormat<(outs), (ins R600_Reg32:$src),
+ "IF_PREDICATE_SET $src", []>;
+
+let isTerminator=1 in {
+ def BREAK : ILFormat< (outs), (ins),
+ "BREAK", []>;
+ def CONTINUE : ILFormat< (outs), (ins),
+ "CONTINUE", []>;
+ def DEFAULT : ILFormat< (outs), (ins),
+ "DEFAULT", []>;
+ def ELSE : ILFormat< (outs), (ins),
+ "ELSE", []>;
+ def ENDSWITCH : ILFormat< (outs), (ins),
+ "ENDSWITCH", []>;
+ def ENDMAIN : ILFormat< (outs), (ins),
+ "ENDMAIN", []>;
+ def END : ILFormat< (outs), (ins),
+ "END", []>;
+ def ENDFUNC : ILFormat< (outs), (ins),
+ "ENDFUNC", []>;
+ def ENDIF : ILFormat< (outs), (ins),
+ "ENDIF", []>;
+ def WHILELOOP : ILFormat< (outs), (ins),
+ "WHILE", []>;
+ def ENDLOOP : ILFormat< (outs), (ins),
+ "ENDLOOP", []>;
+ def FUNC : ILFormat< (outs), (ins),
+ "FUNC", []>;
+ def RETDYN : ILFormat< (outs), (ins),
+ "RET_DYN", []>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
+ defm IFC : BranchInstr2<"IFC">;
+ defm BREAKC : BranchInstr2<"BREAKC">;
+ defm CONTINUEC : BranchInstr2<"CONTINUEC">;
+}
+
+//===----------------------------------------------------------------------===//
+// Indirect addressing pseudo instructions
+//===----------------------------------------------------------------------===//
+
+let isPseudo = 1 in {
+
+class ExtractVertical <RegisterClass vec_rc> : InstR600 <
+ (outs R600_Reg32:$dst),
+ (ins vec_rc:$vec, R600_Reg32:$index), "",
+ [],
+ AnyALU
+>;
+
+let Constraints = "$dst = $vec" in {
+
+class InsertVertical <RegisterClass vec_rc> : InstR600 <
+ (outs vec_rc:$dst),
+ (ins vec_rc:$vec, R600_Reg32:$value, R600_Reg32:$index), "",
+ [],
+ AnyALU
+>;
+
+} // End Constraints = "$dst = $vec"
+
+} // End isPseudo = 1
+
+def R600_EXTRACT_ELT_V2 : ExtractVertical <R600_Reg64Vertical>;
+def R600_EXTRACT_ELT_V4 : ExtractVertical <R600_Reg128Vertical>;
+
+def R600_INSERT_ELT_V2 : InsertVertical <R600_Reg64Vertical>;
+def R600_INSERT_ELT_V4 : InsertVertical <R600_Reg128Vertical>;
+
+class ExtractVerticalPat <Instruction inst, ValueType vec_ty,
+ ValueType scalar_ty> : Pat <
+ (scalar_ty (extractelt vec_ty:$vec, i32:$index)),
+ (inst $vec, $index)
+>;
+
+def : ExtractVerticalPat <R600_EXTRACT_ELT_V2, v2i32, i32>;
+def : ExtractVerticalPat <R600_EXTRACT_ELT_V2, v2f32, f32>;
+def : ExtractVerticalPat <R600_EXTRACT_ELT_V4, v4i32, i32>;
+def : ExtractVerticalPat <R600_EXTRACT_ELT_V4, v4f32, f32>;
+
+class InsertVerticalPat <Instruction inst, ValueType vec_ty,
+ ValueType scalar_ty> : Pat <
+ (vec_ty (insertelt vec_ty:$vec, scalar_ty:$value, i32:$index)),
+ (inst $vec, $value, $index)
+>;
+
+def : InsertVerticalPat <R600_INSERT_ELT_V2, v2i32, i32>;
+def : InsertVerticalPat <R600_INSERT_ELT_V2, v2f32, f32>;
+def : InsertVerticalPat <R600_INSERT_ELT_V4, v4i32, i32>;
+def : InsertVerticalPat <R600_INSERT_ELT_V4, v4f32, f32>;
+
+//===----------------------------------------------------------------------===//
+// ISel Patterns
+//===----------------------------------------------------------------------===//
+
+// CND*_INT Patterns for f32 True / False values
+
+class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
+ (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc),
+ (cnd $src0, $src1, $src2)
+>;
+
+def : CND_INT_f32 <CNDE_INT, SETEQ>;
+def : CND_INT_f32 <CNDGT_INT, SETGT>;
+def : CND_INT_f32 <CNDGE_INT, SETGE>;
+
+// CNDGE_INT extra pattern
+def : Pat <
+ (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_SGT),
+ (CNDGE_INT $src0, $src1, $src2)
+>;
+
+// KIL Patterns
+def KILP : Pat <
+ (int_AMDGPU_kilp),
+ (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
+>;
+
+def KIL : Pat <
+ (int_AMDGPU_kill f32:$src0),
+ (MASK_WRITE (KILLGT (f32 ZERO), $src0))
+>;
+
+def : Extract_Element <f32, v4f32, 0, sub0>;
+def : Extract_Element <f32, v4f32, 1, sub1>;
+def : Extract_Element <f32, v4f32, 2, sub2>;
+def : Extract_Element <f32, v4f32, 3, sub3>;
+
+def : Insert_Element <f32, v4f32, 0, sub0>;
+def : Insert_Element <f32, v4f32, 1, sub1>;
+def : Insert_Element <f32, v4f32, 2, sub2>;
+def : Insert_Element <f32, v4f32, 3, sub3>;
+
+def : Extract_Element <i32, v4i32, 0, sub0>;
+def : Extract_Element <i32, v4i32, 1, sub1>;
+def : Extract_Element <i32, v4i32, 2, sub2>;
+def : Extract_Element <i32, v4i32, 3, sub3>;
+
+def : Insert_Element <i32, v4i32, 0, sub0>;
+def : Insert_Element <i32, v4i32, 1, sub1>;
+def : Insert_Element <i32, v4i32, 2, sub2>;
+def : Insert_Element <i32, v4i32, 3, sub3>;
+
+def : Extract_Element <f32, v2f32, 0, sub0>;
+def : Extract_Element <f32, v2f32, 1, sub1>;
+
+def : Insert_Element <f32, v2f32, 0, sub0>;
+def : Insert_Element <f32, v2f32, 1, sub1>;
+
+def : Extract_Element <i32, v2i32, 0, sub0>;
+def : Extract_Element <i32, v2i32, 1, sub1>;
+
+def : Insert_Element <i32, v2i32, 0, sub0>;
+def : Insert_Element <i32, v2i32, 1, sub1>;
+
+// bitconvert patterns
+
+def : BitConvert <i32, f32, R600_Reg32>;
+def : BitConvert <f32, i32, R600_Reg32>;
+def : BitConvert <v2f32, v2i32, R600_Reg64>;
+def : BitConvert <v2i32, v2f32, R600_Reg64>;
+def : BitConvert <v4f32, v4i32, R600_Reg128>;
+def : BitConvert <v4i32, v4f32, R600_Reg128>;
+
+// DWORDADDR pattern
+def : DwordAddrPat <i32, R600_Reg32>;
+
+} // End isR600toCayman Predicate
+
+let Predicates = [isR600] in {
+// Intrinsic patterns
+defm : Expand24IBitOps<MULLO_INT_r600, ADD_INT>;
+defm : Expand24UBitOps<MULLO_UINT_r600, ADD_INT>;
+} // End isR600
+
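+// Maps each R600_LDS_1A1D instruction that returns a value (DisableEncoding
+// "$dst") to its no-return variant (empty DisableEncoding), keyed by BaseOp.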
+def getLDSNoRetOp : InstrMapping {
+ let FilterClass = "R600_LDS_1A1D";
+ let RowFields = ["BaseOp"];
+ let ColFields = ["DisableEncoding"];
+ let KeyCol = ["$dst"];
+ let ValueCols = [[""]];
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Intrinsics.td b/contrib/llvm/lib/Target/AMDGPU/R600Intrinsics.td
new file mode 100644
index 0000000..9681747
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600Intrinsics.td
@@ -0,0 +1,75 @@
+//===-- R600Intrinsics.td - R600 Intrinsic defs --------*- tablegen -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Intrinsic Definitions
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "R600", isTarget = 1 in {
+ class TextureIntrinsicFloatInput :
+ Intrinsic<[llvm_v4f32_ty], [
+ llvm_v4f32_ty, // Coord
+ llvm_i32_ty, // offset_x
+ llvm_i32_ty, // offset_y,
+ llvm_i32_ty, // offset_z,
+ llvm_i32_ty, // resource_id
+ llvm_i32_ty, // samplerid
+ llvm_i32_ty, // coord_type_x
+ llvm_i32_ty, // coord_type_y
+ llvm_i32_ty, // coord_type_z
+ llvm_i32_ty // coord_type_w
+ ], [IntrNoMem]>;
+ class TextureIntrinsicInt32Input :
+ Intrinsic<[llvm_v4i32_ty], [
+ llvm_v4i32_ty, // Coord
+ llvm_i32_ty, // offset_x
+ llvm_i32_ty, // offset_y,
+ llvm_i32_ty, // offset_z,
+ llvm_i32_ty, // resource_id
+ llvm_i32_ty, // samplerid
+ llvm_i32_ty, // coord_type_x
+ llvm_i32_ty, // coord_type_y
+ llvm_i32_ty, // coord_type_z
+ llvm_i32_ty // coord_type_w
+ ], [IntrNoMem]>;
+
+ def int_R600_load_input :
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_interp_input :
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_interp_const :
+ Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_R600_interp_xy :
+ Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+def int_R600_interp_zw :
+ Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_R600_load_texbuf :
+ Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_tex : TextureIntrinsicFloatInput;
+ def int_R600_texc : TextureIntrinsicFloatInput;
+ def int_R600_txl : TextureIntrinsicFloatInput;
+ def int_R600_txlc : TextureIntrinsicFloatInput;
+ def int_R600_txb : TextureIntrinsicFloatInput;
+ def int_R600_txbc : TextureIntrinsicFloatInput;
+ def int_R600_txf : TextureIntrinsicInt32Input;
+ def int_R600_ldptr : TextureIntrinsicInt32Input;
+ def int_R600_txq : TextureIntrinsicInt32Input;
+ def int_R600_ddx : TextureIntrinsicFloatInput;
+ def int_R600_ddy : TextureIntrinsicFloatInput;
+ def int_R600_store_swizzle :
+ Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_R600_store_stream_output :
+ Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_R600_store_pixel_depth :
+ Intrinsic<[], [llvm_float_ty], []>;
+ def int_R600_store_pixel_stencil :
+ Intrinsic<[], [llvm_float_ty], []>;
+ def int_R600_store_dummy :
+ Intrinsic<[], [llvm_i32_ty], []>;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
new file mode 100644
index 0000000..01105c6
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
@@ -0,0 +1,20 @@
+//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "R600MachineFunctionInfo.h"
+
+using namespace llvm;
+
+
+// Pin the vtable to this file.
+void R600MachineFunctionInfo::anchor() {}
+
+R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
+ : AMDGPUMachineFunction(MF) { }
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h
new file mode 100644
index 0000000..f5556c1
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h
@@ -0,0 +1,34 @@
+//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_R600MACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_R600_R600MACHINEFUNCTIONINFO_H
+
+#include "AMDGPUMachineFunction.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include <vector>
+
+namespace llvm {
+
+class R600MachineFunctionInfo : public AMDGPUMachineFunction {
+ void anchor() override;
+public:
+ R600MachineFunctionInfo(const MachineFunction &MF);
+ SmallVector<unsigned, 4> LiveOuts;
+ std::vector<unsigned> IndirectRegs;
+ unsigned StackSize;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
new file mode 100644
index 0000000..bcde5fb
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -0,0 +1,469 @@
+//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600MachineScheduler.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
+ assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness");
+ DAG = static_cast<ScheduleDAGMILive*>(dag);
+ const AMDGPUSubtarget &ST = DAG->MF.getSubtarget<AMDGPUSubtarget>();
+ TII = static_cast<const R600InstrInfo*>(DAG->TII);
+ TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
+ VLIW5 = !ST.hasCaymanISA();
+ MRI = &DAG->MRI;
+ CurInstKind = IDOther;
+ CurEmitted = 0;
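+ // 31 = 0b11111: mark all five VLIW slots (X, Y, Z, W and Trans) as occupied
+ // so that the first pick starts a fresh instruction group.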
+ OccupedSlotsMask = 31;
+ InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
+ InstKindLimit[IDOther] = 32;
+ InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
+ AluInstCount = 0;
+ FetchInstCount = 0;
+}
+
+void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
+ std::vector<SUnit *> &QDst)
+{
+ QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
+ QSrc.clear();
+}
+
+static
+unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
+ assert (GPRCount && "GPRCount cannot be 0");
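+ // Each wavefront needs GPRCount registers, so at most 248 / GPRCount
+ // wavefronts can be resident (248 being the GPR budget assumed here).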
+ return 248 / GPRCount;
+}
+
+SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
+ SUnit *SU = nullptr;
+ NextInstKind = IDOther;
+
+ IsTopNode = false;
+
+ // check if we might want to switch current clause type
+ bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
+ (Available[CurInstKind].empty());
+ bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
+ (!Available[IDFetch].empty() || !Available[IDOther].empty());
+
+ if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
+ // We use the heuristic provided by AMD Accelerated Parallel Processing
+ // OpenCL Programming Guide:
+ // The approx. number of WF that allows TEX inst to hide ALU inst is:
+ // 500 (cycles for TEX) / (AluFetchRatio * 8 (cycles for ALU))
+ float ALUFetchRatioEstimate =
+ (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
+ (FetchInstCount + Available[IDFetch].size());
+ if (ALUFetchRatioEstimate == 0) {
+ AllowSwitchFromAlu = true;
+ } else {
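+ // 62.5f = 500 / 8, the constant factors from the formula above.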
+ unsigned NeededWF = 62.5f / ALUFetchRatioEstimate;
+ DEBUG( dbgs() << NeededWF << " approx. Wavefronts Required\n" );
+ // We assume the local GPR requirements to be "dominated" by the requirement
+ // of the TEX clause (which consumes 128-bit regs); ALU inst before and
+ // after TEX are indeed likely to consume or generate values from/for the
+ // TEX clause.
+ // Available[IDFetch].size() * 2 : GPRs required in the Fetch clause
+ // We assume that fetch instructions are either TnXYZW = TEX TnXYZW (need
+ // one GPR) or TmXYZW = TnXYZW (need 2 GPRs).
+ // (TODO: use RegisterPressure)
+ // If we are going to use too many GPRs, we flush Fetch instructions to lower
+ // register pressure on 128-bit regs.
+ unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
+ if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
+ AllowSwitchFromAlu = true;
+ }
+ }
+
+ if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
+ (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
+ // try to pick ALU
+ SU = pickAlu();
+ if (!SU && !PhysicalRegCopy.empty()) {
+ SU = PhysicalRegCopy.front();
+ PhysicalRegCopy.erase(PhysicalRegCopy.begin());
+ }
+ if (SU) {
+ if (CurEmitted >= InstKindLimit[IDAlu])
+ CurEmitted = 0;
+ NextInstKind = IDAlu;
+ }
+ }
+
+ if (!SU) {
+ // try to pick FETCH
+ SU = pickOther(IDFetch);
+ if (SU)
+ NextInstKind = IDFetch;
+ }
+
+ // try to pick other
+ if (!SU) {
+ SU = pickOther(IDOther);
+ if (SU)
+ NextInstKind = IDOther;
+ }
+
+ DEBUG(
+ if (SU) {
+ dbgs() << " ** Pick node **\n";
+ SU->dump(DAG);
+ } else {
+ dbgs() << "NO NODE \n";
+ for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
+ const SUnit &S = DAG->SUnits[i];
+ if (!S.isScheduled)
+ S.dump(DAG);
+ }
+ }
+ );
+
+ return SU;
+}
+
+void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+ if (NextInstKind != CurInstKind) {
+ DEBUG(dbgs() << "Instruction Type Switch\n");
+ if (NextInstKind != IDAlu)
+ OccupedSlotsMask |= 31;
+ CurEmitted = 0;
+ CurInstKind = NextInstKind;
+ }
+
+ if (CurInstKind == IDAlu) {
+ AluInstCount ++;
+ switch (getAluKind(SU)) {
+ case AluT_XYZW:
+ CurEmitted += 4;
+ break;
+ case AluDiscarded:
+ break;
+ default: {
+ ++CurEmitted;
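+ // Each ALU_LITERAL_X operand also counts against the clause's emitted
+ // instruction limit.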
+ for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
+ E = SU->getInstr()->operands_end(); It != E; ++It) {
+ MachineOperand &MO = *It;
+ if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+ ++CurEmitted;
+ }
+ }
+ }
+ } else {
+ ++CurEmitted;
+ }
+
+
+ DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
+
+ if (CurInstKind != IDFetch) {
+ MoveUnits(Pending[IDFetch], Available[IDFetch]);
+ } else
+ FetchInstCount++;
+}
+
+static bool
+isPhysicalRegCopy(MachineInstr *MI) {
+ if (MI->getOpcode() != AMDGPU::COPY)
+ return false;
+
+ return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg());
+}
+
+void R600SchedStrategy::releaseTopNode(SUnit *SU) {
+ DEBUG(dbgs() << "Top Releasing ";SU->dump(DAG););
+}
+
+void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
+ DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG););
+ if (isPhysicalRegCopy(SU->getInstr())) {
+ PhysicalRegCopy.push_back(SU);
+ return;
+ }
+
+ int IK = getInstKind(SU);
+
+ // There is no export clause, so we can schedule one as soon as it's ready
+ if (IK == IDOther)
+ Available[IDOther].push_back(SU);
+ else
+ Pending[IK].push_back(SU);
+
+}
+
+bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
+ const TargetRegisterClass *RC) const {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ return RC->contains(Reg);
+ } else {
+ return MRI->getRegClass(Reg) == RC;
+ }
+}
+
+R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
+ MachineInstr *MI = SU->getInstr();
+
+ if (TII->isTransOnly(MI))
+ return AluTrans;
+
+ switch (MI->getOpcode()) {
+ case AMDGPU::PRED_X:
+ return AluPredX;
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::DOT_4:
+ return AluT_XYZW;
+ case AMDGPU::COPY:
+ if (MI->getOperand(1).isUndef()) {
+ // MI will become a KILL, don't consider it in scheduling
+ return AluDiscarded;
+ }
+ default:
+ break;
+ }
+
+ // Does the instruction take a whole IG?
+ // XXX: Is it possible to add a helper function in R600InstrInfo that can
+ // be used here and in R600PacketizerList::isSoloInstruction()?
+ if(TII->isVector(*MI) ||
+ TII->isCubeOp(MI->getOpcode()) ||
+ TII->isReductionOp(MI->getOpcode()) ||
+ MI->getOpcode() == AMDGPU::GROUP_BARRIER) {
+ return AluT_XYZW;
+ }
+
+ if (TII->isLDSInstr(MI->getOpcode())) {
+ return AluT_X;
+ }
+
+ // Is the result already assigned to a channel?
+ unsigned DestSubReg = MI->getOperand(0).getSubReg();
+ switch (DestSubReg) {
+ case AMDGPU::sub0:
+ return AluT_X;
+ case AMDGPU::sub1:
+ return AluT_Y;
+ case AMDGPU::sub2:
+ return AluT_Z;
+ case AMDGPU::sub3:
+ return AluT_W;
+ default:
+ break;
+ }
+
+ // Is the result already a member of an X/Y/Z/W class?
+ unsigned DestReg = MI->getOperand(0).getReg();
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
+ regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
+ return AluT_X;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
+ return AluT_Y;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
+ return AluT_Z;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
+ return AluT_W;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
+ return AluT_XYZW;
+
+ // LDS src registers cannot be used in the Trans slot.
+ if (TII->readsLDSSrcReg(MI))
+ return AluT_XYZW;
+
+ return AluAny;
+
+}
+
+int R600SchedStrategy::getInstKind(SUnit* SU) {
+ int Opcode = SU->getInstr()->getOpcode();
+
+ if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
+ return IDFetch;
+
+ if (TII->isALUInstr(Opcode)) {
+ return IDAlu;
+ }
+
+ switch (Opcode) {
+ case AMDGPU::PRED_X:
+ case AMDGPU::COPY:
+ case AMDGPU::CONST_COPY:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::DOT_4:
+ return IDAlu;
+ default:
+ return IDOther;
+ }
+}
+
+SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
+ if (Q.empty())
+ return nullptr;
+ for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
+ It != E; ++It) {
+ SUnit *SU = *It;
+ InstructionsGroupCandidate.push_back(SU->getInstr());
+ if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)
+ && (!AnyALU || !TII->isVectorOnly(SU->getInstr()))
+ ) {
+ InstructionsGroupCandidate.pop_back();
+ Q.erase((It + 1).base());
+ return SU;
+ } else {
+ InstructionsGroupCandidate.pop_back();
+ }
+ }
+ return nullptr;
+}
+
+void R600SchedStrategy::LoadAlu() {
+ std::vector<SUnit *> &QSrc = Pending[IDAlu];
+ for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
+ AluKind AK = getAluKind(QSrc[i]);
+ AvailableAlus[AK].push_back(QSrc[i]);
+ }
+ QSrc.clear();
+}
+
+void R600SchedStrategy::PrepareNextSlot() {
+ DEBUG(dbgs() << "New Slot\n");
+ assert (OccupedSlotsMask && "Slot wasn't filled");
+ OccupedSlotsMask = 0;
+// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
+// OccupedSlotsMask |= 16;
+ InstructionsGroupCandidate.clear();
+ LoadAlu();
+}
+
+void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
+ int DstIndex = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ if (DstIndex == -1) {
+ return;
+ }
+ unsigned DestReg = MI->getOperand(DstIndex).getReg();
+ // PressureRegister crashes if an operand is both defined and used in the
+ // same inst and we try to constrain its regclass
+ for (MachineInstr::mop_iterator It = MI->operands_begin(),
+ E = MI->operands_end(); It != E; ++It) {
+ MachineOperand &MO = *It;
+ if (MO.isReg() && !MO.isDef() &&
+ MO.getReg() == DestReg)
+ return;
+ }
+ // Constrains the regclass of DestReg to assign it to Slot
+ switch (Slot) {
+ case 0:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
+ break;
+ case 1:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
+ break;
+ case 2:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
+ break;
+ case 3:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
+ break;
+ }
+}
+
+SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
+ static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
+ SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
+ if (SlotedSU)
+ return SlotedSU;
+ SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
+ if (UnslotedSU)
+ AssignSlot(UnslotedSU->getInstr(), Slot);
+ return UnslotedSU;
+}
+
+unsigned R600SchedStrategy::AvailablesAluCount() const {
+ return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
+ AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
+ AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
+ AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
+ AvailableAlus[AluPredX].size();
+}
+
+SUnit* R600SchedStrategy::pickAlu() {
+ while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
+ if (!OccupedSlotsMask) {
+ // Bottom-up scheduling: predX must come first
+ if (!AvailableAlus[AluPredX].empty()) {
+ OccupedSlotsMask |= 31;
+ return PopInst(AvailableAlus[AluPredX], false);
+ }
+ // Flush physical reg copies (RA will discard them)
+ if (!AvailableAlus[AluDiscarded].empty()) {
+ OccupedSlotsMask |= 31;
+ return PopInst(AvailableAlus[AluDiscarded], false);
+ }
+ // If there is a T_XYZW alu available, use it
+ if (!AvailableAlus[AluT_XYZW].empty()) {
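+ // Mark only the X/Y/Z/W slots (bits 0-3); the Trans slot stays free so a
+ // trans-only instruction can still join this group.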
+ OccupedSlotsMask |= 15;
+ return PopInst(AvailableAlus[AluT_XYZW], false);
+ }
+ }
+ bool TransSlotOccuped = OccupedSlotsMask & 16;
+ if (!TransSlotOccuped && VLIW5) {
+ if (!AvailableAlus[AluTrans].empty()) {
+ OccupedSlotsMask |= 16;
+ return PopInst(AvailableAlus[AluTrans], false);
+ }
+ SUnit *SU = AttemptFillSlot(3, true);
+ if (SU) {
+ OccupedSlotsMask |= 16;
+ return SU;
+ }
+ }
+ for (int Chan = 3; Chan > -1; --Chan) {
+ bool isOccupied = OccupedSlotsMask & (1 << Chan);
+ if (!isOccupied) {
+ SUnit *SU = AttemptFillSlot(Chan, false);
+ if (SU) {
+ OccupedSlotsMask |= (1 << Chan);
+ InstructionsGroupCandidate.push_back(SU->getInstr());
+ return SU;
+ }
+ }
+ }
+ PrepareNextSlot();
+ }
+ return nullptr;
+}
+
+SUnit* R600SchedStrategy::pickOther(int QID) {
+ SUnit *SU = nullptr;
+ std::vector<SUnit *> &AQ = Available[QID];
+
+ if (AQ.empty()) {
+ MoveUnits(Pending[QID], AQ);
+ }
+ if (!AQ.empty()) {
+ SU = AQ.back();
+ AQ.resize(AQ.size() - 1);
+ }
+ return SU;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.h b/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.h
new file mode 100644
index 0000000..fc5b95c
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.h
@@ -0,0 +1,103 @@
+//===-- R600MachineScheduler.h - R600 Scheduler Interface -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_R600MACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_R600_R600MACHINESCHEDULER_H
+
+#include "R600InstrInfo.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+class R600SchedStrategy : public MachineSchedStrategy {
+
+ const ScheduleDAGMILive *DAG;
+ const R600InstrInfo *TII;
+ const R600RegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ enum InstKind {
+ IDAlu,
+ IDFetch,
+ IDOther,
+ IDLast
+ };
+
+ enum AluKind {
+ AluAny,
+ AluT_X,
+ AluT_Y,
+ AluT_Z,
+ AluT_W,
+ AluT_XYZW,
+ AluPredX,
+ AluTrans,
+ AluDiscarded, // LLVM Instructions that are going to be eliminated
+ AluLast
+ };
+
+ std::vector<SUnit *> Available[IDLast], Pending[IDLast];
+ std::vector<SUnit *> AvailableAlus[AluLast];
+ std::vector<SUnit *> PhysicalRegCopy;
+
+ InstKind CurInstKind;
+ int CurEmitted;
+ InstKind NextInstKind;
+
+ unsigned AluInstCount;
+ unsigned FetchInstCount;
+
+ int InstKindLimit[IDLast];
+
+ int OccupedSlotsMask;
+
+public:
+ R600SchedStrategy() :
+ DAG(nullptr), TII(nullptr), TRI(nullptr), MRI(nullptr) {
+ }
+
+ virtual ~R600SchedStrategy() {}
+
+ void initialize(ScheduleDAGMI *dag) override;
+ SUnit *pickNode(bool &IsTopNode) override;
+ void schedNode(SUnit *SU, bool IsTopNode) override;
+ void releaseTopNode(SUnit *SU) override;
+ void releaseBottomNode(SUnit *SU) override;
+
+private:
+ std::vector<MachineInstr *> InstructionsGroupCandidate;
+ bool VLIW5;
+
+ int getInstKind(SUnit *SU);
+ bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
+ AluKind getAluKind(SUnit *SU) const;
+ void LoadAlu();
+ unsigned AvailablesAluCount() const;
+ SUnit *AttemptFillSlot (unsigned Slot, bool AnyAlu);
+ void PrepareNextSlot();
+ SUnit *PopInst(std::vector<SUnit*> &Q, bool AnyALU);
+
+ void AssignSlot(MachineInstr *MI, unsigned Slot);
+ SUnit* pickAlu();
+ SUnit* pickOther(int QID);
+ void MoveUnits(std::vector<SUnit *> &QSrc, std::vector<SUnit *> &QDst);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_R600_R600MACHINESCHEDULER_H
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
new file mode 100644
index 0000000..a1a1b40
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -0,0 +1,382 @@
+//===--------------------- R600MergeVectorRegisters.cpp -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass merges inputs of swizzleable instructions into vectors that
+/// share common data and/or have enough undef subregs, using the swizzle
+/// abilities of those instructions.
+///
+/// For instance, consider the following pseudo code:
+/// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
+/// ...
+/// vreg7<def> = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3
+/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3
+///
+/// is turned into :
+/// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
+/// ...
+/// vreg7<def> = INSERT_SUBREG vreg4, sub3
+/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3
+///
+/// This allows regalloc to reduce register pressure for vector registers and
+/// to reduce MOV count.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "vec-merger"
+
+namespace {
+
+static bool
+isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
+ for (MachineRegisterInfo::def_instr_iterator It = MRI.def_instr_begin(Reg),
+ E = MRI.def_instr_end(); It != E; ++It) {
+ return (*It).isImplicitDef();
+ }
+ if (MRI.isReserved(Reg)) {
+ return false;
+ }
+ llvm_unreachable("Reg without a def");
+ return false;
+}
+
+class RegSeqInfo {
+public:
+ MachineInstr *Instr;
+ DenseMap<unsigned, unsigned> RegToChan;
+ std::vector<unsigned> UndefReg;
+ RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
+ assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE);
+ for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) {
+ MachineOperand &MO = Instr->getOperand(i);
+ unsigned Chan = Instr->getOperand(i + 1).getImm();
+ if (isImplicitlyDef(MRI, MO.getReg()))
+ UndefReg.push_back(Chan);
+ else
+ RegToChan[MO.getReg()] = Chan;
+ }
+ }
+ RegSeqInfo() {}
+
+ bool operator==(const RegSeqInfo &RSI) const {
+ return RSI.Instr == Instr;
+ }
+};
+
+class R600VectorRegMerger : public MachineFunctionPass {
+private:
+ MachineRegisterInfo *MRI;
+ const R600InstrInfo *TII;
+ bool canSwizzle(const MachineInstr &) const;
+ bool areAllUsesSwizzeable(unsigned Reg) const;
+ void SwizzleInput(MachineInstr &,
+ const std::vector<std::pair<unsigned, unsigned> > &) const;
+ bool tryMergeVector(const RegSeqInfo *, RegSeqInfo *,
+ std::vector<std::pair<unsigned, unsigned> > &Remap) const;
+ bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
+ std::vector<std::pair<unsigned, unsigned> > &RemapChan);
+ bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
+ std::vector<std::pair<unsigned, unsigned> > &RemapChan);
+ MachineInstr *RebuildVector(RegSeqInfo *MI,
+ const RegSeqInfo *BaseVec,
+ const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const;
+ void RemoveMI(MachineInstr *);
+ void trackRSI(const RegSeqInfo &RSI);
+
+ typedef DenseMap<unsigned, std::vector<MachineInstr *> > InstructionSetMap;
+ DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq;
+ InstructionSetMap PreviousRegSeqByReg;
+ InstructionSetMap PreviousRegSeqByUndefCount;
+public:
+ static char ID;
+ R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII(nullptr) { }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const override {
+ return "R600 Vector Registers Merge Pass";
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+};
+
+char R600VectorRegMerger::ID = 0;
+
+bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)
+ const {
+ if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
+ return true;
+ switch (MI.getOpcode()) {
+ case AMDGPU::R600_ExportSwz:
+ case AMDGPU::EG_ExportSwz:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched,
+ RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned> > &Remap)
+ const {
+ unsigned CurrentUndexIdx = 0;
+ for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(),
+ E = ToMerge->RegToChan.end(); It != E; ++It) {
+ DenseMap<unsigned, unsigned>::const_iterator PosInUntouched =
+ Untouched->RegToChan.find((*It).first);
+ if (PosInUntouched != Untouched->RegToChan.end()) {
+ Remap.push_back(std::pair<unsigned, unsigned>
+ ((*It).second, (*PosInUntouched).second));
+ continue;
+ }
+ if (CurrentUndexIdx >= Untouched->UndefReg.size())
+ return false;
+ Remap.push_back(std::pair<unsigned, unsigned>
+ ((*It).second, Untouched->UndefReg[CurrentUndexIdx++]));
+ }
+
+ return true;
+}
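The remap built by tryMergeVector is a list of (old channel, new channel) pairs: registers already present in the untouched vector reuse its channel, the remaining registers take the untouched vector's undef channels, and the merge fails once the undef channels run out. A standalone sketch of that rule, with simplified container types:

#include <map>
#include <utility>
#include <vector>

using ChanRemap = std::vector<std::pair<unsigned, unsigned>>; // (old, new)

static bool remapChannels(const std::map<unsigned, unsigned> &UntouchedRegToChan,
                          const std::vector<unsigned> &UntouchedUndefChans,
                          const std::map<unsigned, unsigned> &ToMergeRegToChan,
                          ChanRemap &Remap) {
  unsigned NextUndef = 0;
  for (const auto &RegChan : ToMergeRegToChan) {
    auto Hit = UntouchedRegToChan.find(RegChan.first);
    if (Hit != UntouchedRegToChan.end()) {        // shared register: reuse its chan
      Remap.push_back({RegChan.second, Hit->second});
      continue;
    }
    if (NextUndef >= UntouchedUndefChans.size())  // no undef chan left to take
      return false;
    Remap.push_back({RegChan.second, UntouchedUndefChans[NextUndef++]});
  }
  return true;
}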
+
+static
+unsigned getReassignedChan(
+ const std::vector<std::pair<unsigned, unsigned> > &RemapChan,
+ unsigned Chan) {
+ for (unsigned j = 0, je = RemapChan.size(); j < je; j++) {
+ if (RemapChan[j].first == Chan)
+ return RemapChan[j].second;
+ }
+ llvm_unreachable("Chan wasn't reassigned");
+}
+
+MachineInstr *R600VectorRegMerger::RebuildVector(
+ RegSeqInfo *RSI, const RegSeqInfo *BaseRSI,
+ const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const {
+ unsigned Reg = RSI->Instr->getOperand(0).getReg();
+ MachineBasicBlock::iterator Pos = RSI->Instr;
+ MachineBasicBlock &MBB = *Pos->getParent();
+ DebugLoc DL = Pos->getDebugLoc();
+
+ unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg();
+ DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan;
+ std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;
+ for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(),
+ E = RSI->RegToChan.end(); It != E; ++It) {
+ unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ unsigned SubReg = (*It).first;
+ unsigned Swizzle = (*It).second;
+ unsigned Chan = getReassignedChan(RemapChan, Swizzle);
+
+ MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG),
+ DstReg)
+ .addReg(SrcVec)
+ .addReg(SubReg)
+ .addImm(Chan);
+ UpdatedRegToChan[SubReg] = Chan;
+ std::vector<unsigned>::iterator ChanPos =
+ std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan);
+ if (ChanPos != UpdatedUndef.end())
+ UpdatedUndef.erase(ChanPos);
+ assert(std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan) ==
+ UpdatedUndef.end() &&
+ "UpdatedUndef shouldn't contain Chan more than once!");
+ DEBUG(dbgs() << " ->"; Tmp->dump(););
+ (void)Tmp;
+ SrcVec = DstReg;
+ }
+ Pos = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg)
+ .addReg(SrcVec);
+ DEBUG(dbgs() << " ->"; Pos->dump(););
+
+ DEBUG(dbgs() << " Updating Swizzle:\n");
+ for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
+ E = MRI->use_instr_end(); It != E; ++It) {
+ DEBUG(dbgs() << " ";(*It).dump(); dbgs() << " ->");
+ SwizzleInput(*It, RemapChan);
+ DEBUG((*It).dump());
+ }
+ RSI->Instr->eraseFromParent();
+
+ // Update RSI
+ RSI->Instr = Pos;
+ RSI->RegToChan = UpdatedRegToChan;
+ RSI->UndefReg = UpdatedUndef;
+
+ return Pos;
+}
+
+void R600VectorRegMerger::RemoveMI(MachineInstr *MI) {
+ for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(),
+ E = PreviousRegSeqByReg.end(); It != E; ++It) {
+ std::vector<MachineInstr *> &MIs = (*It).second;
+ MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end());
+ }
+ for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(),
+ E = PreviousRegSeqByUndefCount.end(); It != E; ++It) {
+ std::vector<MachineInstr *> &MIs = (*It).second;
+ MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end());
+ }
+}
+
+void R600VectorRegMerger::SwizzleInput(MachineInstr &MI,
+ const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const {
+ unsigned Offset;
+ if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
+ Offset = 2;
+ else
+ Offset = 3;
+ for (unsigned i = 0; i < 4; i++) {
+ unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1;
+ for (unsigned j = 0, e = RemapChan.size(); j < e; j++) {
+ if (RemapChan[j].first == Swizzle) {
+ MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1);
+ break;
+ }
+ }
+ }
+}
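SwizzleInput reads each of the four swizzle operands, adjusts by the off-by-one between the encoded immediates and the remap table, and writes the remapped value back. An illustrative sketch of just that adjustment, with the operands flattened into a plain vector:

#include <utility>
#include <vector>

static void rewriteSwizzles(
    std::vector<int> &SwizzleImms,
    const std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
  for (int &Imm : SwizzleImms) {
    unsigned Encoded = Imm + 1;          // immediates are stored off by one
    for (const auto &Entry : RemapChan) {
      if (Entry.first == Encoded) {
        Imm = Entry.second - 1;          // write back in the encoded form
        break;
      }
    }
  }
}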
+
+bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const {
+ for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
+ E = MRI->use_instr_end(); It != E; ++It) {
+ if (!canSwizzle(*It))
+ return false;
+ }
+ return true;
+}
+
+bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
+ RegSeqInfo &CompatibleRSI,
+ std::vector<std::pair<unsigned, unsigned> > &RemapChan) {
+ for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(),
+ MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) {
+ if (!MOp->isReg())
+ continue;
+ if (PreviousRegSeqByReg[MOp->getReg()].empty())
+ continue;
+ for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) {
+ CompatibleRSI = PreviousRegSeq[MI];
+ if (RSI == CompatibleRSI)
+ continue;
+ if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan))
+ return true;
+ }
+ }
+ return false;
+}
+
+bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI,
+ RegSeqInfo &CompatibleRSI,
+ std::vector<std::pair<unsigned, unsigned> > &RemapChan) {
+ unsigned NeededUndefs = 4 - RSI.UndefReg.size();
+ if (PreviousRegSeqByUndefCount[NeededUndefs].empty())
+ return false;
+ std::vector<MachineInstr *> &MIs =
+ PreviousRegSeqByUndefCount[NeededUndefs];
+ CompatibleRSI = PreviousRegSeq[MIs.back()];
+ tryMergeVector(&CompatibleRSI, &RSI, RemapChan);
+ return true;
+}
+
+void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) {
+ for (DenseMap<unsigned, unsigned>::const_iterator
+ It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) {
+ PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr);
+ }
+ PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr);
+ PreviousRegSeq[RSI.Instr] = RSI;
+}
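trackRSI keeps two lookup indexes per basic block: one keyed by input register (used by tryMergeUsingCommonSlot) and one keyed by the number of undef channels (used by tryMergeUsingFreeSlot). A simplified sketch of that bookkeeping, with plain ids instead of MachineInstr pointers:

#include <map>
#include <vector>

struct SeqIndexes {
  std::map<unsigned, std::vector<int>> ByReg;        // input reg -> sequence ids
  std::map<unsigned, std::vector<int>> ByUndefCount; // #undef chans -> sequence ids

  void track(int SeqId, const std::vector<unsigned> &InputRegs,
             unsigned UndefCount) {
    for (unsigned Reg : InputRegs)
      ByReg[Reg].push_back(SeqId);
    ByUndefCount[UndefCount].push_back(SeqId);
  }
};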
+
+bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
+ TII = static_cast<const R600InstrInfo *>(Fn.getSubtarget().getInstrInfo());
+ MRI = &(Fn.getRegInfo());
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ MachineBasicBlock *MB = MBB;
+ PreviousRegSeq.clear();
+ PreviousRegSeqByReg.clear();
+ PreviousRegSeqByUndefCount.clear();
+
+ for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();
+ MII != MIIE; ++MII) {
+ MachineInstr *MI = MII;
+ if (MI->getOpcode() != AMDGPU::REG_SEQUENCE) {
+ if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
+ unsigned Reg = MI->getOperand(1).getReg();
+ for (MachineRegisterInfo::def_instr_iterator
+ It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end();
+ It != E; ++It) {
+ RemoveMI(&(*It));
+ }
+ }
+ continue;
+ }
+
+
+ RegSeqInfo RSI(*MRI, MI);
+
+      // Are all uses of MI swizzleable?
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (!areAllUsesSwizzeable(Reg))
+ continue;
+
+      DEBUG(dbgs() << "Trying to optimize "; MI->dump(););
+
+ RegSeqInfo CandidateRSI;
+ std::vector<std::pair<unsigned, unsigned> > RemapChan;
+ DEBUG(dbgs() << "Using common slots...\n";);
+ if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) {
+ // Remove CandidateRSI mapping
+ RemoveMI(CandidateRSI.Instr);
+ MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
+ trackRSI(RSI);
+ continue;
+ }
+ DEBUG(dbgs() << "Using free slots...\n";);
+ RemapChan.clear();
+ if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) {
+ RemoveMI(CandidateRSI.Instr);
+ MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
+ trackRSI(RSI);
+ continue;
+ }
+      // Failed to merge
+ trackRSI(RSI);
+ }
+ }
+ return false;
+}
+
+} // namespace
+
+llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) {
+ return new R600VectorRegMerger(tm);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
new file mode 100644
index 0000000..deee5bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -0,0 +1,408 @@
+//===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass implements instruction packetization for R600. It unsets the
+/// isLast bit of instructions inside a bundle and substitutes src registers
+/// with PreviousVector when applicable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Debug.h"
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "packets"
+
+namespace {
+
+class R600Packetizer : public MachineFunctionPass {
+
+public:
+ static char ID;
+ R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const override {
+ return "R600 Packetizer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+};
+char R600Packetizer::ID = 0;
+
+class R600PacketizerList : public VLIWPacketizerList {
+
+private:
+ const R600InstrInfo *TII;
+ const R600RegisterInfo &TRI;
+ bool VLIW5;
+ bool ConsideredInstUsesAlreadyWrittenVectorElement;
+
+ unsigned getSlot(const MachineInstr *MI) const {
+ return TRI.getHWRegChan(MI->getOperand(0).getReg());
+ }
+
+  /// \returns the register to PV chan mapping for the bundle/single
+  /// instruction that immediately precedes I.
+ DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
+ const {
+ DenseMap<unsigned, unsigned> Result;
+ I--;
+ if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
+ return Result;
+ MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
+ if (I->isBundle())
+ BI++;
+ int LastDstChan = -1;
+ do {
+ bool isTrans = false;
+ int BISlot = getSlot(BI);
+ if (LastDstChan >= BISlot)
+ isTrans = true;
+ LastDstChan = BISlot;
+ if (TII->isPredicated(BI))
+ continue;
+ int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
+ if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
+ continue;
+ int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
+ if (DstIdx == -1) {
+ continue;
+ }
+ unsigned Dst = BI->getOperand(DstIdx).getReg();
+ if (isTrans || TII->isTransOnly(BI)) {
+ Result[Dst] = AMDGPU::PS;
+ continue;
+ }
+ if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
+ BI->getOpcode() == AMDGPU::DOT4_eg) {
+ Result[Dst] = AMDGPU::PV_X;
+ continue;
+ }
+ if (Dst == AMDGPU::OQAP) {
+ continue;
+ }
+ unsigned PVReg = 0;
+ switch (TRI.getHWRegChan(Dst)) {
+ case 0:
+ PVReg = AMDGPU::PV_X;
+ break;
+ case 1:
+ PVReg = AMDGPU::PV_Y;
+ break;
+ case 2:
+ PVReg = AMDGPU::PV_Z;
+ break;
+ case 3:
+ PVReg = AMDGPU::PV_W;
+ break;
+ default:
+ llvm_unreachable("Invalid Chan");
+ }
+ Result[Dst] = PVReg;
+ } while ((++BI)->isBundledWithPred());
+ return Result;
+ }
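getPreviousVector maps each destination written by the preceding instruction group to the "previous vector" register the hardware exposes: one PV channel per vector slot, or PS for a result produced in the trans slot. A standalone sketch of that channel-to-PV mapping (the enumerators are illustrative, not the target's register numbers):

#include <optional>

enum class PrevReg { PV_X, PV_Y, PV_Z, PV_W, PS };

static std::optional<PrevReg> prevRegForChannel(unsigned Chan, bool WasTransSlot) {
  if (WasTransSlot)
    return PrevReg::PS;              // trans results are read back through PS
  switch (Chan) {
  case 0: return PrevReg::PV_X;
  case 1: return PrevReg::PV_Y;
  case 2: return PrevReg::PV_Z;
  case 3: return PrevReg::PV_W;
  default: return std::nullopt;      // not a valid channel
  }
}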
+
+ void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs)
+ const {
+ unsigned Ops[] = {
+ AMDGPU::OpName::src0,
+ AMDGPU::OpName::src1,
+ AMDGPU::OpName::src2
+ };
+ for (unsigned i = 0; i < 3; i++) {
+ int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]);
+ if (OperandIdx < 0)
+ continue;
+ unsigned Src = MI->getOperand(OperandIdx).getReg();
+ const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
+ if (It != PVs.end())
+ MI->getOperand(OperandIdx).setReg(It->second);
+ }
+ }
+public:
+ // Ctor.
+ R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI)
+ : VLIWPacketizerList(MF, MLI, true),
+ TII(static_cast<const R600InstrInfo *>(
+ MF.getSubtarget().getInstrInfo())),
+ TRI(TII->getRegisterInfo()) {
+ VLIW5 = !MF.getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
+ }
+
+ // initPacketizerState - initialize some internal flags.
+ void initPacketizerState() override {
+ ConsideredInstUsesAlreadyWrittenVectorElement = false;
+ }
+
+ // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+ bool ignorePseudoInstruction(MachineInstr *MI,
+ MachineBasicBlock *MBB) override {
+ return false;
+ }
+
+ // isSoloInstruction - return true if instruction MI can not be packetized
+ // with any other instruction, which means that MI itself is a packet.
+ bool isSoloInstruction(MachineInstr *MI) override {
+ if (TII->isVector(*MI))
+ return true;
+ if (!TII->isALUInstr(MI->getOpcode()))
+ return true;
+ if (MI->getOpcode() == AMDGPU::GROUP_BARRIER)
+ return true;
+ // XXX: This can be removed once the packetizer properly handles all the
+ // LDS instruction group restrictions.
+ if (TII->isLDSInstr(MI->getOpcode()))
+ return true;
+ return false;
+ }
+
+ // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+ // together.
+ bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
+ MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
+ if (getSlot(MII) == getSlot(MIJ))
+ ConsideredInstUsesAlreadyWrittenVectorElement = true;
+    // Do MII and MIJ share the same pred_sel?
+ int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
+ OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
+ unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
+ PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
+ if (PredI != PredJ)
+ return false;
+ if (SUJ->isSucc(SUI)) {
+ for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
+ const SDep &Dep = SUJ->Succs[i];
+ if (Dep.getSUnit() != SUI)
+ continue;
+ if (Dep.getKind() == SDep::Anti)
+ continue;
+ if (Dep.getKind() == SDep::Output)
+ if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
+ continue;
+ return false;
+ }
+ }
+
+ bool ARDef = TII->definesAddressRegister(MII) ||
+ TII->definesAddressRegister(MIJ);
+ bool ARUse = TII->usesAddressRegister(MII) ||
+ TII->usesAddressRegister(MIJ);
+ if (ARDef && ARUse)
+ return false;
+
+ return true;
+ }
+
+  // isLegalToPruneDependencies - Is it legal to prune the dependence between
+  // SUI and SUJ?
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
+ return false;
+ }
+
+ void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
+ unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last);
+ MI->getOperand(LastOp).setImm(Bit);
+ }
+
+ bool isBundlableWithCurrentPMI(MachineInstr *MI,
+ const DenseMap<unsigned, unsigned> &PV,
+ std::vector<R600InstrInfo::BankSwizzle> &BS,
+ bool &isTransSlot) {
+ isTransSlot = TII->isTransOnly(MI);
+ assert (!isTransSlot || VLIW5);
+
+    // Is the dst reg sequence legal?
+ if (!isTransSlot && !CurrentPacketMIs.empty()) {
+ if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
+ if (ConsideredInstUsesAlreadyWrittenVectorElement &&
+ !TII->isVectorOnly(MI) && VLIW5) {
+ isTransSlot = true;
+ DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump(););
+ }
+ else
+ return false;
+ }
+ }
+
+    // Are the constant read limitations met?
+ CurrentPacketMIs.push_back(MI);
+ if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
+ DEBUG(
+ dbgs() << "Couldn't pack :\n";
+ MI->dump();
+ dbgs() << "with the following packets :\n";
+ for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
+ CurrentPacketMIs[i]->dump();
+ dbgs() << "\n";
+ }
+ dbgs() << "because of Consts read limitations\n";
+ );
+ CurrentPacketMIs.pop_back();
+ return false;
+ }
+
+    // Is there a BankSwizzle set that meets the read port limitations?
+ if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
+ PV, BS, isTransSlot)) {
+ DEBUG(
+ dbgs() << "Couldn't pack :\n";
+ MI->dump();
+ dbgs() << "with the following packets :\n";
+ for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
+ CurrentPacketMIs[i]->dump();
+ dbgs() << "\n";
+ }
+ dbgs() << "because of Read port limitations\n";
+ );
+ CurrentPacketMIs.pop_back();
+ return false;
+ }
+
+    // We cannot read LDS source registers from the Trans slot.
+ if (isTransSlot && TII->readsLDSSrcReg(MI))
+ return false;
+
+ CurrentPacketMIs.pop_back();
+ return true;
+ }
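isBundlableWithCurrentPMI follows a tentatively-add, validate, roll-back pattern: the candidate is pushed onto the current packet, the constraints are checked against the extended packet, and the candidate is removed again; the real insertion happens later in addToPacket. A simplified generic sketch of that shape (predicate names are placeholders):

#include <functional>
#include <vector>

template <typename Inst>
bool fitsPacket(std::vector<Inst> &Packet, const Inst &Candidate,
                const std::vector<std::function<bool(const std::vector<Inst> &)>> &Checks) {
  Packet.push_back(Candidate);          // tentatively extend the packet
  bool Ok = true;
  for (const auto &Check : Checks)
    if (!Check(Packet)) {
      Ok = false;
      break;
    }
  Packet.pop_back();                    // the real insertion is done by addToPacket
  return Ok;
}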
+
+ MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override {
+ MachineBasicBlock::iterator FirstInBundle =
+ CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
+ const DenseMap<unsigned, unsigned> &PV =
+ getPreviousVector(FirstInBundle);
+ std::vector<R600InstrInfo::BankSwizzle> BS;
+ bool isTransSlot;
+
+ if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
+ for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
+ MachineInstr *MI = CurrentPacketMIs[i];
+ unsigned Op = TII->getOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::bank_swizzle);
+ MI->getOperand(Op).setImm(BS[i]);
+ }
+ unsigned Op = TII->getOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::bank_swizzle);
+ MI->getOperand(Op).setImm(BS.back());
+ if (!CurrentPacketMIs.empty())
+ setIsLastBit(CurrentPacketMIs.back(), 0);
+ substitutePV(MI, PV);
+ MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
+ if (isTransSlot) {
+ endPacket(std::next(It)->getParent(), std::next(It));
+ }
+ return It;
+ }
+ endPacket(MI->getParent(), MI);
+ if (TII->isTransOnly(MI))
+ return MI;
+ return VLIWPacketizerList::addToPacket(MI);
+ }
+};
+
+bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+
+ // Instantiate the packetizer.
+ R600PacketizerList Packetizer(Fn, MLI);
+
+ // DFA state table should not be empty.
+ assert(Packetizer.getResourceTracker() && "Empty DFA table!");
+
+ //
+ // Loop over all basic blocks and remove KILL pseudo-instructions
+ // These instructions confuse the dependence analysis. Consider:
+ // D0 = ... (Insn 0)
+ // R0 = KILL R0, D0 (Insn 1)
+ // R0 = ... (Insn 2)
+ // Here, Insn 1 will result in the dependence graph not emitting an output
+ // dependence between Insn 0 and Insn 2. This can lead to incorrect
+ // packetization
+ //
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ MachineBasicBlock::iterator End = MBB->end();
+ MachineBasicBlock::iterator MI = MBB->begin();
+ while (MI != End) {
+ if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF ||
+ (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
+ MachineBasicBlock::iterator DeleteMI = MI;
+ ++MI;
+ MBB->erase(DeleteMI);
+ End = MBB->end();
+ continue;
+ }
+ ++MI;
+ }
+ }
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ // Find scheduling regions and schedule / packetize each region.
+ unsigned RemainingCount = MBB->size();
+ for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+ RegionEnd != MBB->begin();) {
+ // The next region starts above the previous region. Look backward in the
+ // instruction stream until we find the nearest boundary.
+ MachineBasicBlock::iterator I = RegionEnd;
+ for(;I != MBB->begin(); --I, --RemainingCount) {
+ if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn))
+ break;
+ }
+ I = MBB->begin();
+
+ // Skip empty scheduling regions.
+ if (I == RegionEnd) {
+ RegionEnd = std::prev(RegionEnd);
+ --RemainingCount;
+ continue;
+ }
+ // Skip regions with one instruction.
+ if (I == std::prev(RegionEnd)) {
+ RegionEnd = std::prev(RegionEnd);
+ continue;
+ }
+
+ Packetizer.PacketizeMIs(MBB, I, RegionEnd);
+ RegionEnd = I;
+ }
+ }
+
+ return true;
+
+}
+
+} // end anonymous namespace
+
+llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) {
+ return new R600Packetizer(tm);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
new file mode 100644
index 0000000..fb0359c
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -0,0 +1,91 @@
+//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600RegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+
+using namespace llvm;
+
+R600RegisterInfo::R600RegisterInfo() : AMDGPURegisterInfo() {
+ RCW.RegWeight = 0;
+ RCW.WeightLimit = 0;
+}
+
+BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ Reserved.set(AMDGPU::ZERO);
+ Reserved.set(AMDGPU::HALF);
+ Reserved.set(AMDGPU::ONE);
+ Reserved.set(AMDGPU::ONE_INT);
+ Reserved.set(AMDGPU::NEG_HALF);
+ Reserved.set(AMDGPU::NEG_ONE);
+ Reserved.set(AMDGPU::PV_X);
+ Reserved.set(AMDGPU::ALU_LITERAL_X);
+ Reserved.set(AMDGPU::ALU_CONST);
+ Reserved.set(AMDGPU::PREDICATE_BIT);
+ Reserved.set(AMDGPU::PRED_SEL_OFF);
+ Reserved.set(AMDGPU::PRED_SEL_ZERO);
+ Reserved.set(AMDGPU::PRED_SEL_ONE);
+ Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
+
+ for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
+ E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
+ Reserved.set(*I);
+ }
+
+ TII->reserveIndirectRegisters(Reserved, MF);
+
+ return Reserved;
+}
+
+unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
+ return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
+}
+
+unsigned R600RegisterInfo::getHWRegIndex(unsigned Reg) const {
+ return GET_REG_INDEX(getEncodingValue(Reg));
+}
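Per the register definitions in R600RegisterInfo.td further below, the HW encoding keeps the register index in bits 8-0 and the channel in bits 10-9, so the index is a mask and the channel a shift. A sketch of that decoding with illustrative constants (not the target's R600Defines.h macros):

#include <cassert>
#include <cstdint>

static unsigned hwRegIndex(uint16_t Encoding) { return Encoding & 0x1ff; }     // bits 8-0
static unsigned hwRegChan(uint16_t Encoding) { return (Encoding >> 9) & 0x3; } // bits 10-9

int main() {
  // e.g. T5.Z: sel = 5, chan = 2  ->  encoding (2 << 9) | 5
  uint16_t Enc = (2u << 9) | 5u;
  assert(hwRegIndex(Enc) == 5 && hwRegChan(Enc) == 2);
  return 0;
}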
+
+const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
+ MVT VT) const {
+ switch(VT.SimpleTy) {
+ default:
+ case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
+ }
+}
+
+const RegClassWeight &R600RegisterInfo::getRegClassWeight(
+ const TargetRegisterClass *RC) const {
+ return RCW;
+}
+
+bool R600RegisterInfo::isPhysRegLiveAcrossClauses(unsigned Reg) const {
+ assert(!TargetRegisterInfo::isVirtualRegister(Reg));
+
+ switch (Reg) {
+ case AMDGPU::OQAP:
+ case AMDGPU::OQBP:
+ case AMDGPU::AR_X:
+ return false;
+ default:
+ return true;
+ }
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
new file mode 100644
index 0000000..9713e60
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
@@ -0,0 +1,49 @@
+//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for R600RegisterInfo
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_R600REGISTERINFO_H
+#define LLVM_LIB_TARGET_R600_R600REGISTERINFO_H
+
+#include "AMDGPURegisterInfo.h"
+
+namespace llvm {
+
+class AMDGPUSubtarget;
+
+struct R600RegisterInfo : public AMDGPURegisterInfo {
+ RegClassWeight RCW;
+
+ R600RegisterInfo();
+
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
+
+ /// \brief get the HW encoding for a register's channel.
+ unsigned getHWRegChan(unsigned reg) const;
+
+ unsigned getHWRegIndex(unsigned Reg) const override;
+
+ /// \brief get the register class of the specified type to use in the
+ /// CFGStructurizer
+ const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override;
+
+ const RegClassWeight &
+ getRegClassWeight(const TargetRegisterClass *RC) const override;
+
+  // \returns true if \p Reg can be defined in one ALU clause and used in
+  // another.
+ bool isPhysRegLiveAcrossClauses(unsigned Reg) const;
+};
+
+} // End namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.td
new file mode 100644
index 0000000..cc667d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.td
@@ -0,0 +1,252 @@
+
+class R600Reg <string name, bits<16> encoding> : Register<name> {
+ let Namespace = "AMDGPU";
+ let HWEncoding = encoding;
+}
+
+class R600RegWithChan <string name, bits<9> sel, string chan> :
+ Register <name> {
+
+ field bits<2> chan_encoding = !if(!eq(chan, "X"), 0,
+ !if(!eq(chan, "Y"), 1,
+ !if(!eq(chan, "Z"), 2,
+ !if(!eq(chan, "W"), 3, 0))));
+ let HWEncoding{8-0} = sel;
+ let HWEncoding{10-9} = chan_encoding;
+ let Namespace = "AMDGPU";
+}
+
+class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
+ RegisterWithSubRegs<n, subregs> {
+ field bits<2> chan_encoding = 0;
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1, sub2, sub3];
+ let HWEncoding{8-0} = encoding{8-0};
+ let HWEncoding{10-9} = chan_encoding;
+}
+
+class R600Reg_64<string n, list<Register> subregs, bits<16> encoding> :
+ RegisterWithSubRegs<n, subregs> {
+ field bits<2> chan_encoding = 0;
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1];
+ let HWEncoding = encoding;
+ let HWEncoding{8-0} = encoding{8-0};
+ let HWEncoding{10-9} = chan_encoding;
+}
+
+class R600Reg_64Vertical<int lo, int hi, string chan> : R600Reg_64 <
+ "V"#lo#hi#"_"#chan,
+ [!cast<Register>("T"#lo#"_"#chan), !cast<Register>("T"#hi#"_"#chan)],
+ lo
+>;
+
+foreach Index = 0-127 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Temporary Registers
+ def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
+
+ // Indirect addressing offset registers
+ def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
+ Index, Chan>;
+ }
+ // 128-bit Temporary Registers
+ def T#Index#_XYZW : R600Reg_128 <"T"#Index#"",
+ [!cast<Register>("T"#Index#"_X"),
+ !cast<Register>("T"#Index#"_Y"),
+ !cast<Register>("T"#Index#"_Z"),
+ !cast<Register>("T"#Index#"_W")],
+ Index>;
+
+ def T#Index#_XY : R600Reg_64 <"T"#Index#"",
+ [!cast<Register>("T"#Index#"_X"),
+ !cast<Register>("T"#Index#"_Y")],
+ Index>;
+}
+
+foreach Chan = [ "X", "Y", "Z", "W"] in {
+
+ let chan_encoding = !if(!eq(Chan, "X"), 0,
+ !if(!eq(Chan, "Y"), 1,
+ !if(!eq(Chan, "Z"), 2,
+ !if(!eq(Chan, "W"), 3, 0)))) in {
+ def V0123_#Chan : R600Reg_128 <"V0123_"#Chan,
+ [!cast<Register>("T0_"#Chan),
+ !cast<Register>("T1_"#Chan),
+ !cast<Register>("T2_"#Chan),
+ !cast<Register>("T3_"#Chan)],
+ 0>;
+ def V01_#Chan : R600Reg_64Vertical<0, 1, Chan>;
+ def V23_#Chan : R600Reg_64Vertical<2, 3, Chan>;
+ }
+}
+
+
+// KCACHE_BANK0
+foreach Index = 159-128 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Temporary Registers
+ def KC0_#Index#_#Chan : R600RegWithChan <"KC0["#!add(Index,-128)#"]."#Chan, Index, Chan>;
+ }
+ // 128-bit Temporary Registers
+ def KC0_#Index#_XYZW : R600Reg_128 <"KC0["#!add(Index, -128)#"].XYZW",
+ [!cast<Register>("KC0_"#Index#"_X"),
+ !cast<Register>("KC0_"#Index#"_Y"),
+ !cast<Register>("KC0_"#Index#"_Z"),
+ !cast<Register>("KC0_"#Index#"_W")],
+ Index>;
+}
+
+// KCACHE_BANK1
+foreach Index = 191-160 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Temporary Registers
+ def KC1_#Index#_#Chan : R600RegWithChan <"KC1["#!add(Index,-160)#"]."#Chan, Index, Chan>;
+ }
+ // 128-bit Temporary Registers
+ def KC1_#Index#_XYZW : R600Reg_128 <"KC1["#!add(Index, -160)#"].XYZW",
+ [!cast<Register>("KC1_"#Index#"_X"),
+ !cast<Register>("KC1_"#Index#"_Y"),
+ !cast<Register>("KC1_"#Index#"_Z"),
+ !cast<Register>("KC1_"#Index#"_W")],
+ Index>;
+}
+
+
+// Array Base Register holding input in FS
+foreach Index = 448-480 in {
+ def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>;
+}
+
+
+// Special Registers
+
+def OQA : R600Reg<"OQA", 219>;
+def OQB : R600Reg<"OQB", 220>;
+def OQAP : R600Reg<"OQAP", 221>;
+def OQBP : R600Reg<"OQAP", 222>;
+def LDS_DIRECT_A : R600Reg<"LDS_DIRECT_A", 223>;
+def LDS_DIRECT_B : R600Reg<"LDS_DIRECT_B", 224>;
+def ZERO : R600Reg<"0.0", 248>;
+def ONE : R600Reg<"1.0", 249>;
+def NEG_ONE : R600Reg<"-1.0", 249>;
+def ONE_INT : R600Reg<"1", 250>;
+def HALF : R600Reg<"0.5", 252>;
+def NEG_HALF : R600Reg<"-0.5", 252>;
+def ALU_LITERAL_X : R600RegWithChan<"literal.x", 253, "X">;
+def ALU_LITERAL_Y : R600RegWithChan<"literal.y", 253, "Y">;
+def ALU_LITERAL_Z : R600RegWithChan<"literal.z", 253, "Z">;
+def ALU_LITERAL_W : R600RegWithChan<"literal.w", 253, "W">;
+def PV_X : R600RegWithChan<"PV.X", 254, "X">;
+def PV_Y : R600RegWithChan<"PV.Y", 254, "Y">;
+def PV_Z : R600RegWithChan<"PV.Z", 254, "Z">;
+def PV_W : R600RegWithChan<"PV.W", 254, "W">;
+def PS: R600Reg<"PS", 255>;
+def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
+def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
+def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
+def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
+def AR_X : R600Reg<"AR.x", 0>;
+
+def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "ArrayBase%u", 448, 480))>;
+// special registers for ALU src operands
+// const buffer reference, SRCx_SEL contains index
+def ALU_CONST : R600Reg<"CBuf", 0>;
+// interpolation param reference, SRCx_SEL contains index
+def ALU_PARAM : R600Reg<"Param", 0>;
+
+let isAllocatable = 0 in {
+
+def R600_Addr : RegisterClass <"AMDGPU", [i32], 32, (add (sequence "Addr%u_X", 0, 127))>;
+
+// We only use Addr_[YZW] for vertical vectors.
+// FIXME: if we add more vertical vector registers we will need to add more
+// registers to these classes.
+def R600_Addr_Y : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Y)>;
+def R600_Addr_Z : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Z)>;
+def R600_Addr_W : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_W)>;
+
+def R600_LDS_SRC_REG : RegisterClass<"AMDGPU", [i32], 32,
+ (add OQA, OQB, OQAP, OQBP, LDS_DIRECT_A, LDS_DIRECT_B)>;
+
+def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_X", 128, 159))>;
+
+def R600_KC0_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_Y", 128, 159))>;
+
+def R600_KC0_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_Z", 128, 159))>;
+
+def R600_KC0_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_W", 128, 159))>;
+
+def R600_KC0 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (interleave R600_KC0_X, R600_KC0_Y,
+ R600_KC0_Z, R600_KC0_W)>;
+
+def R600_KC1_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_X", 160, 191))>;
+
+def R600_KC1_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_Y", 160, 191))>;
+
+def R600_KC1_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_Z", 160, 191))>;
+
+def R600_KC1_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_W", 160, 191))>;
+
+def R600_KC1 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (interleave R600_KC1_X, R600_KC1_Y,
+ R600_KC1_Z, R600_KC1_W)>;
+
+} // End isAllocatable = 0
+
+def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_X", 0, 127), AR_X)>;
+
+def R600_TReg32_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_Y", 0, 127))>;
+
+def R600_TReg32_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_Z", 0, 127))>;
+
+def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_W", 0, 127))>;
+
+def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (interleave R600_TReg32_X, R600_TReg32_Y,
+ R600_TReg32_Z, R600_TReg32_W)>;
+
+def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
+ R600_TReg32,
+ R600_ArrayBase,
+ R600_Addr,
+ R600_KC0, R600_KC1,
+ ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
+ ALU_CONST, ALU_PARAM, OQAP
+ )>;
+
+def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
+ PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
+
+def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
+ PREDICATE_BIT)>;
+
+def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
+ (add (sequence "T%u_XYZW", 0, 127))> {
+ let CopyCost = -1;
+}
+
+def R600_Reg128Vertical : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
+ (add V0123_W, V0123_Z, V0123_Y, V0123_X)
+>;
+
+def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
+ (add (sequence "T%u_XY", 0, 63))>;
+
+def R600_Reg64Vertical : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
+ (add V01_X, V01_Y, V01_Z, V01_W,
+ V23_X, V23_Y, V23_Z, V23_W)>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Schedule.td b/contrib/llvm/lib/Target/AMDGPU/R600Schedule.td
new file mode 100644
index 0000000..df62bf8
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600Schedule.td
@@ -0,0 +1,49 @@
+//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 has a VLIW architecture. On pre-Cayman cards there are 5 instruction
+// slots: ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. On Cayman cards, the TRANS
+// slot has been removed.
+//
+//===----------------------------------------------------------------------===//
+
+
+def ALU_X : FuncUnit;
+def ALU_Y : FuncUnit;
+def ALU_Z : FuncUnit;
+def ALU_W : FuncUnit;
+def TRANS : FuncUnit;
+
+def AnyALU : InstrItinClass;
+def VecALU : InstrItinClass;
+def TransALU : InstrItinClass;
+def XALU : InstrItinClass;
+
+def R600_VLIW5_Itin : ProcessorItineraries <
+ [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
+ [],
+ [
+ InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
+ InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
+ InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
+ InstrItinData<XALU, [InstrStage<1, [ALU_X]>]>,
+ InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
+ ]
+>;
+
+def R600_VLIW4_Itin : ProcessorItineraries <
+ [ALU_X, ALU_Y, ALU_Z, ALU_W, ALU_NULL],
+ [],
+ [
+ InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
+ InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
+ InstrItinData<TransALU, [InstrStage<1, [ALU_NULL]>]>,
+ InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
+ ]
+>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp b/contrib/llvm/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp
new file mode 100644
index 0000000..93bcf68
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp
@@ -0,0 +1,303 @@
+//===-- R600TextureIntrinsicsReplacer.cpp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass translates TGSI-like texture intrinsics into R600 texture
+/// intrinsics that are closer to the hardware.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+
+using namespace llvm;
+
+namespace {
+class R600TextureIntrinsicsReplacer :
+ public FunctionPass, public InstVisitor<R600TextureIntrinsicsReplacer> {
+ static char ID;
+
+ Module *Mod;
+ Type *FloatType;
+ Type *Int32Type;
+ Type *V4f32Type;
+ Type *V4i32Type;
+ FunctionType *TexSign;
+ FunctionType *TexQSign;
+
+ void getAdjustmentFromTextureTarget(unsigned TextureType, bool hasLOD,
+ unsigned SrcSelect[4], unsigned CT[4],
+ bool &useShadowVariant) {
+ enum TextureTypes {
+ TEXTURE_1D = 1,
+ TEXTURE_2D,
+ TEXTURE_3D,
+ TEXTURE_CUBE,
+ TEXTURE_RECT,
+ TEXTURE_SHADOW1D,
+ TEXTURE_SHADOW2D,
+ TEXTURE_SHADOWRECT,
+ TEXTURE_1D_ARRAY,
+ TEXTURE_2D_ARRAY,
+ TEXTURE_SHADOW1D_ARRAY,
+ TEXTURE_SHADOW2D_ARRAY,
+ TEXTURE_SHADOWCUBE,
+ TEXTURE_2D_MSAA,
+ TEXTURE_2D_ARRAY_MSAA,
+ TEXTURE_CUBE_ARRAY,
+ TEXTURE_SHADOWCUBE_ARRAY
+ };
+
+ switch (TextureType) {
+ case 0:
+ useShadowVariant = false;
+ return;
+ case TEXTURE_RECT:
+ case TEXTURE_1D:
+ case TEXTURE_2D:
+ case TEXTURE_3D:
+ case TEXTURE_CUBE:
+ case TEXTURE_1D_ARRAY:
+ case TEXTURE_2D_ARRAY:
+ case TEXTURE_CUBE_ARRAY:
+ case TEXTURE_2D_MSAA:
+ case TEXTURE_2D_ARRAY_MSAA:
+ useShadowVariant = false;
+ break;
+ case TEXTURE_SHADOW1D:
+ case TEXTURE_SHADOW2D:
+ case TEXTURE_SHADOWRECT:
+ case TEXTURE_SHADOW1D_ARRAY:
+ case TEXTURE_SHADOW2D_ARRAY:
+ case TEXTURE_SHADOWCUBE:
+ case TEXTURE_SHADOWCUBE_ARRAY:
+ useShadowVariant = true;
+ break;
+ default:
+      llvm_unreachable("Unknown Texture Type");
+ }
+
+ if (TextureType == TEXTURE_RECT ||
+ TextureType == TEXTURE_SHADOWRECT) {
+ CT[0] = 0;
+ CT[1] = 0;
+ }
+
+ if (TextureType == TEXTURE_CUBE_ARRAY ||
+ TextureType == TEXTURE_SHADOWCUBE_ARRAY)
+ CT[2] = 0;
+
+ if (TextureType == TEXTURE_1D_ARRAY ||
+ TextureType == TEXTURE_SHADOW1D_ARRAY) {
+ if (hasLOD && useShadowVariant) {
+ CT[1] = 0;
+ } else {
+ CT[2] = 0;
+ SrcSelect[2] = 1;
+ }
+ } else if (TextureType == TEXTURE_2D_ARRAY ||
+ TextureType == TEXTURE_SHADOW2D_ARRAY) {
+ CT[2] = 0;
+ }
+
+ if ((TextureType == TEXTURE_SHADOW1D ||
+ TextureType == TEXTURE_SHADOW2D ||
+ TextureType == TEXTURE_SHADOWRECT ||
+ TextureType == TEXTURE_SHADOW1D_ARRAY) &&
+ !(hasLOD && useShadowVariant))
+ SrcSelect[3] = 2;
+ }
+
+ void ReplaceCallInst(CallInst &I, FunctionType *FT, const char *Name,
+ unsigned SrcSelect[4], Value *Offset[3], Value *Resource,
+ Value *Sampler, unsigned CT[4], Value *Coord) {
+ IRBuilder<> Builder(&I);
+ Constant *Mask[] = {
+ ConstantInt::get(Int32Type, SrcSelect[0]),
+ ConstantInt::get(Int32Type, SrcSelect[1]),
+ ConstantInt::get(Int32Type, SrcSelect[2]),
+ ConstantInt::get(Int32Type, SrcSelect[3])
+ };
+ Value *SwizzleMask = ConstantVector::get(Mask);
+ Value *SwizzledCoord =
+ Builder.CreateShuffleVector(Coord, Coord, SwizzleMask);
+
+ Value *Args[] = {
+ SwizzledCoord,
+ Offset[0],
+ Offset[1],
+ Offset[2],
+ Resource,
+ Sampler,
+ ConstantInt::get(Int32Type, CT[0]),
+ ConstantInt::get(Int32Type, CT[1]),
+ ConstantInt::get(Int32Type, CT[2]),
+ ConstantInt::get(Int32Type, CT[3])
+ };
+
+ Function *F = Mod->getFunction(Name);
+ if (!F) {
+ F = Function::Create(FT, GlobalValue::ExternalLinkage, Name, Mod);
+ F->addFnAttr(Attribute::ReadNone);
+ }
+ I.replaceAllUsesWith(Builder.CreateCall(F, Args));
+ I.eraseFromParent();
+ }
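The shufflevector built in ReplaceCallInst performs the coordinate swizzle: each output lane i takes Coord[SrcSelect[i]], with SrcSelect acting as a per-lane source index. A standalone sketch of what that computes, using plain arrays instead of IR values:

#include <array>
#include <cassert>

static std::array<float, 4> swizzle(const std::array<float, 4> &Coord,
                                    const std::array<unsigned, 4> &SrcSelect) {
  std::array<float, 4> Out{};
  for (unsigned i = 0; i < 4; ++i)
    Out[i] = Coord[SrcSelect[i]];      // lane i reads source lane SrcSelect[i]
  return Out;
}

int main() {
  const std::array<float, 4> Coord = {1.0f, 2.0f, 3.0f, 4.0f};
  // {0, 1, 2, 3} is the identity selection used before any target adjustment.
  assert(swizzle(Coord, {0, 1, 2, 3}) == Coord);
  return 0;
}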
+
+ void ReplaceTexIntrinsic(CallInst &I, bool hasLOD, FunctionType *FT,
+ const char *VanillaInt,
+ const char *ShadowInt) {
+ Value *Coord = I.getArgOperand(0);
+ Value *ResourceId = I.getArgOperand(1);
+ Value *SamplerId = I.getArgOperand(2);
+
+ unsigned TextureType =
+ cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+
+ unsigned SrcSelect[4] = { 0, 1, 2, 3 };
+ unsigned CT[4] = {1, 1, 1, 1};
+ Value *Offset[3] = {
+ ConstantInt::get(Int32Type, 0),
+ ConstantInt::get(Int32Type, 0),
+ ConstantInt::get(Int32Type, 0)
+ };
+ bool useShadowVariant;
+
+ getAdjustmentFromTextureTarget(TextureType, hasLOD, SrcSelect, CT,
+ useShadowVariant);
+
+ ReplaceCallInst(I, FT, useShadowVariant?ShadowInt:VanillaInt, SrcSelect,
+ Offset, ResourceId, SamplerId, CT, Coord);
+ }
+
+ void ReplaceTXF(CallInst &I) {
+ Value *Coord = I.getArgOperand(0);
+ Value *ResourceId = I.getArgOperand(4);
+ Value *SamplerId = I.getArgOperand(5);
+
+ unsigned TextureType =
+ cast<ConstantInt>(I.getArgOperand(6))->getZExtValue();
+
+ unsigned SrcSelect[4] = { 0, 1, 2, 3 };
+ unsigned CT[4] = {1, 1, 1, 1};
+ Value *Offset[3] = {
+ I.getArgOperand(1),
+ I.getArgOperand(2),
+ I.getArgOperand(3),
+ };
+ bool useShadowVariant;
+
+ getAdjustmentFromTextureTarget(TextureType, false, SrcSelect, CT,
+ useShadowVariant);
+
+ ReplaceCallInst(I, TexQSign, "llvm.R600.txf", SrcSelect,
+ Offset, ResourceId, SamplerId, CT, Coord);
+ }
+
+public:
+ R600TextureIntrinsicsReplacer():
+ FunctionPass(ID) {
+ }
+
+ bool doInitialization(Module &M) override {
+ LLVMContext &Ctx = M.getContext();
+ Mod = &M;
+ FloatType = Type::getFloatTy(Ctx);
+ Int32Type = Type::getInt32Ty(Ctx);
+ V4f32Type = VectorType::get(FloatType, 4);
+ V4i32Type = VectorType::get(Int32Type, 4);
+ Type *ArgsType[] = {
+ V4f32Type,
+ Int32Type,
+ Int32Type,
+ Int32Type,
+ Int32Type,
+ Int32Type,
+ Int32Type,
+ Int32Type,
+ Int32Type,
+ Int32Type,
+ };
+ TexSign = FunctionType::get(V4f32Type, ArgsType, /*isVarArg=*/false);
+ Type *ArgsQType[] = {
+ V4i32Type,
+ Int32Type,
+ Int32Type,
+ Int32Type,
+ Int32Type,
+ Int32Type,
+ Int32Type,
+ Int32Type,
+ Int32Type,
+ Int32Type,
+ };
+ TexQSign = FunctionType::get(V4f32Type, ArgsQType, /*isVarArg=*/false);
+ return false;
+ }
+
+ bool runOnFunction(Function &F) override {
+ visit(F);
+ return false;
+ }
+
+ const char *getPassName() const override {
+ return "R600 Texture Intrinsics Replacer";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ }
+
+ void visitCallInst(CallInst &I) {
+ if (!I.getCalledFunction())
+ return;
+
+ StringRef Name = I.getCalledFunction()->getName();
+ if (Name == "llvm.AMDGPU.tex") {
+ ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.tex", "llvm.R600.texc");
+ return;
+ }
+ if (Name == "llvm.AMDGPU.txl") {
+ ReplaceTexIntrinsic(I, true, TexSign, "llvm.R600.txl", "llvm.R600.txlc");
+ return;
+ }
+ if (Name == "llvm.AMDGPU.txb") {
+ ReplaceTexIntrinsic(I, true, TexSign, "llvm.R600.txb", "llvm.R600.txbc");
+ return;
+ }
+ if (Name == "llvm.AMDGPU.txf") {
+ ReplaceTXF(I);
+ return;
+ }
+ if (Name == "llvm.AMDGPU.txq") {
+ ReplaceTexIntrinsic(I, false, TexQSign, "llvm.R600.txq", "llvm.R600.txq");
+ return;
+ }
+ if (Name == "llvm.AMDGPU.ddx") {
+ ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.ddx", "llvm.R600.ddx");
+ return;
+ }
+ if (Name == "llvm.AMDGPU.ddy") {
+ ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.ddy", "llvm.R600.ddy");
+ return;
+ }
+ }
+
+};
+
+char R600TextureIntrinsicsReplacer::ID = 0;
+
+} // namespace
+
+FunctionPass *llvm::createR600TextureIntrinsicsReplacer() {
+ return new R600TextureIntrinsicsReplacer();
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R700Instructions.td b/contrib/llvm/lib/Target/AMDGPU/R700Instructions.td
new file mode 100644
index 0000000..613a0d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/R700Instructions.td
@@ -0,0 +1,21 @@
+//===-- R700Instructions.td - R700 Instruction defs -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TableGen definitions for instructions which are:
+// - Available to R700 and newer VLIW4/VLIW5 GPUs
+// - Available only on R700 family GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+def isR700 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::R700">;
+
+let Predicates = [isR700] in {
+ def SIN_r700 : SIN_Common<0x6E>;
+ def COS_r700 : COS_Common<0x6F>;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
new file mode 100644
index 0000000..ccfbf1b
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -0,0 +1,365 @@
+//===-- SIAnnotateControlFlow.cpp - Annotate SI Control Flow --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Annotates the control flow with hardware specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-annotate-control-flow"
+
+namespace {
+
+// Complex types used in this pass
+typedef std::pair<BasicBlock *, Value *> StackEntry;
+typedef SmallVector<StackEntry, 16> StackVector;
+
+// Intrinsic names the control flow is annotated with
+static const char *const IfIntrinsic = "llvm.SI.if";
+static const char *const ElseIntrinsic = "llvm.SI.else";
+static const char *const BreakIntrinsic = "llvm.SI.break";
+static const char *const IfBreakIntrinsic = "llvm.SI.if.break";
+static const char *const ElseBreakIntrinsic = "llvm.SI.else.break";
+static const char *const LoopIntrinsic = "llvm.SI.loop";
+static const char *const EndCfIntrinsic = "llvm.SI.end.cf";
+
+class SIAnnotateControlFlow : public FunctionPass {
+
+ static char ID;
+
+ Type *Boolean;
+ Type *Void;
+ Type *Int64;
+ Type *ReturnStruct;
+
+ ConstantInt *BoolTrue;
+ ConstantInt *BoolFalse;
+ UndefValue *BoolUndef;
+ Constant *Int64Zero;
+
+ Constant *If;
+ Constant *Else;
+ Constant *Break;
+ Constant *IfBreak;
+ Constant *ElseBreak;
+ Constant *Loop;
+ Constant *EndCf;
+
+ DominatorTree *DT;
+ StackVector Stack;
+
+ LoopInfo *LI;
+
+ bool isTopOfStack(BasicBlock *BB);
+
+ Value *popSaved();
+
+ void push(BasicBlock *BB, Value *Saved);
+
+ bool isElse(PHINode *Phi);
+
+ void eraseIfUnused(PHINode *Phi);
+
+ void openIf(BranchInst *Term);
+
+ void insertElse(BranchInst *Term);
+
+ Value *handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L);
+
+ void handleLoop(BranchInst *Term);
+
+ void closeControlFlow(BasicBlock *BB);
+
+public:
+ SIAnnotateControlFlow():
+ FunctionPass(ID) { }
+
+ bool doInitialization(Module &M) override;
+
+ bool runOnFunction(Function &F) override;
+
+ const char *getPassName() const override {
+ return "SI annotate control flow";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+};
+
+} // end anonymous namespace
+
+char SIAnnotateControlFlow::ID = 0;
+
+/// \brief Initialize all the types and constants used in the pass
+bool SIAnnotateControlFlow::doInitialization(Module &M) {
+ LLVMContext &Context = M.getContext();
+
+ Void = Type::getVoidTy(Context);
+ Boolean = Type::getInt1Ty(Context);
+ Int64 = Type::getInt64Ty(Context);
+ ReturnStruct = StructType::get(Boolean, Int64, (Type *)nullptr);
+
+ BoolTrue = ConstantInt::getTrue(Context);
+ BoolFalse = ConstantInt::getFalse(Context);
+ BoolUndef = UndefValue::get(Boolean);
+ Int64Zero = ConstantInt::get(Int64, 0);
+
+ If = M.getOrInsertFunction(
+ IfIntrinsic, ReturnStruct, Boolean, (Type *)nullptr);
+
+ Else = M.getOrInsertFunction(
+ ElseIntrinsic, ReturnStruct, Int64, (Type *)nullptr);
+
+ Break = M.getOrInsertFunction(
+ BreakIntrinsic, Int64, Int64, (Type *)nullptr);
+
+ IfBreak = M.getOrInsertFunction(
+ IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)nullptr);
+
+ ElseBreak = M.getOrInsertFunction(
+ ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)nullptr);
+
+ Loop = M.getOrInsertFunction(
+ LoopIntrinsic, Boolean, Int64, (Type *)nullptr);
+
+ EndCf = M.getOrInsertFunction(
+ EndCfIntrinsic, Void, Int64, (Type *)nullptr);
+
+ return false;
+}
+
+/// \brief Is BB the last block saved on the stack?
+bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) {
+ return !Stack.empty() && Stack.back().first == BB;
+}
+
+/// \brief Pop the last saved value from the control flow stack
+Value *SIAnnotateControlFlow::popSaved() {
+ return Stack.pop_back_val().second;
+}
+
+/// \brief Push a BB and saved value to the control flow stack
+void SIAnnotateControlFlow::push(BasicBlock *BB, Value *Saved) {
+ Stack.push_back(std::make_pair(BB, Saved));
+}
+
+/// \brief Can the condition represented by this PHI node be treated like
+/// an "Else" block?
+bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
+ BasicBlock *IDom = DT->getNode(Phi->getParent())->getIDom()->getBlock();
+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+ if (Phi->getIncomingBlock(i) == IDom) {
+
+ if (Phi->getIncomingValue(i) != BoolTrue)
+ return false;
+
+ } else {
+ if (Phi->getIncomingValue(i) != BoolFalse)
+ return false;
+
+ }
+ }
+ return true;
+}
+
+// \brief Erase "Phi" if it is not used any more
+void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
+ if (!Phi->hasNUsesOrMore(1))
+ Phi->eraseFromParent();
+}
+
+/// \brief Open a new "If" block
+void SIAnnotateControlFlow::openIf(BranchInst *Term) {
+ Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term);
+ Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
+ push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
+}
+
+/// \brief Close the last "If" block and open a new "Else" block
+void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
+ Value *Ret = CallInst::Create(Else, popSaved(), "", Term);
+ Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
+ push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
+}
+
+/// \brief Recursively handle the condition leading to a loop
+Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
+ llvm::Loop *L) {
+
+ // Only search through PHI nodes which are inside the loop. If we try this
+ // with PHI nodes that are outside of the loop, we end up inserting new PHI
+ // nodes outside of the loop which depend on values defined inside the loop.
+ // This will break the module with
+ // 'Instruction does not dominate all users!' errors.
+ PHINode *Phi = nullptr;
+ if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) {
+
+ BasicBlock *Parent = Phi->getParent();
+ PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front());
+ Value *Ret = NewPhi;
+
+ // Handle all non-constant incoming values first
+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = Phi->getIncomingValue(i);
+ BasicBlock *From = Phi->getIncomingBlock(i);
+ if (isa<ConstantInt>(Incoming)) {
+ NewPhi->addIncoming(Broken, From);
+ continue;
+ }
+
+ Phi->setIncomingValue(i, BoolFalse);
+ Value *PhiArg = handleLoopCondition(Incoming, Broken, L);
+ NewPhi->addIncoming(PhiArg, From);
+ }
+
+ BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();
+
+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+
+ Value *Incoming = Phi->getIncomingValue(i);
+ if (Incoming != BoolTrue)
+ continue;
+
+ BasicBlock *From = Phi->getIncomingBlock(i);
+ if (From == IDom) {
+ CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
+ if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
+ Value *Args[] = { OldEnd->getArgOperand(0), NewPhi };
+ Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
+ continue;
+ }
+ }
+ TerminatorInst *Insert = From->getTerminator();
+ Value *PhiArg = CallInst::Create(Break, Broken, "", Insert);
+ NewPhi->setIncomingValue(i, PhiArg);
+ }
+ eraseIfUnused(Phi);
+ return Ret;
+
+ } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
+ BasicBlock *Parent = Inst->getParent();
+ Instruction *Insert;
+ if (L->contains(Inst)) {
+ Insert = Parent->getTerminator();
+ } else {
+ Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
+ }
+ Value *Args[] = { Cond, Broken };
+ return CallInst::Create(IfBreak, Args, "", Insert);
+
+ } else {
+ llvm_unreachable("Unhandled loop condition!");
+ }
+ return nullptr;
+}
+
+/// \brief Handle a back edge (loop)
+void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
+ BasicBlock *BB = Term->getParent();
+ llvm::Loop *L = LI->getLoopFor(BB);
+ BasicBlock *Target = Term->getSuccessor(1);
+ PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
+
+ Value *Cond = Term->getCondition();
+ Term->setCondition(BoolTrue);
+ Value *Arg = handleLoopCondition(Cond, Broken, L);
+
+ for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
+ PI != PE; ++PI) {
+
+ Broken->addIncoming(*PI == BB ? Arg : Int64Zero, *PI);
+ }
+
+ Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
+ push(Term->getSuccessor(0), Arg);
+}
+
+/// \brief Close the last opened control flow
+void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
+ llvm::Loop *L = LI->getLoopFor(BB);
+
+ if (L && L->getHeader() == BB) {
+ // We can't insert an EndCF call into a loop header, because it will
+ // get executed on every iteration of the loop, when it should be
+ // executed only once before the loop.
+ SmallVector <BasicBlock*, 8> Latches;
+ L->getLoopLatches(Latches);
+
+ std::vector<BasicBlock*> Preds;
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end())
+ Preds.push_back(*PI);
+ }
+ BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", nullptr, DT,
+ LI, false);
+ }
+
+ CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
+}
+
+/// \brief Annotate the control flow with intrinsics so the backend can
+/// recognize if/then/else and loops.
+bool SIAnnotateControlFlow::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+ for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
+ E = df_end(&F.getEntryBlock()); I != E; ++I) {
+
+ BranchInst *Term = dyn_cast<BranchInst>((*I)->getTerminator());
+
+ if (!Term || Term->isUnconditional()) {
+ if (isTopOfStack(*I))
+ closeControlFlow(*I);
+ continue;
+ }
+
+ if (I.nodeVisited(Term->getSuccessor(1))) {
+ if (isTopOfStack(*I))
+ closeControlFlow(*I);
+ handleLoop(Term);
+ continue;
+ }
+
+ if (isTopOfStack(*I)) {
+ PHINode *Phi = dyn_cast<PHINode>(Term->getCondition());
+ if (Phi && Phi->getParent() == *I && isElse(Phi)) {
+ insertElse(Term);
+ eraseIfUnused(Phi);
+ continue;
+ }
+ closeControlFlow(*I);
+ }
+ openIf(Term);
+ }
+
+ assert(Stack.empty());
+ return true;
+}
+
+/// \brief Create the annotation pass
+FunctionPass *llvm::createSIAnnotateControlFlowPass() {
+ return new SIAnnotateControlFlow();
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
new file mode 100644
index 0000000..f1b4ba1
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -0,0 +1,172 @@
+//===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInstrDesc.h"
+
+#ifndef LLVM_LIB_TARGET_R600_SIDEFINES_H
+#define LLVM_LIB_TARGET_R600_SIDEFINES_H
+
+namespace SIInstrFlags {
+// This needs to be kept in sync with the field bits in InstSI.
+enum {
+ SALU = 1 << 3,
+ VALU = 1 << 4,
+
+ SOP1 = 1 << 5,
+ SOP2 = 1 << 6,
+ SOPC = 1 << 7,
+ SOPK = 1 << 8,
+ SOPP = 1 << 9,
+
+ VOP1 = 1 << 10,
+ VOP2 = 1 << 11,
+ VOP3 = 1 << 12,
+ VOPC = 1 << 13,
+
+ MUBUF = 1 << 14,
+ MTBUF = 1 << 15,
+ SMRD = 1 << 16,
+ DS = 1 << 17,
+ MIMG = 1 << 18,
+ FLAT = 1 << 19,
+ WQM = 1 << 20,
+ VGPRSpill = 1 << 21
+};
+} // namespace SIInstrFlags
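+
+// For illustration only: these encoding bits are typically tested against an
+// instruction's TSFlags; e.g., given some MachineInstr MI (a hypothetical
+// variable, not defined here):
+//   bool IsVOP3 = (MI.getDesc().TSFlags & SIInstrFlags::VOP3) != 0;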
+
+namespace llvm {
+namespace AMDGPU {
+ enum OperandType {
+ /// Operand with register or 32-bit immediate
+ OPERAND_REG_IMM32 = llvm::MCOI::OPERAND_FIRST_TARGET,
+ /// Operand with register or inline constant
+ OPERAND_REG_INLINE_C
+ };
+}
+}
+
+namespace SIInstrFlags {
+ enum Flags {
+ // First 4 bits are the instruction encoding
+ VM_CNT = 1 << 0,
+ EXP_CNT = 1 << 1,
+ LGKM_CNT = 1 << 2
+ };
+
+ // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
+ // The result is true if any of these tests are true.
+ enum ClassFlags {
+ S_NAN = 1 << 0, // Signaling NaN
+ Q_NAN = 1 << 1, // Quiet NaN
+ N_INFINITY = 1 << 2, // Negative infinity
+ N_NORMAL = 1 << 3, // Negative normal
+ N_SUBNORMAL = 1 << 4, // Negative subnormal
+ N_ZERO = 1 << 5, // Negative zero
+ P_ZERO = 1 << 6, // Positive zero
+ P_SUBNORMAL = 1 << 7, // Positive subnormal
+ P_NORMAL = 1 << 8, // Positive normal
+ P_INFINITY = 1 << 9 // Positive infinity
+ };
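+
+ // For illustration only: the mask operand is a bitwise OR of these flags, so
+ // a hypothetical "is any NaN" test would pass (S_NAN | Q_NAN) as the mask.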
+} // namespace SIInstrFlags
+
+namespace SISrcMods {
+ enum {
+ NEG = 1 << 0,
+ ABS = 1 << 1
+ };
+}
+
+namespace SIOutMods {
+ enum {
+ NONE = 0,
+ MUL2 = 1,
+ MUL4 = 2,
+ DIV2 = 3
+ };
+}
+
+#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
+#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C
+#define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8)
+#define R_00B128_SPI_SHADER_PGM_RSRC1_VS 0x00B128
+#define R_00B228_SPI_SHADER_PGM_RSRC1_GS 0x00B228
+#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
+#define S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
+#define S_00B028_SGPRS(x) (((x) & 0x0F) << 6)
+#define R_00B84C_COMPUTE_PGM_RSRC2 0x00B84C
+#define S_00B84C_SCRATCH_EN(x) (((x) & 0x1) << 0)
+#define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
+#define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
+#define S_00B84C_TGID_Y_EN(x) (((x) & 0x1) << 8)
+#define S_00B84C_TGID_Z_EN(x) (((x) & 0x1) << 9)
+#define S_00B84C_TG_SIZE_EN(x) (((x) & 0x1) << 10)
+#define S_00B84C_TIDIG_COMP_CNT(x) (((x) & 0x03) << 11)
+
+#define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
+#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
+
+
+#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
+#define S_00B848_VGPRS(x) (((x) & 0x3F) << 0)
+#define G_00B848_VGPRS(x) (((x) >> 0) & 0x3F)
+#define C_00B848_VGPRS 0xFFFFFFC0
+#define S_00B848_SGPRS(x) (((x) & 0x0F) << 6)
+#define G_00B848_SGPRS(x) (((x) >> 6) & 0x0F)
+#define C_00B848_SGPRS 0xFFFFFC3F
+#define S_00B848_PRIORITY(x) (((x) & 0x03) << 10)
+#define G_00B848_PRIORITY(x) (((x) >> 10) & 0x03)
+#define C_00B848_PRIORITY 0xFFFFF3FF
+#define S_00B848_FLOAT_MODE(x) (((x) & 0xFF) << 12)
+#define G_00B848_FLOAT_MODE(x) (((x) >> 12) & 0xFF)
+#define C_00B848_FLOAT_MODE 0xFFF00FFF
+#define S_00B848_PRIV(x) (((x) & 0x1) << 20)
+#define G_00B848_PRIV(x) (((x) >> 20) & 0x1)
+#define C_00B848_PRIV 0xFFEFFFFF
+#define S_00B848_DX10_CLAMP(x) (((x) & 0x1) << 21)
+#define G_00B848_DX10_CLAMP(x) (((x) >> 21) & 0x1)
+#define C_00B848_DX10_CLAMP 0xFFDFFFFF
+#define S_00B848_DEBUG_MODE(x) (((x) & 0x1) << 22)
+#define G_00B848_DEBUG_MODE(x) (((x) >> 22) & 0x1)
+#define C_00B848_DEBUG_MODE 0xFFBFFFFF
+#define S_00B848_IEEE_MODE(x) (((x) & 0x1) << 23)
+#define G_00B848_IEEE_MODE(x) (((x) >> 23) & 0x1)
+#define C_00B848_IEEE_MODE 0xFF7FFFFF
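+
+// For illustration only: the S_* macros place a field value, the G_* macros
+// extract it, and the C_* masks clear it. A hypothetical update of the VGPR
+// field of an RSRC1 value (NumVGPRBlocks is just an example name) could be:
+//   Rsrc1 = (Rsrc1 & C_00B848_VGPRS) | S_00B848_VGPRS(NumVGPRBlocks);
+//   unsigned VGPRBlocks = G_00B848_VGPRS(Rsrc1);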
+
+
+// Helpers for setting FLOAT_MODE
+#define FP_ROUND_ROUND_TO_NEAREST 0
+#define FP_ROUND_ROUND_TO_INF 1
+#define FP_ROUND_ROUND_TO_NEGINF 2
+#define FP_ROUND_ROUND_TO_ZERO 3
+
+// Bits 3:0 control rounding mode. 1:0 control single precision, 3:2 double
+// precision.
+#define FP_ROUND_MODE_SP(x) ((x) & 0x3)
+#define FP_ROUND_MODE_DP(x) (((x) & 0x3) << 2)
+
+#define FP_DENORM_FLUSH_IN_FLUSH_OUT 0
+#define FP_DENORM_FLUSH_OUT 1
+#define FP_DENORM_FLUSH_IN 2
+#define FP_DENORM_FLUSH_NONE 3
+
+
+// Bits 7:4 control denormal handling. 5:4 control single precision, 7:6 double
+// precision.
+#define FP_DENORM_MODE_SP(x) (((x) & 0x3) << 4)
+#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
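+
+// For illustration only: a complete FLOAT_MODE value combines the rounding and
+// denormal fields defined above (FloatMode is just an example name), e.g.:
+//   unsigned FloatMode = FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
+//                        FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
+//                        FP_DENORM_MODE_SP(FP_DENORM_FLUSH_IN_FLUSH_OUT) |
+//                        FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);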
+
+#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
+#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
+
+#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
+#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)
+
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp
new file mode 100644
index 0000000..5fe8d19
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp
@@ -0,0 +1,96 @@
+//===-- SIFixControlFlowLiveIntervals.cpp - Fix CF live intervals ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Spilling of EXEC masks used for control flow messes up control flow
+/// lowering, so mark all live intervals associated with CF instructions as
+/// non-spillable.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-fix-cf-live-intervals"
+
+namespace {
+
+class SIFixControlFlowLiveIntervals : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIFixControlFlowLiveIntervals() : MachineFunctionPass(ID) {
+ initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI Fix CF Live Intervals";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIFixControlFlowLiveIntervals, DEBUG_TYPE,
+ "SI Fix CF Live Intervals", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(SIFixControlFlowLiveIntervals, DEBUG_TYPE,
+ "SI Fix CF Live Intervals", false, false)
+
+char SIFixControlFlowLiveIntervals::ID = 0;
+
+char &llvm::SIFixControlFlowLiveIntervalsID = SIFixControlFlowLiveIntervals::ID;
+
+FunctionPass *llvm::createSIFixControlFlowLiveIntervalsPass() {
+ return new SIFixControlFlowLiveIntervals();
+}
+
+bool SIFixControlFlowLiveIntervals::runOnMachineFunction(MachineFunction &MF) {
+ LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
+
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::SI_IF:
+ case AMDGPU::SI_ELSE:
+ case AMDGPU::SI_BREAK:
+ case AMDGPU::SI_IF_BREAK:
+ case AMDGPU::SI_ELSE_BREAK:
+ case AMDGPU::SI_END_CF: {
+ unsigned Reg = MI.getOperand(0).getReg();
+ LIS->getInterval(Reg).markNotSpillable();
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
new file mode 100644
index 0000000..23502b4
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -0,0 +1,338 @@
+//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Copies from VGPR to SGPR registers are illegal and the register coalescer
+/// will sometimes generate these illegal copies in situations like this:
+///
+/// Register Class <vsrc> is the union of <vgpr> and <sgpr>
+///
+/// BB0:
+/// %vreg0 <sgpr> = SCALAR_INST
+/// %vreg1 <vsrc> = COPY %vreg0 <sgpr>
+/// ...
+/// BRANCH %cond BB1, BB2
+/// BB1:
+/// %vreg2 <vgpr> = VECTOR_INST
+/// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
+/// BB2:
+/// %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vrsc>, <BB#1>
+/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
+///
+///
+/// The coalescer will begin at BB0 and eliminate its copy, then the resulting
+/// code will look like this:
+///
+/// BB0:
+/// %vreg0 <sgpr> = SCALAR_INST
+/// ...
+/// BRANCH %cond BB1, BB2
+/// BB1:
+/// %vreg2 <vgpr> = VECTOR_INST
+/// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
+/// BB2:
+/// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
+/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
+///
+/// Now that the result of the PHI instruction is an SGPR, the register
+/// allocator is forced to constrain the register class of %vreg3 to
+/// <sgpr>, so we end up with final code like this:
+///
+/// BB0:
+/// %vreg0 <sgpr> = SCALAR_INST
+/// ...
+/// BRANCH %cond BB1, BB2
+/// BB1:
+/// %vreg2 <vgpr> = VECTOR_INST
+/// %vreg3 <sgpr> = COPY %vreg2 <vgpr>
+/// BB2:
+/// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
+/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
+///
+/// Now this code contains an illegal copy from a VGPR to an SGPR.
+///
+/// In order to avoid this problem, this pass searches for PHI instructions
+/// that define a <vsrc> register and constrains their definition class to
+/// <vgpr> if a user of the PHI's definition register is a vector instruction.
+/// If the PHI's definition class is constrained to <vgpr> then the coalescer
+/// will be unable to perform the COPY removal from the above example which
+/// ultimately led to the creation of an illegal COPY.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "sgpr-copies"
+
+namespace {
+
+class SIFixSGPRCopies : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI,
+ unsigned Reg,
+ unsigned SubReg) const;
+ const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI,
+ unsigned Reg,
+ unsigned SubReg) const;
+ bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI) const;
+
+public:
+ SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI Fix SGPR copies";
+ }
+
+};
+
+} // End anonymous namespace
+
+char SIFixSGPRCopies::ID = 0;
+
+FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) {
+ return new SIFixSGPRCopies(tm);
+}
+
+static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (!MI.getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
+ continue;
+
+ if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
+ return true;
+ }
+ return false;
+}
+
+/// This function walks the use list of Reg until it finds an instruction
+/// that isn't a COPY and returns the register class of that instruction.
+/// \return The register class of the first non-COPY instruction.
+const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI,
+ unsigned Reg,
+ unsigned SubReg) const {
+
+ const TargetRegisterClass *RC
+ = TargetRegisterInfo::isVirtualRegister(Reg) ?
+ MRI.getRegClass(Reg) :
+ TRI->getPhysRegClass(Reg);
+
+ RC = TRI->getSubRegClass(RC, SubReg);
+ for (MachineRegisterInfo::use_instr_iterator
+ I = MRI.use_instr_begin(Reg), E = MRI.use_instr_end(); I != E; ++I) {
+ switch (I->getOpcode()) {
+ case AMDGPU::COPY:
+ RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI,
+ I->getOperand(0).getReg(),
+ I->getOperand(0).getSubReg()));
+ break;
+ }
+ }
+
+ return RC;
+}
+
+const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef(
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI,
+ unsigned Reg,
+ unsigned SubReg) const {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg);
+ return TRI->getSubRegClass(RC, SubReg);
+ }
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ if (Def->getOpcode() != AMDGPU::COPY) {
+ return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg);
+ }
+
+ return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(),
+ Def->getOperand(1).getSubReg());
+}
+
+bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI) const {
+
+ unsigned DstReg = Copy.getOperand(0).getReg();
+ unsigned SrcReg = Copy.getOperand(1).getReg();
+ unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
+
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ // If the destination register is a physical register there isn't really
+ // much we can do to fix this.
+ return false;
+ }
+
+ const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
+
+ const TargetRegisterClass *SrcRC;
+
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
+ return false;
+
+ SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
+ return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
+}
+
+bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) {
+ DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n");
+ DEBUG(MI.print(dbgs()));
+ TII->moveToVALU(MI);
+
+ }
+
+ switch (MI.getOpcode()) {
+ default: continue;
+ case AMDGPU::PHI: {
+ DEBUG(dbgs() << "Fixing PHI: " << MI);
+
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ const MachineOperand &Op = MI.getOperand(i);
+ unsigned Reg = Op.getReg();
+ const TargetRegisterClass *RC
+ = inferRegClassFromDef(TRI, MRI, Reg, Op.getSubReg());
+
+ MRI.constrainRegClass(Op.getReg(), RC);
+ }
+ unsigned Reg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg,
+ MI.getOperand(0).getSubReg());
+ if (TRI->getCommonSubClass(RC, &AMDGPU::VGPR_32RegClass)) {
+ MRI.constrainRegClass(Reg, &AMDGPU::VGPR_32RegClass);
+ }
+
+ if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
+ break;
+
+ // If a PHI node defines an SGPR and any of its operands are VGPRs,
+ // then we need to move it to the VALU.
+ //
+ // Also, if a PHI node defines an SGPR and has all SGPR operands
+ // we must move it to the VALU, because the SGPR operands will
+ // all end up being assigned the same register, which means
+ // there is a potential for a conflict if different threads take
+ // different control flow paths.
+ //
+ // For Example:
+ //
+ // sgpr0 = def;
+ // ...
+ // sgpr1 = def;
+ // ...
+ // sgpr2 = PHI sgpr0, sgpr1
+ // use sgpr2;
+ //
+ // Will Become:
+ //
+ // sgpr2 = def;
+ // ...
+ // sgpr2 = def;
+ // ...
+ // use sgpr2
+ //
+ // FIXME: This is OK if the branching decision is made based on an
+ // SGPR value.
+ bool SGPRBranch = false;
+
+ // The one exception to this rule is when one of the operands
+ // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
+ // instruction. In this case, we know the program will
+ // never enter the second block (the loop) without entering
+ // the first block (where the condition is computed), so there
+ // is no chance for values to be overwritten.
+
+ bool HasBreakDef = false;
+ for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
+ unsigned Reg = MI.getOperand(i).getReg();
+ if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
+ TII->moveToVALU(MI);
+ break;
+ }
+ MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
+ assert(DefInstr);
+ switch(DefInstr->getOpcode()) {
+
+ case AMDGPU::SI_BREAK:
+ case AMDGPU::SI_IF_BREAK:
+ case AMDGPU::SI_ELSE_BREAK:
+ // If we see a PHI instruction that defines an SGPR, then that PHI
+ // instruction has already been considered and should have
+ // a *_BREAK as an operand.
+ case AMDGPU::PHI:
+ HasBreakDef = true;
+ break;
+ }
+ }
+
+ if (!SGPRBranch && !HasBreakDef)
+ TII->moveToVALU(MI);
+ break;
+ }
+ case AMDGPU::REG_SEQUENCE: {
+ if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
+ !hasVGPROperands(MI, TRI))
+ continue;
+
+ DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
+
+ TII->moveToVALU(MI);
+ break;
+ }
+ case AMDGPU::INSERT_SUBREG: {
+ const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
+ DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
+ Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
+ Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
+ if (TRI->isSGPRClass(DstRC) &&
+ (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
+ DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
+ TII->moveToVALU(MI);
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp
new file mode 100644
index 0000000..0c54446
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp
@@ -0,0 +1,192 @@
+//===-- SIFixSGPRLiveRanges.cpp - Fix SGPR live ranges ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// SALU instructions ignore control flow, so we need to modify the live ranges
+/// of the registers they define in some cases.
+///
+/// The main case we need to handle is when a def is used on one side of a
+/// branch but not the other. For example:
+///
+/// %def
+/// IF
+/// ...
+/// ...
+/// ELSE
+/// %use
+/// ...
+/// ENDIF
+///
+/// Here we need the register allocator to avoid assigning any of the defs
+/// inside the IF to the same register as %def. In traditional live interval
+/// analysis %def is not live inside the IF branch; however, since SALU
+/// instructions inside the IF will be executed even if the branch is not
+/// taken, there is a chance that one of them will overwrite the value of
+/// %def, and the use in the ELSE will then see the wrong value.
+///
+/// The strategy we use for solving this is to add an extra use after the ENDIF:
+///
+/// %def
+/// IF
+/// ...
+/// ...
+/// ELSE
+/// %use
+/// ...
+/// ENDIF
+/// %use
+///
+/// Adding this use will make the def live throughout the IF branch, which is
+/// what we want.
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-fix-sgpr-live-ranges"
+
+namespace {
+
+class SIFixSGPRLiveRanges : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIFixSGPRLiveRanges() : MachineFunctionPass(ID) {
+ initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI Fix SGPR live ranges";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIFixSGPRLiveRanges, DEBUG_TYPE,
+ "SI Fix SGPR Live Ranges", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_END(SIFixSGPRLiveRanges, DEBUG_TYPE,
+ "SI Fix SGPR Live Ranges", false, false)
+
+char SIFixSGPRLiveRanges::ID = 0;
+
+char &llvm::SIFixSGPRLiveRangesID = SIFixSGPRLiveRanges::ID;
+
+FunctionPass *llvm::createSIFixSGPRLiveRangesPass() {
+ return new SIFixSGPRLiveRanges();
+}
+
+bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
+ LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
+ MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
+ std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;
+
+ // First pass, collect all live intervals for SGPRs
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.defs()) {
+ if (MO.isImplicit())
+ continue;
+ unsigned Def = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Def)) {
+ if (TRI->isSGPRClass(MRI.getRegClass(Def)))
+ SGPRLiveRanges.push_back(
+ std::make_pair(Def, &LIS->getInterval(Def)));
+ } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
+ SGPRLiveRanges.push_back(
+ std::make_pair(Def, &LIS->getRegUnit(Def)));
+ }
+ }
+ }
+ }
+
+ // Second pass fix the intervals
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+ MachineBasicBlock &MBB = *BI;
+ if (MBB.succ_size() < 2)
+ continue;
+
+ // We have structured control flow, so the number of successors should be two.
+ assert(MBB.succ_size() == 2);
+ MachineBasicBlock *SuccA = *MBB.succ_begin();
+ MachineBasicBlock *SuccB = *(++MBB.succ_begin());
+ MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);
+
+ if (!NCD)
+ continue;
+
+ MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator();
+
+ if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) {
+ assert(NCD->succ_size() == 2);
+ // We want to make sure we insert the Use after the ENDIF, not after
+ // the ELSE.
+ NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
+ *(++NCD->succ_begin()));
+ }
+ assert(SuccA && SuccB);
+ for (std::pair<unsigned, LiveRange*> RegLR : SGPRLiveRanges) {
+ unsigned Reg = RegLR.first;
+ LiveRange *LR = RegLR.second;
+
+ // FIXME: We could be smarter here. If the register is Live-In to
+ // one block, but the other doesn't have any SGPR defs, then there
+ // won't be a conflict. Also, if the branch decision is based on
+ // a value in an SGPR, then there will be no conflict.
+ bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
+ bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);
+
+ if ((!LiveInToA && !LiveInToB) ||
+ (LiveInToA && LiveInToB))
+ continue;
+
+ // This interval is live in to one successor, but not the other, so
+ // we need to update its range so it is live in to both.
+ DEBUG(dbgs() << "Possible SGPR conflict detected " << " in " << *LR <<
+ " BB#" << SuccA->getNumber() << ", BB#" <<
+ SuccB->getNumber() <<
+ " with NCD = " << NCD->getNumber() << '\n');
+
+ // FIXME: Need to figure out how to update LiveRange here so this pass
+ // will be able to preserve LiveInterval analysis.
+ BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
+ TII->get(AMDGPU::SGPR_USE))
+ .addReg(Reg, RegState::Implicit);
+ DEBUG(NCD->getFirstNonPHI()->dump());
+ }
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
new file mode 100644
index 0000000..d14e37a
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -0,0 +1,288 @@
+//===-- SIFoldOperands.cpp - Fold operands --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "si-fold-operands"
+using namespace llvm;
+
+namespace {
+
+class SIFoldOperands : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIFoldOperands() : MachineFunctionPass(ID) {
+ initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI Fold Operands";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+struct FoldCandidate {
+ MachineInstr *UseMI;
+ unsigned UseOpNo;
+ MachineOperand *OpToFold;
+ uint64_t ImmToFold;
+
+ FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
+ UseMI(MI), UseOpNo(OpNo) {
+
+ if (FoldOp->isImm()) {
+ OpToFold = nullptr;
+ ImmToFold = FoldOp->getImm();
+ } else {
+ assert(FoldOp->isReg());
+ OpToFold = FoldOp;
+ }
+ }
+
+ bool isImm() const {
+ return !OpToFold;
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
+ "SI Fold Operands", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
+ "SI Fold Operands", false, false)
+
+char SIFoldOperands::ID = 0;
+
+char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
+
+FunctionPass *llvm::createSIFoldOperandsPass() {
+ return new SIFoldOperands();
+}
+
+static bool isSafeToFold(unsigned Opcode) {
+ switch(Opcode) {
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_B64_PSEUDO:
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::COPY:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool updateOperand(FoldCandidate &Fold,
+ const TargetRegisterInfo &TRI) {
+ MachineInstr *MI = Fold.UseMI;
+ MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
+ assert(Old.isReg());
+
+ if (Fold.isImm()) {
+ Old.ChangeToImmediate(Fold.ImmToFold);
+ return true;
+ }
+
+ MachineOperand *New = Fold.OpToFold;
+ if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
+ TargetRegisterInfo::isVirtualRegister(New->getReg())) {
+ Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
+ return true;
+ }
+
+ // FIXME: Handle physical registers.
+
+ return false;
+}
+
+static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
+ MachineInstr *MI, unsigned OpNo,
+ MachineOperand *OpToFold,
+ const SIInstrInfo *TII) {
+ if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {
+ // Operand is not legal, so try to commute the instruction to
+ // see if this makes it possible to fold.
+ unsigned CommuteIdx0;
+ unsigned CommuteIdx1;
+ bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);
+
+ if (CanCommute) {
+ if (CommuteIdx0 == OpNo)
+ OpNo = CommuteIdx1;
+ else if (CommuteIdx1 == OpNo)
+ OpNo = CommuteIdx0;
+ }
+
+ if (!CanCommute || !TII->commuteInstruction(MI))
+ return false;
+
+ if (!TII->isOperandLegal(MI, OpNo, OpToFold))
+ return false;
+ }
+
+ FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
+ return true;
+}
+
+bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MI = *I;
+
+ if (!isSafeToFold(MI.getOpcode()))
+ continue;
+
+ unsigned OpSize = TII->getOpSize(MI, 1);
+ MachineOperand &OpToFold = MI.getOperand(1);
+ bool FoldingImm = OpToFold.isImm();
+
+ // FIXME: We could also be folding things like FrameIndexes and
+ // TargetIndexes.
+ if (!FoldingImm && !OpToFold.isReg())
+ continue;
+
+ // Folding immediates with more than one use will increase program size.
+ // FIXME: This will also reduce register usage, which may be better
+ // in some cases. A better heuristic is needed.
+ if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
+ !MRI.hasOneUse(MI.getOperand(0).getReg()))
+ continue;
+
+ // FIXME: Fold operands with subregs.
+ if (OpToFold.isReg() &&
+ (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
+ OpToFold.getSubReg()))
+ continue;
+
+ std::vector<FoldCandidate> FoldList;
+ for (MachineRegisterInfo::use_iterator
+ Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
+ Use != E; ++Use) {
+
+ MachineInstr *UseMI = Use->getParent();
+ const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());
+
+ // FIXME: Fold operands with subregs.
+ if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
+ UseOp.isImplicit())) {
+ continue;
+ }
+
+ APInt Imm;
+
+ if (FoldingImm) {
+ unsigned UseReg = UseOp.getReg();
+ const TargetRegisterClass *UseRC
+ = TargetRegisterInfo::isVirtualRegister(UseReg) ?
+ MRI.getRegClass(UseReg) :
+ TRI.getPhysRegClass(UseReg);
+
+ Imm = APInt(64, OpToFold.getImm());
+
+ // Split 64-bit constants into 32-bits for folding.
+ if (UseOp.getSubReg()) {
+ if (UseRC->getSize() != 8)
+ continue;
+
+ if (UseOp.getSubReg() == AMDGPU::sub0) {
+ Imm = Imm.getLoBits(32);
+ } else {
+ assert(UseOp.getSubReg() == AMDGPU::sub1);
+ Imm = Imm.getHiBits(32);
+ }
+ }
+
+ // In order to fold immediates into copies, we need to change the
+ // copy to a MOV.
+ if (UseMI->getOpcode() == AMDGPU::COPY) {
+ unsigned DestReg = UseMI->getOperand(0).getReg();
+ const TargetRegisterClass *DestRC
+ = TargetRegisterInfo::isVirtualRegister(DestReg) ?
+ MRI.getRegClass(DestReg) :
+ TRI.getPhysRegClass(DestReg);
+
+ unsigned MovOp = TII->getMovOpcode(DestRC);
+ if (MovOp == AMDGPU::COPY)
+ continue;
+
+ UseMI->setDesc(TII->get(MovOp));
+ }
+ }
+
+ const MCInstrDesc &UseDesc = UseMI->getDesc();
+
+ // Don't fold into target independent nodes. Target independent opcodes
+ // don't have defined register classes.
+ if (UseDesc.isVariadic() ||
+ UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
+ continue;
+
+ if (FoldingImm) {
+ MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
+ tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
+ continue;
+ }
+
+ tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);
+
+ // FIXME: We could try to change the instruction from 64-bit to 32-bit
+ // to enable more folding opportunities. The shrink operands pass
+ // already does this.
+ }
+
+ for (FoldCandidate &Fold : FoldList) {
+ if (updateOperand(Fold, TRI)) {
+ // Clear kill flags.
+ if (!Fold.isImm()) {
+ assert(Fold.OpToFold && Fold.OpToFold->isReg());
+ Fold.OpToFold->setIsKill(false);
+ }
+ DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
+ Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
+ }
+ }
+ }
+ }
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
new file mode 100644
index 0000000..12d08cf
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -0,0 +1,2241 @@
+//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Custom DAG lowering for SI
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef _MSC_VER
+// Provide M_PI.
+#define _USE_MATH_DEFINES
+#include <cmath>
+#endif
+
+#include "SIISelLowering.h"
+#include "AMDGPU.h"
+#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/Function.h"
+#include "llvm/ADT/SmallString.h"
+
+using namespace llvm;
+
+SITargetLowering::SITargetLowering(TargetMachine &TM,
+ const AMDGPUSubtarget &STI)
+ : AMDGPUTargetLowering(TM, STI) {
+ addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
+ addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
+
+ addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
+ addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
+
+ addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);
+
+ addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass);
+ addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
+ addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
+
+ addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
+ addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
+
+ addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
+ addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
+
+ addRegisterClass(MVT::v16i32, &AMDGPU::SReg_512RegClass);
+ addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
+
+ computeRegisterProperties(STI.getRegisterInfo());
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
+
+ setOperationAction(ISD::ADD, MVT::i32, Legal);
+ setOperationAction(ISD::ADDC, MVT::i32, Legal);
+ setOperationAction(ISD::ADDE, MVT::i32, Legal);
+ setOperationAction(ISD::SUBC, MVT::i32, Legal);
+ setOperationAction(ISD::SUBE, MVT::i32, Legal);
+
+ setOperationAction(ISD::FSIN, MVT::f32, Custom);
+ setOperationAction(ISD::FCOS, MVT::f32, Custom);
+
+ setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+
+ // We need to custom lower vector stores from local memory
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
+
+ setOperationAction(ISD::STORE, MVT::v8i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v16i32, Custom);
+
+ setOperationAction(ISD::STORE, MVT::i1, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Promote);
+ AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
+ setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
+
+ setOperationAction(ISD::BSWAP, MVT::i32, Legal);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);
+
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+
+ for (MVT VT : MVT::integer_valuetypes()) {
+ if (VT == MVT::i64)
+ continue;
+
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
+
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
+
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Legal);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
+ }
+
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v16i16, Expand);
+ }
+
+ for (MVT VT : MVT::fp_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+
+ setTruncStoreAction(MVT::i64, MVT::i32, Expand);
+ setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
+ setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
+
+ setOperationAction(ISD::LOAD, MVT::i1, Custom);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+
+ // These should use UDIVREM, so set them to expand
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
+ setOperationAction(ISD::SELECT, MVT::i1, Promote);
+
+ // We only support LOAD/STORE and vector manipulation ops for vectors
+ // with > 4 elements.
+ for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32}) {
+ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
+ switch(Op) {
+ case ISD::LOAD:
+ case ISD::STORE:
+ case ISD::BUILD_VECTOR:
+ case ISD::BITCAST:
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::INSERT_VECTOR_ELT:
+ case ISD::INSERT_SUBVECTOR:
+ case ISD::EXTRACT_SUBVECTOR:
+ break;
+ case ISD::CONCAT_VECTORS:
+ setOperationAction(Op, VT, Custom);
+ break;
+ default:
+ setOperationAction(Op, VT, Expand);
+ break;
+ }
+ }
+ }
+
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ }
+
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::f32, Custom);
+ setOperationAction(ISD::FDIV, MVT::f64, Custom);
+
+ setTargetDAGCombine(ISD::FADD);
+ setTargetDAGCombine(ISD::FSUB);
+ setTargetDAGCombine(ISD::FMINNUM);
+ setTargetDAGCombine(ISD::FMAXNUM);
+ setTargetDAGCombine(ISD::SMIN);
+ setTargetDAGCombine(ISD::SMAX);
+ setTargetDAGCombine(ISD::UMIN);
+ setTargetDAGCombine(ISD::UMAX);
+ setTargetDAGCombine(ISD::SELECT_CC);
+ setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::UINT_TO_FP);
+
+ // All memory operations. Some folding on the pointer operand is done to
+ // help match the constant offsets in the addressing modes.
+ setTargetDAGCombine(ISD::LOAD);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD);
+ setTargetDAGCombine(ISD::ATOMIC_STORE);
+ setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP);
+ setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+ setTargetDAGCombine(ISD::ATOMIC_SWAP);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_ADD);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_SUB);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_AND);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_OR);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_XOR);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_NAND);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_MIN);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_MAX);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_UMIN);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);
+
+ setSchedulingPreference(Sched::RegPressure);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering queries
+//===----------------------------------------------------------------------===//
+
+bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
+ EVT) const {
+ // SI has some legal vector types, but no legal vector operations. Say no
+ // shuffles are legal in order to prefer scalarizing some vector operations.
+ return false;
+}
+
+bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty, unsigned AS) const {
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AS) {
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: {
+ // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
+ // additionally can do r + r + i with addr64. 32-bit has more addressing
+ // mode options. Depending on the resource constant, it can also do
+ // (i64 r0) + (i32 r1) * (i14 i).
+ //
+ // SMRD instructions have an 8-bit, dword offset.
+ //
+ // Assume nonuniform access, since the address space isn't enough to know
+ // what instruction we will use, and since we don't know if this is a load
+ // or store and scalar stores are only available on VI.
+ //
+ // We also know that if we are doing an extload, we can't do a scalar load.
+ //
+ // Private arrays end up using a scratch buffer most of the time, so also
+ // assume those use MUBUF instructions. Scratch loads / stores are currently
+ // implemented as mubuf instructions with the offen bit set, so they are
+ // slightly different from the normal addr64.
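+ //
+ // For example, a constant base offset of 4095 fits the 12-bit unsigned
+ // field checked below, while 4096 does not and is rejected.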
+ if (!isUInt<12>(AM.BaseOffs))
+ return false;
+
+ // FIXME: Since we can split immediate into soffset and immediate offset,
+ // would it make sense to allow any immediate?
+
+ switch (AM.Scale) {
+ case 0: // r + i or just i, depending on HasBaseReg.
+ return true;
+ case 1:
+ return true; // We have r + r or r + i.
+ case 2:
+ if (AM.HasBaseReg) {
+ // Reject 2 * r + r.
+ return false;
+ }
+
+ // Allow 2 * r as r + r
+ // Or 2 * r + i is allowed as r + r + i.
+ return true;
+ default: // Don't allow n * r
+ return false;
+ }
+ }
+ case AMDGPUAS::LOCAL_ADDRESS:
+ case AMDGPUAS::REGION_ADDRESS: {
+ // Basic, single offset DS instructions allow a 16-bit unsigned immediate
+ // field.
+ // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
+ // an 8-bit dword offset but we don't know the alignment here.
+ if (!isUInt<16>(AM.BaseOffs))
+ return false;
+
+ if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
+ return true;
+
+ if (AM.Scale == 1 && AM.HasBaseReg)
+ return true;
+
+ return false;
+ }
+ case AMDGPUAS::FLAT_ADDRESS: {
+ // Flat instructions do not have offsets, and only have the register
+ // address.
+ return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
+ }
+ default:
+ llvm_unreachable("unhandled address space");
+ }
+}
+
+bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
+ unsigned Align,
+ bool *IsFast) const {
+ if (IsFast)
+ *IsFast = false;
+
+ // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
+ // which isn't a simple VT.
+ if (!VT.isSimple() || VT == MVT::Other)
+ return false;
+
+ // TODO - CI+ supports unaligned memory accesses, but this requires driver
+ // support.
+
+ // XXX - The only mention I see of this in the ISA manual is for LDS direct
+ // reads: the byte address "must be dword aligned". Is it also true for
+ // normal loads and stores?
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS) {
+ // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
+ // aligned, 8 byte access in a single operation using ds_read2/write2_b32
+ // with adjacent offsets.
+ return Align % 4 == 0;
+ }
+
+ // Smaller than dword value must be aligned.
+ // FIXME: This should be allowed on CI+
+ if (VT.bitsLT(MVT::i32))
+ return false;
+
+ // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
+ // byte-address are ignored, thus forcing Dword alignment.
+ // This applies to private, global, and constant memory.
+ if (IsFast)
+ *IsFast = true;
+
+ return VT.bitsGT(MVT::i32) && Align % 4 == 0;
+}
+
+EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign, bool IsMemset,
+ bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ // FIXME: Should account for address space here.
+
+ // The default fallback uses the private pointer size as a guess for a type to
+ // use. Make sure we switch these to 64-bit accesses.
+
+ if (Size >= 16 && DstAlign >= 4) // XXX: Should only do for global
+ return MVT::v4i32;
+
+ if (Size >= 8 && DstAlign >= 4)
+ return MVT::v2i32;
+
+ // Use the default.
+ return MVT::Other;
+}
+
+TargetLoweringBase::LegalizeTypeAction
+SITargetLowering::getPreferredVectorAction(EVT VT) const {
+ if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
+ return TypeSplitVector;
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
+bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const {
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ return TII->isInlineConstant(Imm);
+}
+
+static EVT toIntegerVT(EVT VT) {
+ if (VT.isVector())
+ return VT.changeVectorElementTypeToInteger();
+ return MVT::getIntegerVT(VT.getSizeInBits());
+}
+
+SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
+ SDLoc SL, SDValue Chain,
+ unsigned Offset, bool Signed) const {
+ const DataLayout *DL = getDataLayout();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
+ unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR);
+
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ MVT PtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
+ SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
+ MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+ DAG.getConstant(Offset, SL, PtrVT));
+ SDValue PtrOffset = DAG.getUNDEF(getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+
+ unsigned Align = DL->getABITypeAlignment(Ty);
+
+ if (VT != MemVT && VT.isFloatingPoint()) {
+ // Do an integer load and convert.
+ // FIXME: This is mostly because load legalization after type legalization
+ // doesn't handle FP extloads.
+ assert(VT.getScalarType() == MVT::f32 &&
+ MemVT.getScalarType() == MVT::f16);
+
+ EVT IVT = toIntegerVT(VT);
+ EVT MemIVT = toIntegerVT(MemVT);
+ SDValue Load = DAG.getLoad(ISD::UNINDEXED, ISD::ZEXTLOAD,
+ IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT,
+ false, // isVolatile
+ true, // isNonTemporal
+ true, // isInvariant
+ Align); // Alignment
+ return DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load);
+ }
+
+ ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ return DAG.getLoad(ISD::UNINDEXED, ExtTy,
+ VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
+ false, // isVolatile
+ true, // isNonTemporal
+ true, // isInvariant
+ Align); // Alignment
+}
+
+SDValue SITargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ FunctionType *FType = MF.getFunction()->getFunctionType();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ assert(CallConv == CallingConv::C);
+
+ SmallVector<ISD::InputArg, 16> Splits;
+ BitVector Skipped(Ins.size());
+
+ for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
+ const ISD::InputArg &Arg = Ins[i];
+
+ // First check if it's a PS input addr
+ if (Info->getShaderType() == ShaderType::PIXEL && !Arg.Flags.isInReg() &&
+ !Arg.Flags.isByVal()) {
+
+ assert((PSInputNum <= 15) && "Too many PS inputs!");
+
+ if (!Arg.Used) {
+ // We can safely skip PS inputs
+ Skipped.set(i);
+ ++PSInputNum;
+ continue;
+ }
+
+ Info->PSInputAddr |= 1 << PSInputNum++;
+ }
+
+ // Second split vertices into their elements
+ if (Info->getShaderType() != ShaderType::COMPUTE && Arg.VT.isVector()) {
+ ISD::InputArg NewArg = Arg;
+ NewArg.Flags.setSplit();
+ NewArg.VT = Arg.VT.getVectorElementType();
+
+ // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
+ // three or five element vertex only needs three or five registers,
+ // NOT four or eight.
+ Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ for (unsigned j = 0; j != NumElements; ++j) {
+ Splits.push_back(NewArg);
+ NewArg.PartOffset += NewArg.VT.getStoreSize();
+ }
+
+ } else if (Info->getShaderType() != ShaderType::COMPUTE) {
+ Splits.push_back(Arg);
+ }
+ }
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+
+ // At least one interpolation mode must be enabled or else the GPU will hang.
+ if (Info->getShaderType() == ShaderType::PIXEL &&
+ (Info->PSInputAddr & 0x7F) == 0) {
+ Info->PSInputAddr |= 1;
+ CCInfo.AllocateReg(AMDGPU::VGPR0);
+ CCInfo.AllocateReg(AMDGPU::VGPR1);
+ }
+
+ // The pointer to the list of arguments is stored in SGPR0, SGPR1
+ // The pointer to the scratch buffer is stored in SGPR2, SGPR3
+ if (Info->getShaderType() == ShaderType::COMPUTE) {
+ if (Subtarget->isAmdHsaOS())
+ Info->NumUserSGPRs = 2; // FIXME: Need to support scratch buffers.
+ else
+ Info->NumUserSGPRs = 4;
+
+ unsigned InputPtrReg =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR);
+ unsigned InputPtrRegLo =
+ TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 0);
+ unsigned InputPtrRegHi =
+ TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 1);
+
+ unsigned ScratchPtrReg =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
+ unsigned ScratchPtrRegLo =
+ TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 0);
+ unsigned ScratchPtrRegHi =
+ TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 1);
+
+ CCInfo.AllocateReg(InputPtrRegLo);
+ CCInfo.AllocateReg(InputPtrRegHi);
+ CCInfo.AllocateReg(ScratchPtrRegLo);
+ CCInfo.AllocateReg(ScratchPtrRegHi);
+ MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
+ MF.addLiveIn(ScratchPtrReg, &AMDGPU::SReg_64RegClass);
+ }
+
+ if (Info->getShaderType() == ShaderType::COMPUTE) {
+ getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
+ Splits);
+ }
+
+ AnalyzeFormalArguments(CCInfo, Splits);
+
+ for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
+
+ const ISD::InputArg &Arg = Ins[i];
+ if (Skipped[i]) {
+ InVals.push_back(DAG.getUNDEF(Arg.VT));
+ continue;
+ }
+
+ CCValAssign &VA = ArgLocs[ArgIdx++];
+ MVT VT = VA.getLocVT();
+
+ if (VA.isMemLoc()) {
+ VT = Ins[i].VT;
+ EVT MemVT = Splits[i].VT;
+ const unsigned Offset = 36 + VA.getLocMemOffset();
+ // The first 36 bytes of the input buffer contain information about
+ // thread group and global sizes.
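+ // A sketch of the assumed layout (see SI::KernelInputOffsets used below):
+ // nine 32-bit words, NGROUPS_{X,Y,Z}, GLOBAL_SIZE_{X,Y,Z} and
+ // LOCAL_SIZE_{X,Y,Z}, i.e. 9 * 4 = 36 bytes, after which the user
+ // arguments start.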
+ SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
+ Offset, Ins[i].Flags.isSExt());
+
+ const PointerType *ParamTy =
+ dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+ ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ // On SI local pointers are just offsets into LDS, so they are always
+ // less than 16-bits. On CI and newer they could potentially be
+ // real pointers, so we can't guarantee their size.
+ Arg = DAG.getNode(ISD::AssertZext, DL, Arg.getValueType(), Arg,
+ DAG.getValueType(MVT::i16));
+ }
+
+ InVals.push_back(Arg);
+ Info->ABIArgOffset = Offset + MemVT.getStoreSize();
+ continue;
+ }
+ assert(VA.isRegLoc() && "Parameter must be in a register!");
+
+ unsigned Reg = VA.getLocReg();
+
+ if (VT == MVT::i64) {
+ // For now assume it is a pointer
+ Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
+ &AMDGPU::SReg_64RegClass);
+ Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
+ InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+ continue;
+ }
+
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
+
+ Reg = MF.addLiveIn(Reg, RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+
+ if (Arg.VT.isVector()) {
+
+ // Build a vector from the registers
+ Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ SmallVector<SDValue, 4> Regs;
+ Regs.push_back(Val);
+ for (unsigned j = 1; j != NumElements; ++j) {
+ Reg = ArgLocs[ArgIdx++].getLocReg();
+ Reg = MF.addLiveIn(Reg, RC);
+ Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+ }
+
+ // Fill up the missing vector elements
+ NumElements = Arg.VT.getVectorNumElements() - NumElements;
+ Regs.append(NumElements, DAG.getUNDEF(VT));
+
+ InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT, Regs));
+ continue;
+ }
+
+ InVals.push_back(Val);
+ }
+
+ if (Info->getShaderType() != ShaderType::COMPUTE) {
+ unsigned ScratchIdx = CCInfo.getFirstUnallocated(ArrayRef<MCPhysReg>(
+ AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs()));
+ Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
+ }
+ return Chain;
+}
+
+MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr * MI, MachineBasicBlock * BB) const {
+
+ MachineBasicBlock::iterator I = *MI;
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+
+ switch (MI->getOpcode()) {
+ default:
+ return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case AMDGPU::BRANCH:
+ return BB;
+ case AMDGPU::SI_RegisterStorePseudo: {
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
+ Reg);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+ MIB.addOperand(MI->getOperand(i));
+
+ MI->eraseFromParent();
+ break;
+ }
+ }
+ return BB;
+}
+
+bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
+ // This currently forces unfolding various combinations of fsub into fma with
+ // free fneg'd operands. As long as we have fast FMA (controlled by
+ // isFMAFasterThanFMulAndFAdd), we should perform these.
+
+ // When fma is quarter rate, for f64 where add / sub are at best half rate,
+ // most of these combines appear to be cycle neutral but save on instruction
+ // count / code size.
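+
+ // For example, one such unfolding (illustrative, not an exhaustive list):
+ //   (fsub (fmul a, b), c) -> (fma a, b, (fneg c))
+ // where the fneg is free as a source modifier on the fma operand.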
+ return true;
+}
+
+EVT SITargetLowering::getSetCCResultType(LLVMContext &Ctx, EVT VT) const {
+ if (!VT.isVector()) {
+ return MVT::i1;
+ }
+ return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
+}
+
+MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
+ return MVT::i32;
+}
+
+// Answering this is somewhat tricky and depends on the specific device, since
+// different devices have different rates for fma and for f64 operations in
+// general.
+//
+// v_fma_f64 and v_mul_f64 always take the same number of cycles as each other
+// regardless of which device (although the number of cycles differs between
+// devices), so it is always profitable for f64.
+//
+// v_fma_f32 takes 4 or 16 cycles depending on the device, so it is profitable
+// only on full rate devices. Normally we should prefer selecting v_mad_f32,
+// which we can always do even without fused FP ops, since it returns the same
+// result as the separate operations and is always full rate. Therefore, we lie
+// and report that fma is not faster for f32. However, v_mad_f32 does not
+// support denormals, so we do report fma as faster when we have a fast fma
+// device and need denormal support.
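+//
+// For reference (informal summary): fma(a, b, c) computes a * b + c with a
+// single rounding step, while mad rounds the intermediate product, matching
+// the result of a separate fmul followed by fadd.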
+//
+bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ // FMA is as fast as mul+add on some subtargets. However, full rate f32 mad
+ // is always available and returns the same result as the separate operations,
+ // so we normally prefer it over fma. Mad can't be used when denormal support
+ // is required, so only report fma as faster in that case.
+ return Subtarget->hasFP32Denormals() && Subtarget->hasFastFMAF32();
+ case MVT::f64:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::LOAD: {
+ SDValue Result = LowerLOAD(Op, DAG);
+ assert((!Result.getNode() ||
+ Result.getNode()->getNumValues() == 2) &&
+ "Load should return a value and a chain");
+ return Result;
+ }
+
+ case ISD::FSIN:
+ case ISD::FCOS:
+ return LowerTrig(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::FDIV: return LowerFDIV(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::GlobalAddress: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ return LowerGlobalAddress(MFI, Op, DAG);
+ }
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
+ }
+ return SDValue();
+}
+
+/// \brief Helper function for LowerBRCOND
+static SDNode *findUser(SDValue Value, unsigned Opcode) {
+
+ SDNode *Parent = Value.getNode();
+ for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
+ I != E; ++I) {
+
+ if (I.getUse().get() != Value)
+ continue;
+
+ if (I->getOpcode() == Opcode)
+ return *I;
+ }
+ return nullptr;
+}
+
+SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
+
+ FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
+ unsigned FrameIndex = FINode->getIndex();
+
+ return DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
+}
+
+/// This transforms the control flow intrinsics so that the branch destination
+/// is passed as the last parameter; it also switches the branch target with BR
+/// if the need arises.
+SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
+ SelectionDAG &DAG) const {
+
+ SDLoc DL(BRCOND);
+
+ SDNode *Intr = BRCOND.getOperand(1).getNode();
+ SDValue Target = BRCOND.getOperand(2);
+ SDNode *BR = nullptr;
+
+ if (Intr->getOpcode() == ISD::SETCC) {
+ // As long as we negate the condition everything is fine
+ SDNode *SetCC = Intr;
+ assert(SetCC->getConstantOperandVal(1) == 1);
+ assert(cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
+ ISD::SETNE);
+ Intr = SetCC->getOperand(0).getNode();
+
+ } else {
+ // Get the target from BR if we don't negate the condition
+ BR = findUser(BRCOND, ISD::BR);
+ Target = BR->getOperand(1);
+ }
+
+ assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+
+ // Build the result types of the new intrinsic call.
+ ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());
+
+ // Collect the operands of the new intrinsic call.
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(BRCOND.getOperand(0));
+ Ops.append(Intr->op_begin() + 1, Intr->op_end());
+ Ops.push_back(Target);
+
+ // Build the new intrinsic call.
+ SDNode *Result = DAG.getNode(
+ Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
+ DAG.getVTList(Res), Ops).getNode();
+
+ if (BR) {
+ // Give the branch instruction our target
+ SDValue Ops[] = {
+ BR->getOperand(0),
+ BRCOND.getOperand(2)
+ };
+ SDValue NewBR = DAG.getNode(ISD::BR, DL, BR->getVTList(), Ops);
+ DAG.ReplaceAllUsesWith(BR, NewBR.getNode());
+ BR = NewBR.getNode();
+ }
+
+ SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
+
+ // Copy the intrinsic results to registers
+ for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
+ SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
+ if (!CopyToReg)
+ continue;
+
+ Chain = DAG.getCopyToReg(
+ Chain, DL,
+ CopyToReg->getOperand(1),
+ SDValue(Result, i - 1),
+ SDValue());
+
+ DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
+ }
+
+ // Remove the old intrinsic from the chain
+ DAG.ReplaceAllUsesOfValueWith(
+ SDValue(Intr, Intr->getNumValues() - 1),
+ Intr->getOperand(0));
+
+ return Chain;
+}
+
+SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
+ SDValue Op,
+ SelectionDAG &DAG) const {
+ GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
+
+ if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+ return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
+
+ SDLoc DL(GSD);
+ const GlobalValue *GV = GSD->getGlobal();
+ MVT PtrVT = getPointerTy(GSD->getAddressSpace());
+
+ SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+
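+ // The 64-bit address arithmetic below is done as two 32-bit halves: ADDC
+ // adds the low words and produces a carry, ADDE folds that carry into the
+ // high word, and BUILD_PAIR reassembles the i64 result.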
+ SDValue PtrLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
+ DAG.getConstant(0, DL, MVT::i32));
+ SDValue PtrHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
+ DAG.getConstant(1, DL, MVT::i32));
+
+ SDValue Lo = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i32, MVT::Glue),
+ PtrLo, GA);
+ SDValue Hi = DAG.getNode(ISD::ADDE, DL, DAG.getVTList(MVT::i32, MVT::Glue),
+ PtrHi, DAG.getConstant(0, DL, MVT::i32),
+ SDValue(Lo.getNode(), 1));
+ return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+}
+
+SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
+ SDValue V) const {
+ // We can't use CopyToReg, because MachineCSE won't combine COPY instructions,
+ // so we will end up with redundant moves to m0.
+ //
+ // We can't use S_MOV_B32, because there is no way to specify m0 as the
+ // destination register.
+ //
+ // We have to use them both. MachineCSE will combine all the S_MOV_B32
+ // instructions, and the register coalescer will eliminate the extra copies.
+ SDNode *M0 = DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, V.getValueType(), V);
+ return DAG.getCopyToReg(Chain, DL, DAG.getRegister(AMDGPU::M0, MVT::i32),
+ SDValue(M0, 0), SDValue()); // Glue
+ // A null SDValue creates a glue result.
+}
+
+SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+ switch (IntrinsicID) {
+ case Intrinsic::r600_read_ngroups_x:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_X, false);
+ case Intrinsic::r600_read_ngroups_y:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_Y, false);
+ case Intrinsic::r600_read_ngroups_z:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_Z, false);
+ case Intrinsic::r600_read_global_size_x:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
+ case Intrinsic::r600_read_global_size_y:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
+ case Intrinsic::r600_read_global_size_z:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
+ case Intrinsic::r600_read_local_size_x:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::LOCAL_SIZE_X, false);
+ case Intrinsic::r600_read_local_size_y:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::LOCAL_SIZE_Y, false);
+ case Intrinsic::r600_read_local_size_z:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::LOCAL_SIZE_Z, false);
+
+ case Intrinsic::AMDGPU_read_workdim:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset,
+ false);
+
+ case Intrinsic::r600_read_tgid_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
+ case Intrinsic::r600_read_tgid_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Y), VT);
+ case Intrinsic::r600_read_tgid_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Z), VT);
+ case Intrinsic::r600_read_tidig_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_X), VT);
+ case Intrinsic::r600_read_tidig_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Y), VT);
+ case Intrinsic::r600_read_tidig_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Z), VT);
+ case AMDGPUIntrinsic::SI_load_const: {
+ SDValue Ops[] = {
+ Op.getOperand(1),
+ Op.getOperand(2)
+ };
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
+ VT.getStoreSize(), 4);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
+ Op->getVTList(), Ops, VT, MMO);
+ }
+ case AMDGPUIntrinsic::SI_sample:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
+ case AMDGPUIntrinsic::SI_sampleb:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG);
+ case AMDGPUIntrinsic::SI_sampled:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG);
+ case AMDGPUIntrinsic::SI_samplel:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
+ case AMDGPUIntrinsic::SI_vs_load_input:
+ return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
+ Op.getOperand(1),
+ Op.getOperand(2),
+ Op.getOperand(3));
+
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
+ return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
+ DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));
+ case AMDGPUIntrinsic::SI_fs_constant: {
+ SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
+ SDValue Glue = M0.getValue(1);
+ return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
+ DAG.getConstant(2, DL, MVT::i32), // P0
+ Op.getOperand(1), Op.getOperand(2), Glue);
+ }
+ case AMDGPUIntrinsic::SI_fs_interp: {
+ SDValue IJ = Op.getOperand(4);
+ SDValue I = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
+ DAG.getConstant(0, DL, MVT::i32));
+ SDValue J = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
+ DAG.getConstant(1, DL, MVT::i32));
+ SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
+ SDValue Glue = M0.getValue(1);
+ SDValue P1 = DAG.getNode(AMDGPUISD::INTERP_P1, DL,
+ DAG.getVTList(MVT::f32, MVT::Glue),
+ I, Op.getOperand(1), Op.getOperand(2), Glue);
+ Glue = SDValue(P1.getNode(), 1);
+ return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J,
+ Op.getOperand(1), Op.getOperand(2), Glue);
+ }
+ default:
+ return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ }
+}
+
+SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDLoc DL(Op);
+ SDValue Chain = Op.getOperand(0);
+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::SI_sendmsg: {
+ Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
+ SDValue Glue = Chain.getValue(1);
+ return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
+ Op.getOperand(2), Glue);
+ }
+ case AMDGPUIntrinsic::SI_tbuffer_store: {
+ SDValue Ops[] = {
+ Chain,
+ Op.getOperand(2),
+ Op.getOperand(3),
+ Op.getOperand(4),
+ Op.getOperand(5),
+ Op.getOperand(6),
+ Op.getOperand(7),
+ Op.getOperand(8),
+ Op.getOperand(9),
+ Op.getOperand(10),
+ Op.getOperand(11),
+ Op.getOperand(12),
+ Op.getOperand(13),
+ Op.getOperand(14)
+ };
+
+ EVT VT = Op.getOperand(3).getValueType();
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOStore,
+ VT.getStoreSize(), 4);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
+ Op->getVTList(), Ops, VT, MMO);
+ }
+ default:
+ return SDValue();
+ }
+}
+
+SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ LoadSDNode *Load = cast<LoadSDNode>(Op);
+
+ if (Op.getValueType().isVector()) {
+ assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
+ "Custom lowering for non-i32 vectors hasn't been implemented.");
+ unsigned NumElements = Op.getValueType().getVectorNumElements();
+ assert(NumElements != 2 && "v2 loads are supported for all address spaces.");
+ switch (Load->getAddressSpace()) {
+ default: break;
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ // v4 loads are supported for private and global memory.
+ if (NumElements <= 4)
+ break;
+ // fall-through
+ case AMDGPUAS::LOCAL_ADDRESS:
+ return ScalarizeVectorLoad(Op, DAG);
+ }
+ }
+
+ return AMDGPUTargetLowering::LowerLOAD(Op, DAG);
+}
+
+SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
+ const SDValue &Op,
+ SelectionDAG &DAG) const {
+ return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2),
+ Op.getOperand(3),
+ Op.getOperand(4));
+}
+
+SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType() != MVT::i64)
+ return SDValue();
+
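+ // Split the 64-bit select into two 32-bit selects on the low and high
+ // halves of the operands, then reassemble the i64 result with a bitcast.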
+ SDLoc DL(Op);
+ SDValue Cond = Op.getOperand(0);
+
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
+ SDValue One = DAG.getConstant(1, DL, MVT::i32);
+
+ SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(1));
+ SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(2));
+
+ SDValue Lo0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, Zero);
+ SDValue Lo1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, Zero);
+
+ SDValue Lo = DAG.getSelect(DL, MVT::i32, Cond, Lo0, Lo1);
+
+ SDValue Hi0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, One);
+ SDValue Hi1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, One);
+
+ SDValue Hi = DAG.getSelect(DL, MVT::i32, Cond, Hi0, Hi1);
+
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i32, Lo, Hi);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Res);
+}
+
+// Catch division cases where we can use shortcuts with rcp and rsq
+// instructions.
+SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ bool Unsafe = DAG.getTarget().Options.UnsafeFPMath;
+
+ if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) {
+ if ((Unsafe || (VT == MVT::f32 && !Subtarget->hasFP32Denormals())) &&
+ CLHS->isExactlyValue(1.0)) {
+ // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
+ // the CI documentation have a worst-case error of 1 ulp.
+ // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to
+ // use it as long as we aren't trying to use denormals.
+
+ // 1.0 / sqrt(x) -> rsq(x)
+ //
+ // XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP
+ // error seems really high at 2^29 ULP.
+ if (RHS.getOpcode() == ISD::FSQRT)
+ return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0));
+
+ // 1.0 / x -> rcp(x)
+ return DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
+ }
+ }
+
+ if (Unsafe) {
+ // Turn into multiply by the reciprocal.
+ // x / y -> x * (1.0 / y)
+ SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
+ return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip);
+ }
+
+ return SDValue();
+}
+
+SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
+ SDValue FastLowered = LowerFastFDIV(Op, DAG);
+ if (FastLowered.getNode())
+ return FastLowered;
+
+ // This uses v_rcp_f32 which does not handle denormals. Let this hit a
+ // selection error for now rather than do something incorrect.
+ if (Subtarget->hasFP32Denormals())
+ return SDValue();
+
+ SDLoc SL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
+
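+ // Denominator scaling trick, sketched informally (the exact ulp bounds are
+ // not restated here): 0x6f800000 is 2^96 and 0x2f800000 is 2^-32 as f32 bit
+ // patterns. If |RHS| exceeds 2^96, pre-scale it by 2^-32 so the reciprocal
+ // does not underflow, then multiply the quotient by the same scale factor
+ // afterwards.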
+ const APFloat K0Val(BitsToFloat(0x6f800000));
+ const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
+
+ const APFloat K1Val(BitsToFloat(0x2f800000));
+ const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
+
+ const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32);
+
+ SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
+
+ SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
+
+ r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
+
+ SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
+
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
+
+ return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
+}
+
+SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
+ if (DAG.getTarget().Options.UnsafeFPMath)
+ return LowerFastFDIV(Op, DAG);
+
+ SDLoc SL(Op);
+ SDValue X = Op.getOperand(0);
+ SDValue Y = Op.getOperand(1);
+
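+ // Rough shape of the sequence below (a sketch, not an exact error analysis):
+ // div_scale produces scaled copies of the operands, r = rcp(y') is refined
+ // with two Newton-Raphson steps of the form
+ //   e = fma(-y', r, 1.0); r = fma(r, e, r);
+ // the quotient estimate q = x' * r gets a residual correction via another
+ // fma, and div_fmas / div_fixup apply the final scaling and handle the
+ // special cases (infinities, NaNs, denormals).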
+ const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
+
+ SDVTList ScaleVT = DAG.getVTList(MVT::f64, MVT::i1);
+
+ SDValue DivScale0 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, Y, Y, X);
+
+ SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f64, DivScale0);
+
+ SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f64, DivScale0);
+
+ SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Rcp, One);
+
+ SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f64, Rcp, Fma0, Rcp);
+
+ SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Fma1, One);
+
+ SDValue DivScale1 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, X, Y, X);
+
+ SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f64, Fma1, Fma2, Fma1);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, DivScale1, Fma3);
+
+ SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f64,
+ NegDivScale0, Mul, DivScale1);
+
+ SDValue Scale;
+
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ // Work around a hardware bug on SI where the condition output from div_scale
+ // is not usable.
+
+ const SDValue Hi = DAG.getConstant(1, SL, MVT::i32);
+
+ // Figure out which scale to use for div_fmas.
+ SDValue NumBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
+ SDValue DenBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Y);
+ SDValue Scale0BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale0);
+ SDValue Scale1BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale1);
+
+ SDValue NumHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, NumBC, Hi);
+ SDValue DenHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, DenBC, Hi);
+
+ SDValue Scale0Hi
+ = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale0BC, Hi);
+ SDValue Scale1Hi
+ = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale1BC, Hi);
+
+ SDValue CmpDen = DAG.getSetCC(SL, MVT::i1, DenHi, Scale0Hi, ISD::SETEQ);
+ SDValue CmpNum = DAG.getSetCC(SL, MVT::i1, NumHi, Scale1Hi, ISD::SETEQ);
+ Scale = DAG.getNode(ISD::XOR, SL, MVT::i1, CmpNum, CmpDen);
+ } else {
+ Scale = DivScale1.getValue(1);
+ }
+
+ SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f64,
+ Fma4, Fma3, Mul, Scale);
+
+ return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f64, Fmas, Y, X);
+}
+
+SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ if (VT == MVT::f32)
+ return LowerFDIV32(Op, DAG);
+
+ if (VT == MVT::f64)
+ return LowerFDIV64(Op, DAG);
+
+ llvm_unreachable("Unexpected type for fdiv");
+}
+
+SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ EVT VT = Store->getMemoryVT();
+
+ // These stores are legal.
+ if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
+ if (VT.isVector() && VT.getVectorNumElements() > 4)
+ return ScalarizeVectorStore(Op, DAG);
+ return SDValue();
+ }
+
+ SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
+ if (Ret.getNode())
+ return Ret;
+
+ if (VT.isVector() && VT.getVectorNumElements() >= 8)
+ return ScalarizeVectorStore(Op, DAG);
+
+ if (VT == MVT::i1)
+ return DAG.getTruncStore(Store->getChain(), DL,
+ DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
+ Store->getBasePtr(), MVT::i1, Store->getMemOperand());
+
+ return SDValue();
+}
+
+SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue Arg = Op.getOperand(0);
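+ // The hardware sin/cos instructions apparently take their argument in units
+ // of full rotations rather than radians, so scale x by 0.5/pi (= 1/(2*pi))
+ // and keep only the fractional part before emitting SIN_HW / COS_HW, i.e.
+ //   sin(x) = SIN_HW(fract(x / (2*pi))).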
+ SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
+ DAG.getNode(ISD::FMUL, DL, VT, Arg,
+ DAG.getConstantFP(0.5/M_PI, DL,
+ VT)));
+
+ switch (Op.getOpcode()) {
+ case ISD::FCOS:
+ return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, FractPart);
+ case ISD::FSIN:
+ return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, FractPart);
+ default:
+ llvm_unreachable("Wrong trig opcode");
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG optimizations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ EVT VT = N->getValueType(0);
+ EVT ScalarVT = VT.getScalarType();
+ if (ScalarVT != MVT::f32)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // TODO: We could try to match extracting the higher bytes, which would be
+ // easier if i8 vectors weren't promoted to i32 vectors, particularly after
+ // types are legalized. v4i8 -> v4f32 is probably the only case to worry
+ // about in practice.
+ if (DCI.isAfterLegalizeVectorOps() && SrcVT == MVT::i32) {
+ if (DAG.MaskedValueIsZero(Src, APInt::getHighBitsSet(32, 24))) {
+ SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Src);
+ DCI.AddToWorklist(Cvt.getNode());
+ return Cvt;
+ }
+ }
+
+ // We are primarily trying to catch operations on illegal vector types
+ // before they are expanded.
+ // For scalars, we can use the more flexible method of checking masked bits
+ // after legalization.
+ if (!DCI.isBeforeLegalize() ||
+ !SrcVT.isVector() ||
+ SrcVT.getVectorElementType() != MVT::i8) {
+ return SDValue();
+ }
+
+ assert(DCI.isBeforeLegalize() && "Unexpected legal type");
+
+ // Weird sized vectors are a pain to handle, but we know 3 is really the same
+ // size as 4.
+ unsigned NElts = SrcVT.getVectorNumElements();
+ if (!SrcVT.isSimple() && NElts != 3)
+ return SDValue();
+
+ // Handle v4i8 -> v4f32 extload. Replace the v4i8 with a legal i32 load to
+ // prevent a mess from expanding to v4i32 and repacking.
+ if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) {
+ EVT LoadVT = getEquivalentMemType(*DAG.getContext(), SrcVT);
+ EVT RegVT = getEquivalentLoadRegType(*DAG.getContext(), SrcVT);
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32, NElts);
+ LoadSDNode *Load = cast<LoadSDNode>(Src);
+
+ unsigned AS = Load->getAddressSpace();
+ unsigned Align = Load->getAlignment();
+ Type *Ty = LoadVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
+
+ // Don't try to replace the load if we have to expand it due to alignment
+ // problems. Otherwise we will end up scalarizing the load, and trying to
+ // repack into the vector for no real reason.
+ if (Align < ABIAlignment &&
+ !allowsMisalignedMemoryAccesses(LoadVT, AS, Align, nullptr)) {
+ return SDValue();
+ }
+
+ SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegVT,
+ Load->getChain(),
+ Load->getBasePtr(),
+ LoadVT,
+ Load->getMemOperand());
+
+ // Make sure successors of the original load stay after it by updating
+ // them to use the new Chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), NewLoad.getValue(1));
+
+ SmallVector<SDValue, 4> Elts;
+ if (RegVT.isVector())
+ DAG.ExtractVectorElements(NewLoad, Elts);
+ else
+ Elts.push_back(NewLoad);
+
+ SmallVector<SDValue, 4> Ops;
+
+ unsigned EltIdx = 0;
+ for (SDValue Elt : Elts) {
+ unsigned ComponentsInElt = std::min(4u, NElts - 4 * EltIdx);
+ for (unsigned I = 0; I < ComponentsInElt; ++I) {
+ unsigned Opc = AMDGPUISD::CVT_F32_UBYTE0 + I;
+ SDValue Cvt = DAG.getNode(Opc, DL, MVT::f32, Elt);
+ DCI.AddToWorklist(Cvt.getNode());
+ Ops.push_back(Cvt);
+ }
+
+ ++EltIdx;
+ }
+
+ assert(Ops.size() == NElts);
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, FloatVT, Ops);
+ }
+
+ return SDValue();
+}
+
+/// \brief Return true if the given offset Size in bytes can be folded into
+/// the immediate offsets of a memory instruction for the given address space.
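+///
+/// For example (illustrative values): a 64-byte offset folds into a DS
+/// instruction's 16-bit offset field, while an 8192-byte offset does not fit
+/// the 12-bit MUBUF offset and so cannot be folded for a global access.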
+static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
+ const AMDGPUSubtarget &STI) {
+ switch (AS) {
+ case AMDGPUAS::GLOBAL_ADDRESS: {
+ // MUBUF instructions have a 12-bit offset in bytes.
+ return isUInt<12>(OffsetSize);
+ }
+ case AMDGPUAS::CONSTANT_ADDRESS: {
+ // SMRD instructions have an 8-bit offset in dwords on SI and
+ // a 20-bit offset in bytes on VI.
+ if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return isUInt<20>(OffsetSize);
+ else
+ return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
+ }
+ case AMDGPUAS::LOCAL_ADDRESS:
+ case AMDGPUAS::REGION_ADDRESS: {
+ // The single offset versions have a 16-bit offset in bytes.
+ return isUInt<16>(OffsetSize);
+ }
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ // Indirect register addressing does not use any offsets.
+ default:
+ return false;
+ }
+}
+
+// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
+
+// This is a variant of
+// (mul (add x, c1), c2) -> add (mul x, c2), (mul c1, c2),
+//
+// The normal DAG combiner will do this, but only if the add has one use since
+// that would increase the number of instructions.
+//
+// This prevents us from seeing a constant offset that can be folded into a
+// memory instruction's addressing mode. If we know the resulting add offset of
+// a pointer can be folded into an addressing offset, we can replace the pointer
+// operand with the add of the new constant offset. This eliminates one of the uses,
+// and may allow the remaining use to also be simplified.
+//
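+// For example (illustrative): for a DS access to ((x + 4) << 2), rewriting it
+// as (x << 2) + 16 exposes the constant 16, which can then be folded into the
+// LDS instruction's immediate offset field.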
+SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
+ unsigned AddrSpace,
+ DAGCombinerInfo &DCI) const {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ if (N0.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ const ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N1);
+ if (!CN1)
+ return SDValue();
+
+ const ConstantSDNode *CAdd = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!CAdd)
+ return SDValue();
+
+ // If the resulting offset is too large, we can't fold it into the addressing
+ // mode offset.
+ APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
+ if (!canFoldOffset(Offset.getZExtValue(), AddrSpace, *Subtarget))
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue ShlX = DAG.getNode(ISD::SHL, SL, VT, N0.getOperand(0), N1);
+ SDValue COffset = DAG.getConstant(Offset, SL, MVT::i32);
+
+ return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
+}
+
+SDValue SITargetLowering::performAndCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) ->
+ // fp_class x, ~(s_nan | q_nan | n_infinity | p_infinity)
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ if (LHS.getOpcode() == ISD::SETCC &&
+ RHS.getOpcode() == ISD::SETCC) {
+ ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+ ISD::CondCode RCC = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
+
+ SDValue X = LHS.getOperand(0);
+ SDValue Y = RHS.getOperand(0);
+ if (Y.getOpcode() != ISD::FABS || Y.getOperand(0) != X)
+ return SDValue();
+
+ if (LCC == ISD::SETO) {
+ if (X != LHS.getOperand(1))
+ return SDValue();
+
+ if (RCC == ISD::SETUNE) {
+ const ConstantFPSDNode *C1 = dyn_cast<ConstantFPSDNode>(RHS.getOperand(1));
+ if (!C1 || !C1->isInfinity() || C1->isNegative())
+ return SDValue();
+
+ const uint32_t Mask = SIInstrFlags::N_NORMAL |
+ SIInstrFlags::N_SUBNORMAL |
+ SIInstrFlags::N_ZERO |
+ SIInstrFlags::P_ZERO |
+ SIInstrFlags::P_SUBNORMAL |
+ SIInstrFlags::P_NORMAL;
+
+ static_assert(((~(SIInstrFlags::S_NAN |
+ SIInstrFlags::Q_NAN |
+ SIInstrFlags::N_INFINITY |
+ SIInstrFlags::P_INFINITY)) & 0x3ff) == Mask,
+ "mask not equal");
+
+ SDLoc DL(N);
+ return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
+ X, DAG.getConstant(Mask, DL, MVT::i32));
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue SITargetLowering::performOrCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2)
+ if (LHS.getOpcode() == AMDGPUISD::FP_CLASS &&
+ RHS.getOpcode() == AMDGPUISD::FP_CLASS) {
+ SDValue Src = LHS.getOperand(0);
+ if (Src != RHS.getOperand(0))
+ return SDValue();
+
+ const ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+ const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
+ if (!CLHS || !CRHS)
+ return SDValue();
+
+ // Only 10 bits are used.
+ static const uint32_t MaxMask = 0x3ff;
+
+ uint32_t NewMask = (CLHS->getZExtValue() | CRHS->getZExtValue()) & MaxMask;
+ SDLoc DL(N);
+ return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
+ Src, DAG.getConstant(NewMask, DL, MVT::i32));
+ }
+
+ return SDValue();
+}
+
+SDValue SITargetLowering::performClassCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Mask = N->getOperand(1);
+
+ // fp_class x, 0 -> false
+ if (const ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Mask)) {
+ if (CMask->isNullValue())
+ return DAG.getConstant(0, SDLoc(N), MVT::i1);
+ }
+
+ return SDValue();
+}
+
+static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
+ switch (Opc) {
+ case ISD::FMAXNUM:
+ return AMDGPUISD::FMAX3;
+ case ISD::SMAX:
+ return AMDGPUISD::SMAX3;
+ case ISD::UMAX:
+ return AMDGPUISD::UMAX3;
+ case ISD::FMINNUM:
+ return AMDGPUISD::FMIN3;
+ case ISD::SMIN:
+ return AMDGPUISD::SMIN3;
+ case ISD::UMIN:
+ return AMDGPUISD::UMIN3;
+ default:
+ llvm_unreachable("Not a min/max opcode");
+ }
+}
+
+SDValue SITargetLowering::performMin3Max3Combine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ unsigned Opc = N->getOpcode();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // Only do this if the inner op has one use, since otherwise this just
+ // increases register pressure for no benefit.
+
+ // max(max(a, b), c)
+ if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
+ SDLoc DL(N);
+ return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
+ DL,
+ N->getValueType(0),
+ Op0.getOperand(0),
+ Op0.getOperand(1),
+ Op1);
+ }
+
+ // max(a, max(b, c))
+ if (Op1.getOpcode() == Opc && Op1.hasOneUse()) {
+ SDLoc DL(N);
+ return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
+ DL,
+ N->getValueType(0),
+ Op0,
+ Op1.getOperand(0),
+ Op1.getOperand(1));
+ }
+
+ return SDValue();
+}
+
+SDValue SITargetLowering::performSetCCCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ EVT VT = LHS.getValueType();
+
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return SDValue();
+
+ // Match isinf pattern
+ // (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity))
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ if (CC == ISD::SETOEQ && LHS.getOpcode() == ISD::FABS) {
+ const ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
+ if (!CRHS)
+ return SDValue();
+
+ const APFloat &APF = CRHS->getValueAPF();
+ if (APF.isInfinity() && !APF.isNegative()) {
+ unsigned Mask = SIInstrFlags::P_INFINITY | SIInstrFlags::N_INFINITY;
+ return DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1, LHS.getOperand(0),
+ DAG.getConstant(Mask, SL, MVT::i32));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+
+ switch (N->getOpcode()) {
+ default:
+ return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
+ case ISD::SETCC:
+ return performSetCCCombine(N, DCI);
+ case ISD::FMAXNUM: // TODO: What about fmax_legacy?
+ case ISD::FMINNUM:
+ case ISD::SMAX:
+ case ISD::SMIN:
+ case ISD::UMAX:
+ case ISD::UMIN: {
+ if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
+ N->getValueType(0) != MVT::f64 &&
+ getTargetMachine().getOptLevel() > CodeGenOpt::None)
+ return performMin3Max3Combine(N, DCI);
+ break;
+ }
+
+ case AMDGPUISD::CVT_F32_UBYTE0:
+ case AMDGPUISD::CVT_F32_UBYTE1:
+ case AMDGPUISD::CVT_F32_UBYTE2:
+ case AMDGPUISD::CVT_F32_UBYTE3: {
+ unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;
+
+ SDValue Src = N->getOperand(0);
+ APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);
+
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(Src, Demanded) ||
+ TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) {
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+
+ break;
+ }
+
+ case ISD::UINT_TO_FP:
+ return performUCharToFloatCombine(N, DCI);
+
+ case ISD::FADD: {
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+ break;
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::f32)
+ break;
+
+ // Only do this if we are not trying to support denormals. v_mad_f32 does
+ // not support denormals ever.
+ if (Subtarget->hasFP32Denormals())
+ break;
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // These should really be instruction patterns, but writing patterns with
+ // source modifiers is a pain.
+
+ // fadd (fadd (a, a), b) -> mad 2.0, a, b
+ if (LHS.getOpcode() == ISD::FADD) {
+ SDValue A = LHS.getOperand(0);
+ if (A == LHS.getOperand(1)) {
+ const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
+ return DAG.getNode(ISD::FMAD, DL, VT, Two, A, RHS);
+ }
+ }
+
+ // fadd (b, fadd (a, a)) -> mad 2.0, a, b
+ if (RHS.getOpcode() == ISD::FADD) {
+ SDValue A = RHS.getOperand(0);
+ if (A == RHS.getOperand(1)) {
+ const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
+ return DAG.getNode(ISD::FMAD, DL, VT, Two, A, LHS);
+ }
+ }
+
+ return SDValue();
+ }
+ case ISD::FSUB: {
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+ break;
+
+ EVT VT = N->getValueType(0);
+
+ // Try to get the fneg to fold into the source modifier. This undoes generic
+ // DAG combines and folds them into the mad.
+ //
+ // Only do this if we are not trying to support denormals. v_mad_f32 does
+ // not support denormals ever.
+ if (VT == MVT::f32 &&
+ !Subtarget->hasFP32Denormals()) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (LHS.getOpcode() == ISD::FADD) {
+ // (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
+
+ SDValue A = LHS.getOperand(0);
+ if (A == LHS.getOperand(1)) {
+ const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
+ SDValue NegRHS = DAG.getNode(ISD::FNEG, DL, VT, RHS);
+
+ return DAG.getNode(ISD::FMAD, DL, VT, Two, A, NegRHS);
+ }
+ }
+
+ if (RHS.getOpcode() == ISD::FADD) {
+ // (fsub c, (fadd a, a)) -> mad -2.0, a, c
+
+ SDValue A = RHS.getOperand(0);
+ if (A == RHS.getOperand(1)) {
+ const SDValue NegTwo = DAG.getConstantFP(-2.0, DL, MVT::f32);
+ return DAG.getNode(ISD::FMAD, DL, VT, NegTwo, A, LHS);
+ }
+ }
+
+ return SDValue();
+ }
+
+ break;
+ }
+ case ISD::LOAD:
+ case ISD::STORE:
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE:
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX: { // TODO: Target mem intrinsics.
+ if (DCI.isBeforeLegalize())
+ break;
+
+ MemSDNode *MemNode = cast<MemSDNode>(N);
+ SDValue Ptr = MemNode->getBasePtr();
+
+ // TODO: We could also do this for multiplies.
+ unsigned AS = MemNode->getAddressSpace();
+ if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
+ SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
+ if (NewPtr) {
+ SmallVector<SDValue, 8> NewOps(MemNode->op_begin(), MemNode->op_end());
+
+ NewOps[N->getOpcode() == ISD::STORE ? 2 : 1] = NewPtr;
+ return SDValue(DAG.UpdateNodeOperands(MemNode, NewOps), 0);
+ }
+ }
+ break;
+ }
+ case ISD::AND:
+ return performAndCombine(N, DCI);
+ case ISD::OR:
+ return performOrCombine(N, DCI);
+ case AMDGPUISD::FP_CLASS:
+ return performClassCombine(N, DCI);
+ }
+ return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
+}
+
+/// \brief Analyze the possible immediate value Op
+///
+/// Returns -1 if it isn't an immediate, 0 if it's an inline immediate
+/// and the immediate value if it's a literal immediate
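+///
+/// For reference (assumed here; the authoritative check is
+/// SIInstrInfo::isInlineConstant): SI inline immediates cover small integers
+/// in roughly the range -16..64 plus a handful of float constants such as
+/// 0.0, +/-0.5, +/-1.0, +/-2.0 and +/-4.0; anything else is a 32-bit literal.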
+int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
+
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+
+ if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
+ if (TII->isInlineConstant(Node->getAPIntValue()))
+ return 0;
+
+ uint64_t Val = Node->getZExtValue();
+ return isUInt<32>(Val) ? Val : -1;
+ }
+
+ if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) {
+ if (TII->isInlineConstant(Node->getValueAPF().bitcastToAPInt()))
+ return 0;
+
+ if (Node->getValueType(0) == MVT::f32)
+ return FloatToBits(Node->getValueAPF().convertToFloat());
+
+ return -1;
+ }
+
+ return -1;
+}
+
+/// \brief Helper function for adjustWritemask
+static unsigned SubIdx2Lane(unsigned Idx) {
+ switch (Idx) {
+ default: return 0;
+ case AMDGPU::sub0: return 0;
+ case AMDGPU::sub1: return 1;
+ case AMDGPU::sub2: return 2;
+ case AMDGPU::sub3: return 3;
+ }
+}
+
+/// \brief Adjust the writemask of MIMG instructions
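+///
+/// For example (illustrative): an image sample with dmask 0b1111 whose users
+/// only extract sub0 and sub2 can be shrunk to dmask 0b0101; the surviving
+/// components are packed, so the old sub2 user is rewritten to read sub1.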
+void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
+ SelectionDAG &DAG) const {
+ SDNode *Users[4] = { };
+ unsigned Lane = 0;
+ unsigned OldDmask = Node->getConstantOperandVal(0);
+ unsigned NewDmask = 0;
+
+ // Try to figure out the used register components
+ for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
+ I != E; ++I) {
+
+ // Abort if we can't understand the usage
+ if (!I->isMachineOpcode() ||
+ I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
+ return;
+
+ // Lane means which subreg of %VGPRa_VGPRb_VGPRc_VGPRd is used.
+ // Note that subregs are packed, i.e. Lane==0 is the first bit set
+ // in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit
+ // set, etc.
+ Lane = SubIdx2Lane(I->getConstantOperandVal(1));
+
+ // Set which texture component corresponds to the lane.
+ unsigned Comp;
+ for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) {
+ assert(Dmask);
+ Comp = countTrailingZeros(Dmask);
+ Dmask &= ~(1 << Comp);
+ }
+
+ // Abort if we have more than one user per component
+ if (Users[Lane])
+ return;
+
+ Users[Lane] = *I;
+ NewDmask |= 1 << Comp;
+ }
+
+ // Abort if there's no change
+ if (NewDmask == OldDmask)
+ return;
+
+ // Adjust the writemask in the node
+ std::vector<SDValue> Ops;
+ Ops.push_back(DAG.getTargetConstant(NewDmask, SDLoc(Node), MVT::i32));
+ Ops.insert(Ops.end(), Node->op_begin() + 1, Node->op_end());
+ Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops);
+
+ // If we only got one lane, replace it with a copy
+ // (if NewDmask has only one bit set...)
+ if (NewDmask && (NewDmask & (NewDmask-1)) == 0) {
+ SDValue RC = DAG.getTargetConstant(AMDGPU::VGPR_32RegClassID, SDLoc(),
+ MVT::i32);
+ SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ SDLoc(), Users[Lane]->getValueType(0),
+ SDValue(Node, 0), RC);
+ DAG.ReplaceAllUsesWith(Users[Lane], Copy);
+ return;
+ }
+
+ // Update the users of the node with the new indices
+ for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
+
+ SDNode *User = Users[i];
+ if (!User)
+ continue;
+
+ SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Op);
+
+ switch (Idx) {
+ default: break;
+ case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
+ case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
+ case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
+ }
+ }
+}
+
+/// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
+/// with frame index operands.
+/// LLVM assumes that inputs to these instructions are registers.
+void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
+ SelectionDAG &DAG) const {
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
+ if (!isa<FrameIndexSDNode>(Node->getOperand(i))) {
+ Ops.push_back(Node->getOperand(i));
+ continue;
+ }
+
+ SDLoc DL(Node);
+ Ops.push_back(SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL,
+ Node->getOperand(i).getValueType(),
+ Node->getOperand(i)), 0));
+ }
+
+ DAG.UpdateNodeOperands(Node, Ops);
+}
+
+/// \brief Fold the instructions after selecting them.
+SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
+ SelectionDAG &DAG) const {
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+
+ if (TII->isMIMG(Node->getMachineOpcode()))
+ adjustWritemask(Node, DAG);
+
+ if (Node->getMachineOpcode() == AMDGPU::INSERT_SUBREG ||
+ Node->getMachineOpcode() == AMDGPU::REG_SEQUENCE) {
+ legalizeTargetIndependentNode(Node, DAG);
+ return Node;
+ }
+ return Node;
+}
+
+/// \brief Assign the register class depending on the number of
+/// bits set in the writemask
+void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const {
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ TII->legalizeOperands(MI);
+
+ if (TII->isMIMG(MI->getOpcode())) {
+ unsigned VReg = MI->getOperand(0).getReg();
+ unsigned Writemask = MI->getOperand(1).getImm();
+ unsigned BitsSet = 0;
+ for (unsigned i = 0; i < 4; ++i)
+ BitsSet += Writemask & (1 << i) ? 1 : 0;
+
+ const TargetRegisterClass *RC;
+ switch (BitsSet) {
+ default: return;
+ case 1: RC = &AMDGPU::VGPR_32RegClass; break;
+ case 2: RC = &AMDGPU::VReg_64RegClass; break;
+ case 3: RC = &AMDGPU::VReg_96RegClass; break;
+ }
+
+ unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet);
+ MI->setDesc(TII->get(NewOpcode));
+ MRI.setRegClass(VReg, RC);
+ return;
+ }
+
+ // Replace unused atomics with the no return version.
+ int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI->getOpcode());
+ if (NoRetAtomicOp != -1) {
+ if (!Node->hasAnyUseOfValue(0)) {
+ MI->setDesc(TII->get(NoRetAtomicOp));
+ MI->RemoveOperand(0);
+ }
+
+ return;
+ }
+}
+
+static SDValue buildSMovImm32(SelectionDAG &DAG, SDLoc DL, uint64_t Val) {
+ SDValue K = DAG.getTargetConstant(Val, DL, MVT::i32);
+ return SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, K), 0);
+}
+
+MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
+ SDLoc DL,
+ SDValue Ptr) const {
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+#if 1
+ // XXX - Workaround for moveToVALU not handling different register class
+ // inserts for REG_SEQUENCE.
+
+ // Build the half of the subregister with the constants.
+ const SDValue Ops0[] = {
+ DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
+ buildSMovImm32(DAG, DL, 0),
+ DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
+ DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
+ };
+
+ SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::v2i32, Ops0), 0);
+
+ // Combine the constants and the pointer.
+ const SDValue Ops1[] = {
+ DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
+ Ptr,
+ DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
+ SubRegHi,
+ DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32)
+ };
+
+ return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
+#else
+ const SDValue Ops[] = {
+ DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
+ Ptr,
+ DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
+ buildSMovImm32(DAG, DL, 0),
+ DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
+ buildSMovImm32(DAG, DL, TII->getDefaultRsrcFormat() >> 32),
+ DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
+ };
+
+ return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
+
+#endif
+}
+
+/// \brief Return a resource descriptor with the 'Add TID' bit enabled
+/// The TID (Thread ID) is multiplied by the stride value (bits [61:48]
+/// of the resource descriptor) to create an offset, which is added to the
+/// resource pointer.
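+/// Roughly: effective address = resource base pointer + TID * stride
+/// (plus the usual buffer offset), with the stride taken from the descriptor.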
+MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
+ SDLoc DL,
+ SDValue Ptr,
+ uint32_t RsrcDword1,
+ uint64_t RsrcDword2And3) const {
+ SDValue PtrLo = DAG.getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
+ SDValue PtrHi = DAG.getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
+ if (RsrcDword1) {
+ PtrHi = SDValue(DAG.getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
+ DAG.getConstant(RsrcDword1, DL, MVT::i32)),
+ 0);
+ }
+
+ SDValue DataLo = buildSMovImm32(DAG, DL,
+ RsrcDword2And3 & UINT64_C(0xFFFFFFFF));
+ SDValue DataHi = buildSMovImm32(DAG, DL, RsrcDword2And3 >> 32);
+
+ const SDValue Ops[] = {
+ DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
+ PtrLo,
+ DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ PtrHi,
+ DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
+ DataLo,
+ DAG.getTargetConstant(AMDGPU::sub2, DL, MVT::i32),
+ DataHi,
+ DAG.getTargetConstant(AMDGPU::sub3, DL, MVT::i32)
+ };
+
+ return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
+}
+
+MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
+ SDLoc DL,
+ SDValue Ptr) const {
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
+ 0xffffffff; // Size
+
+ return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
+}
+
+SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const {
+ SDValue VReg = AMDGPUTargetLowering::CreateLiveInRegister(DAG, RC, Reg, VT);
+
+ return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()),
+ cast<RegisterSDNode>(VReg)->getReg(), VT);
+}
+
+//===----------------------------------------------------------------------===//
+// SI Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass *>
+SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint == "r") {
+ switch(VT.SimpleTy) {
+ default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
+ case MVT::i64:
+ return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
+ case MVT::i32:
+ return std::make_pair(0U, &AMDGPU::SGPR_32RegClass);
+ }
+ }
+
+ if (Constraint.size() > 1) {
+ const TargetRegisterClass *RC = nullptr;
+ if (Constraint[1] == 'v') {
+ RC = &AMDGPU::VGPR_32RegClass;
+ } else if (Constraint[1] == 's') {
+ RC = &AMDGPU::SGPR_32RegClass;
+ }
+
+ if (RC) {
+ unsigned Idx = std::atoi(Constraint.substr(2).c_str());
+ if (Idx < RC->getNumRegs())
+ return std::make_pair(RC->getRegister(Idx), RC);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
new file mode 100644
index 0000000..a956b01
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -0,0 +1,125 @@
+//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI DAG Lowering interface definition
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_SIISELLOWERING_H
+#define LLVM_LIB_TARGET_R600_SIISELLOWERING_H
+
+#include "AMDGPUISelLowering.h"
+#include "SIInstrInfo.h"
+
+namespace llvm {
+
+class SITargetLowering : public AMDGPUTargetLowering {
+ SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL,
+ SDValue Chain, unsigned Offset, bool Signed) const;
+ SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
+ SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
+ SelectionDAG &DAG) const override;
+
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+
+ void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
+
+ SDValue performUCharToFloatCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const;
+ SDValue performSHLPtrCombine(SDNode *N,
+ unsigned AS,
+ DAGCombinerInfo &DCI) const;
+ SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ SDValue performMin3Max3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+public:
+ SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI);
+
+ bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
+ EVT /*VT*/) const override;
+
+ bool isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty, unsigned AS) const override;
+
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
+ unsigned Align,
+ bool *IsFast) const override;
+
+ EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign, bool IsMemset,
+ bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const override;
+
+ TargetLoweringBase::LegalizeTypeAction
+ getPreferredVectorAction(EVT VT) const override;
+
+ bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const override;
+
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
+ MachineBasicBlock * BB) const override;
+ bool enableAggressiveFMAFusion(EVT VT) const override;
+ EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+ MVT getScalarShiftAmountTy(EVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+ SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
+ void AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const override;
+
+ int32_t analyzeImmediate(const SDNode *N) const;
+ SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const override;
+ void legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const;
+
+ MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, SDLoc DL, SDValue Ptr) const;
+ MachineSDNode *buildRSRC(SelectionDAG &DAG,
+ SDLoc DL,
+ SDValue Ptr,
+ uint32_t RsrcDword1,
+ uint64_t RsrcDword2And3) const;
+ MachineSDNode *buildScratchRSRC(SelectionDAG &DAG,
+ SDLoc DL,
+ SDValue Ptr) const;
+
+ std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI,
+ const std::string &Constraint, MVT VT) const override;
+ SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue V) const;
+};
+
+} // End namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
new file mode 100644
index 0000000..90a37f1
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -0,0 +1,480 @@
+//===-- SIInsertWaits.cpp - Insert S_WAITCNT instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Insert wait instructions for memory reads and writes.
+///
+/// Memory reads and writes are issued asynchronously, so we need to insert
+/// S_WAITCNT instructions when we want to access any of their results or
+/// overwrite any register that's used asynchronously.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief One variable for each of the hardware counters
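+/// (VM_CNT, EXP_CNT and LGKM_CNT). Named and Array alias the same storage, so
+/// the counters can be accessed either by name or by index in loops.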
+typedef union {
+ struct {
+ unsigned VM;
+ unsigned EXP;
+ unsigned LGKM;
+ } Named;
+ unsigned Array[3];
+
+} Counters;
+
+typedef enum {
+ OTHER,
+ SMEM,
+ VMEM
+} InstType;
+
+typedef Counters RegCounters[512];
+typedef std::pair<unsigned, unsigned> RegInterval;
+
+class SIInsertWaits : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const SIInstrInfo *TII;
+ const SIRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+
+ /// \brief Constant hardware limits
+ static const Counters WaitCounts;
+
+ /// \brief Constant zero value
+ static const Counters ZeroCounts;
+
+ /// \brief Counter values we have already waited on.
+ Counters WaitedOn;
+
+ /// \brief Counter values for last instruction issued.
+ Counters LastIssued;
+
+ /// \brief Registers used by async instructions.
+ RegCounters UsedRegs;
+
+ /// \brief Registers defined by async instructions.
+ RegCounters DefinedRegs;
+
+ /// \brief Different export instruction types seen since last wait.
+ unsigned ExpInstrTypesSeen;
+
+ /// \brief Type of the last opcode.
+ InstType LastOpcodeType;
+
+ bool LastInstWritesM0;
+
+ /// \brief Get increment/decrement amount for this instruction.
+ Counters getHwCounts(MachineInstr &MI);
+
+ /// \brief Is operand relevant for async execution?
+ bool isOpRelevant(MachineOperand &Op);
+
+ /// \brief Get register interval an operand affects.
+ RegInterval getRegInterval(MachineOperand &Op);
+
+  /// \brief Handle an instruction's async components
+ void pushInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+
+ /// \brief Insert the actual wait instruction
+ bool insertWait(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const Counters &Counts);
+
+ /// \brief Do we need def2def checks?
+ bool unorderedDefines(MachineInstr &MI);
+
+ /// \brief Resolve all operand dependencies to counter requirements
+ Counters handleOperands(MachineInstr &MI);
+
+ /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
+ void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
+
+public:
+ SIInsertWaits(TargetMachine &tm) :
+ MachineFunctionPass(ID),
+ TII(nullptr),
+ TRI(nullptr),
+ ExpInstrTypesSeen(0) { }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI insert wait instructions";
+ }
+
+};
+
+} // End anonymous namespace
+
+char SIInsertWaits::ID = 0;
+
+const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
+const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
+
+FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
+ return new SIInsertWaits(tm);
+}
+
+Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
+
+ uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
+ Counters Result;
+
+ Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
+
+ // Only consider stores or EXP for EXP_CNT
+ Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
+ (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
+
+  // LGKM may use larger values
+ if (TSFlags & SIInstrFlags::LGKM_CNT) {
+
+ if (TII->isSMRD(MI.getOpcode())) {
+
+ MachineOperand &Op = MI.getOperand(0);
+ assert(Op.isReg() && "First LGKM operand must be a register!");
+
+ unsigned Reg = Op.getReg();
+ unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
+ Result.Named.LGKM = Size > 4 ? 2 : 1;
+
+ } else {
+ // DS
+ Result.Named.LGKM = 1;
+ }
+
+ } else {
+ Result.Named.LGKM = 0;
+ }
+
+ return Result;
+}
+
+bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
+
+ // Constants are always irrelevant
+ if (!Op.isReg())
+ return false;
+
+ // Defines are always relevant
+ if (Op.isDef())
+ return true;
+
+ // For exports all registers are relevant
+ MachineInstr &MI = *Op.getParent();
+ if (MI.getOpcode() == AMDGPU::EXP)
+ return true;
+
+ // For stores the stored value is also relevant
+ if (!MI.getDesc().mayStore())
+ return false;
+
+ // Check if this operand is the value being stored.
+ // Special case for DS instructions, since the address
+ // operand comes before the value operand and it may have
+ // multiple data operands.
+
+ if (TII->isDS(MI.getOpcode())) {
+ MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data);
+ if (Data && Op.isIdenticalTo(*Data))
+ return true;
+
+ MachineOperand *Data0 = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
+ if (Data0 && Op.isIdenticalTo(*Data0))
+ return true;
+
+ MachineOperand *Data1 = TII->getNamedOperand(MI, AMDGPU::OpName::data1);
+ if (Data1 && Op.isIdenticalTo(*Data1))
+ return true;
+
+ return false;
+ }
+
+ // NOTE: This assumes that the value operand is before the
+ // address operand, and that there is only one value operand.
+ for (MachineInstr::mop_iterator I = MI.operands_begin(),
+ E = MI.operands_end(); I != E; ++I) {
+
+ if (I->isReg() && I->isUse())
+ return Op.isIdenticalTo(*I);
+ }
+
+ return false;
+}
+
+RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
+
+ if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
+ return std::make_pair(0, 0);
+
+ unsigned Reg = Op.getReg();
+ unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
+
+ assert(Size >= 4);
+
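+  // A register of Size bytes spans Size/4 consecutive 32-bit registers,
+  // starting at its hardware encoding value.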
+ RegInterval Result;
+ Result.first = TRI->getEncodingValue(Reg);
+ Result.second = Result.first + Size / 4;
+
+ return Result;
+}
+
+void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+
+ // Get the hardware counter increments and sum them up
+ Counters Increment = getHwCounts(*I);
+ unsigned Sum = 0;
+
+ for (unsigned i = 0; i < 3; ++i) {
+ LastIssued.Array[i] += Increment.Array[i];
+ Sum += Increment.Array[i];
+ }
+
+ // If we don't increase anything then that's it
+ if (Sum == 0) {
+ LastOpcodeType = OTHER;
+ return;
+ }
+
+ if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
+ AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+    // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
+ // or SMEM clause, respectively.
+ //
+ // The temporary workaround is to break the clauses with S_NOP.
+ //
+ // The proper solution would be to allocate registers such that all source
+ // and destination registers don't overlap, e.g. this is illegal:
+ // r0 = load r2
+ // r2 = load r0
+ if ((LastOpcodeType == SMEM && TII->isSMRD(I->getOpcode())) ||
+ (LastOpcodeType == VMEM && Increment.Named.VM)) {
+ // Insert a NOP to break the clause.
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
+ .addImm(0);
+ LastInstWritesM0 = false;
+ }
+
+ if (TII->isSMRD(I->getOpcode()))
+ LastOpcodeType = SMEM;
+ else if (Increment.Named.VM)
+ LastOpcodeType = VMEM;
+ }
+
+ // Remember which export instructions we have seen
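+  // (bit 0 marks EXP, bit 1 marks VM writes; seeing both makes EXP_CNT
+  // unordered, see insertWait()).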
+ if (Increment.Named.EXP) {
+ ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2;
+ }
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+
+ MachineOperand &Op = I->getOperand(i);
+ if (!isOpRelevant(Op))
+ continue;
+
+ RegInterval Interval = getRegInterval(Op);
+ for (unsigned j = Interval.first; j < Interval.second; ++j) {
+
+ // Remember which registers we define
+ if (Op.isDef())
+ DefinedRegs[j] = LastIssued;
+
+ // and which one we are using
+ if (Op.isUse())
+ UsedRegs[j] = LastIssued;
+ }
+ }
+}
+
+bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const Counters &Required) {
+
+ // End of program? No need to wait on anything
+ if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
+ return false;
+
+ // Figure out if the async instructions execute in order
+ bool Ordered[3];
+
+ // VM_CNT is always ordered
+ Ordered[0] = true;
+
+ // EXP_CNT is unordered if we have both EXP & VM-writes
+ Ordered[1] = ExpInstrTypesSeen == 3;
+
+ // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
+ Ordered[2] = false;
+
+ // The values we are going to put into the S_WAITCNT instruction
+ Counters Counts = WaitCounts;
+
+ // Do we really need to wait?
+ bool NeedWait = false;
+
+ for (unsigned i = 0; i < 3; ++i) {
+
+ if (Required.Array[i] <= WaitedOn.Array[i])
+ continue;
+
+ NeedWait = true;
+
+ if (Ordered[i]) {
+ unsigned Value = LastIssued.Array[i] - Required.Array[i];
+
+      // Clamp the value to what the hardware counter field can hold.
+ Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
+
+ } else
+ Counts.Array[i] = 0;
+
+    // Remember what we have waited on.
+ WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
+ }
+
+ if (!NeedWait)
+ return false;
+
+ // Reset EXP_CNT instruction types
+ if (Counts.Named.EXP == 0)
+ ExpInstrTypesSeen = 0;
+
+ // Build the wait instruction
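+  // The immediate packs VM into bits 3:0, EXP into bits 6:4 and LGKM into
+  // bits 10:8, e.g. {VM=3, EXP=1, LGKM=2} encodes as 0x213.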
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm((Counts.Named.VM & 0xF) |
+ ((Counts.Named.EXP & 0x7) << 4) |
+ ((Counts.Named.LGKM & 0x7) << 8));
+
+ LastOpcodeType = OTHER;
+ LastInstWritesM0 = false;
+ return true;
+}
+
+/// \brief helper function for handleOperands
+static void increaseCounters(Counters &Dst, const Counters &Src) {
+
+ for (unsigned i = 0; i < 3; ++i)
+ Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
+}
+
+Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
+
+ Counters Result = ZeroCounts;
+
+ // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
+ // but we also want to wait for any other outstanding transfers before
+ // signalling other hardware blocks
+ if (MI.getOpcode() == AMDGPU::S_SENDMSG)
+ return LastIssued;
+
+  // For each register affected by this instruction, raise the result to the
+  // counter values recorded for that register.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+
+ MachineOperand &Op = MI.getOperand(i);
+ RegInterval Interval = getRegInterval(Op);
+ for (unsigned j = Interval.first; j < Interval.second; ++j) {
+
+ if (Op.isDef()) {
+ increaseCounters(Result, UsedRegs[j]);
+ increaseCounters(Result, DefinedRegs[j]);
+ }
+
+ if (Op.isUse())
+ increaseCounters(Result, DefinedRegs[j]);
+ }
+ }
+
+ return Result;
+}
+
+void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <
+ AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return;
+
+ // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
+ if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
+ LastInstWritesM0 = false;
+ return;
+ }
+
+ // Set whether this instruction sets M0
+ LastInstWritesM0 = false;
+
+ unsigned NumOperands = I->getNumOperands();
+ for (unsigned i = 0; i < NumOperands; i++) {
+ const MachineOperand &Op = I->getOperand(i);
+
+ if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0)
+ LastInstWritesM0 = true;
+ }
+}
+
+// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
+// around other non-memory instructions.
+bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
+ bool Changes = false;
+
+ TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TRI =
+ static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+
+ MRI = &MF.getRegInfo();
+
+ WaitedOn = ZeroCounts;
+ LastIssued = ZeroCounts;
+ LastOpcodeType = OTHER;
+ LastInstWritesM0 = false;
+
+ memset(&UsedRegs, 0, sizeof(UsedRegs));
+ memset(&DefinedRegs, 0, sizeof(DefinedRegs));
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+
+ // Wait for everything before a barrier.
+ if (I->getOpcode() == AMDGPU::S_BARRIER)
+ Changes |= insertWait(MBB, I, LastIssued);
+ else
+ Changes |= insertWait(MBB, I, handleOperands(*I));
+
+ pushInstruction(MBB, I);
+ handleSendMsg(MBB, I);
+ }
+
+ // Wait for everything at the end of the MBB
+ Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
+ }
+
+ return Changes;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
new file mode 100644
index 0000000..211666a
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -0,0 +1,673 @@
+//===-- SIInstrFormats.td - SI Instruction Encodings ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Instruction format definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
+ AMDGPUInst<outs, ins, asm, pattern>, PredicateControl {
+
+ field bits<1> VM_CNT = 0;
+ field bits<1> EXP_CNT = 0;
+ field bits<1> LGKM_CNT = 0;
+
+ field bits<1> SALU = 0;
+ field bits<1> VALU = 0;
+
+ field bits<1> SOP1 = 0;
+ field bits<1> SOP2 = 0;
+ field bits<1> SOPC = 0;
+ field bits<1> SOPK = 0;
+ field bits<1> SOPP = 0;
+
+ field bits<1> VOP1 = 0;
+ field bits<1> VOP2 = 0;
+ field bits<1> VOP3 = 0;
+ field bits<1> VOPC = 0;
+
+ field bits<1> MUBUF = 0;
+ field bits<1> MTBUF = 0;
+ field bits<1> SMRD = 0;
+ field bits<1> DS = 0;
+ field bits<1> MIMG = 0;
+ field bits<1> FLAT = 0;
+ field bits<1> WQM = 0;
+ field bits<1> VGPRSpill = 0;
+
+ // These need to be kept in sync with the enum in SIInstrFlags.
+ let TSFlags{0} = VM_CNT;
+ let TSFlags{1} = EXP_CNT;
+ let TSFlags{2} = LGKM_CNT;
+
+ let TSFlags{3} = SALU;
+ let TSFlags{4} = VALU;
+
+ let TSFlags{5} = SOP1;
+ let TSFlags{6} = SOP2;
+ let TSFlags{7} = SOPC;
+ let TSFlags{8} = SOPK;
+ let TSFlags{9} = SOPP;
+
+ let TSFlags{10} = VOP1;
+ let TSFlags{11} = VOP2;
+ let TSFlags{12} = VOP3;
+ let TSFlags{13} = VOPC;
+
+ let TSFlags{14} = MUBUF;
+ let TSFlags{15} = MTBUF;
+ let TSFlags{16} = SMRD;
+ let TSFlags{17} = DS;
+ let TSFlags{18} = MIMG;
+ let TSFlags{19} = FLAT;
+ let TSFlags{20} = WQM;
+ let TSFlags{21} = VGPRSpill;
+
+ // Most instructions require adjustments after selection to satisfy
+ // operand requirements.
+ let hasPostISelHook = 1;
+ let SchedRW = [Write32Bit];
+}
+
+class Enc32 {
+ field bits<32> Inst;
+ int Size = 4;
+}
+
+class Enc64 {
+ field bits<64> Inst;
+ int Size = 8;
+}
+
+class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
+def VOPDstVCC : VOPDstOperand <VCCReg>;
+
+let Uses = [EXEC] in {
+
+class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VALU = 1;
+}
+
+class VOPCCommon <dag ins, string asm, list<dag> pattern> :
+ VOPAnyCommon <(outs VOPDstVCC:$dst), ins, asm, pattern> {
+
+ let DisableEncoding = "$dst";
+ let VOPC = 1;
+ let Size = 4;
+}
+
+class VOP1Common <dag outs, dag ins, string asm, list<dag> pattern> :
+ VOPAnyCommon <outs, ins, asm, pattern> {
+
+ let VOP1 = 1;
+ let Size = 4;
+}
+
+class VOP2Common <dag outs, dag ins, string asm, list<dag> pattern> :
+ VOPAnyCommon <outs, ins, asm, pattern> {
+
+ let VOP2 = 1;
+ let Size = 4;
+}
+
+class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
+ VOPAnyCommon <outs, ins, asm, pattern> {
+
+ // Using complex patterns gives VOP3 patterns a very high complexity rating,
+  // but standalone patterns are almost always preferred, so we need to adjust
+  // the priority lower. The goal is to use a large negative number to reduce
+  // the complexity to zero (or below zero).
+ let AddedComplexity = -1000;
+
+ let VOP3 = 1;
+ let VALU = 1;
+
+ let AsmMatchConverter = "cvtVOP3";
+ let isCodeGenOnly = 0;
+
+ int Size = 8;
+}
+
+} // End Uses = [EXEC]
+
+//===----------------------------------------------------------------------===//
+// Scalar operations
+//===----------------------------------------------------------------------===//
+
+class SOP1e <bits<8> op> : Enc32 {
+ bits<7> sdst;
+ bits<8> ssrc0;
+
+ let Inst{7-0} = ssrc0;
+ let Inst{15-8} = op;
+ let Inst{22-16} = sdst;
+  let Inst{31-23} = 0x17d; //encoding
+}
+
+class SOP2e <bits<7> op> : Enc32 {
+ bits<7> sdst;
+ bits<8> ssrc0;
+ bits<8> ssrc1;
+
+ let Inst{7-0} = ssrc0;
+ let Inst{15-8} = ssrc1;
+ let Inst{22-16} = sdst;
+ let Inst{29-23} = op;
+ let Inst{31-30} = 0x2; // encoding
+}
+
+class SOPCe <bits<7> op> : Enc32 {
+ bits<8> ssrc0;
+ bits<8> ssrc1;
+
+ let Inst{7-0} = ssrc0;
+ let Inst{15-8} = ssrc1;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17e;
+}
+
+class SOPKe <bits<5> op> : Enc32 {
+ bits <7> sdst;
+ bits <16> simm16;
+
+ let Inst{15-0} = simm16;
+ let Inst{22-16} = sdst;
+ let Inst{27-23} = op;
+ let Inst{31-28} = 0xb; //encoding
+}
+
+class SOPK64e <bits<5> op> : Enc64 {
+ bits <7> sdst = 0;
+ bits <16> simm16;
+ bits <32> imm;
+
+ let Inst{15-0} = simm16;
+ let Inst{22-16} = sdst;
+ let Inst{27-23} = op;
+ let Inst{31-28} = 0xb;
+
+ let Inst{63-32} = imm;
+}
+
+class SOPPe <bits<7> op> : Enc32 {
+ bits <16> simm16;
+
+ let Inst{15-0} = simm16;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17f; // encoding
+}
+
+class SMRDe <bits<5> op, bits<1> imm> : Enc32 {
+ bits<7> sdst;
+ bits<7> sbase;
+ bits<8> offset;
+
+ let Inst{7-0} = offset;
+ let Inst{8} = imm;
+ let Inst{14-9} = sbase{6-1};
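+  // Only bits 6-1 of sbase are encoded, i.e. the base is given in units of
+  // SGPR pairs.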
+ let Inst{21-15} = sdst;
+ let Inst{26-22} = op;
+ let Inst{31-27} = 0x18; //encoding
+}
+
+let SchedRW = [WriteSALU] in {
+class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let isCodeGenOnly = 0;
+ let SALU = 1;
+ let SOP1 = 1;
+}
+
+class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let isCodeGenOnly = 0;
+ let SALU = 1;
+ let SOP2 = 1;
+
+ let UseNamedOperandTable = 1;
+}
+
+class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern>, SOPCe <op> {
+
+ let DisableEncoding = "$dst";
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SALU = 1;
+ let SOPC = 1;
+ let isCodeGenOnly = 0;
+
+ let UseNamedOperandTable = 1;
+}
+
+class SOPK <dag outs, dag ins, string asm, list<dag> pattern> :
+  InstSI <outs, ins, asm, pattern> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SALU = 1;
+ let SOPK = 1;
+
+ let UseNamedOperandTable = 1;
+}
+
+class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
+  InstSI <(outs), ins, asm, pattern>, SOPPe <op> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SALU = 1;
+ let SOPP = 1;
+
+ let UseNamedOperandTable = 1;
+}
+
+} // let SchedRW = [WriteSALU]
+
+class SMRD <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern> {
+
+ let LGKM_CNT = 1;
+ let SMRD = 1;
+ let mayStore = 0;
+ let mayLoad = 1;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let SchedRW = [WriteSMEM];
+}
+
+//===----------------------------------------------------------------------===//
+// Vector ALU operations
+//===----------------------------------------------------------------------===//
+
+class VOP1e <bits<8> op> : Enc32 {
+ bits<8> vdst;
+ bits<9> src0;
+
+ let Inst{8-0} = src0;
+ let Inst{16-9} = op;
+ let Inst{24-17} = vdst;
+ let Inst{31-25} = 0x3f; //encoding
+}
+
+class VOP2e <bits<6> op> : Enc32 {
+ bits<8> vdst;
+ bits<9> src0;
+ bits<8> src1;
+
+ let Inst{8-0} = src0;
+ let Inst{16-9} = src1;
+ let Inst{24-17} = vdst;
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; //encoding
+}
+
+class VOP2_MADKe <bits<6> op> : Enc64 {
+
+ bits<8> vdst;
+ bits<9> src0;
+ bits<8> vsrc1;
+ bits<32> src2;
+
+ let Inst{8-0} = src0;
+ let Inst{16-9} = vsrc1;
+ let Inst{24-17} = vdst;
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; // encoding
+ let Inst{63-32} = src2;
+}
+
+class VOP3e <bits<9> op> : Enc64 {
+ bits<8> vdst;
+ bits<2> src0_modifiers;
+ bits<9> src0;
+ bits<2> src1_modifiers;
+ bits<9> src1;
+ bits<2> src2_modifiers;
+ bits<9> src2;
+ bits<1> clamp;
+ bits<2> omod;
+
+ let Inst{7-0} = vdst;
+ let Inst{8} = src0_modifiers{1};
+ let Inst{9} = src1_modifiers{1};
+ let Inst{10} = src2_modifiers{1};
+ let Inst{11} = clamp;
+ let Inst{25-17} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = src0;
+ let Inst{49-41} = src1;
+ let Inst{58-50} = src2;
+ let Inst{60-59} = omod;
+ let Inst{61} = src0_modifiers{0};
+ let Inst{62} = src1_modifiers{0};
+ let Inst{63} = src2_modifiers{0};
+}
+
+class VOP3be <bits<9> op> : Enc64 {
+ bits<8> vdst;
+ bits<2> src0_modifiers;
+ bits<9> src0;
+ bits<2> src1_modifiers;
+ bits<9> src1;
+ bits<2> src2_modifiers;
+ bits<9> src2;
+ bits<7> sdst;
+ bits<2> omod;
+
+ let Inst{7-0} = vdst;
+ let Inst{14-8} = sdst;
+ let Inst{25-17} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = src0;
+ let Inst{49-41} = src1;
+ let Inst{58-50} = src2;
+ let Inst{60-59} = omod;
+ let Inst{61} = src0_modifiers{0};
+ let Inst{62} = src1_modifiers{0};
+ let Inst{63} = src2_modifiers{0};
+}
+
+class VOPCe <bits<8> op> : Enc32 {
+ bits<9> src0;
+ bits<8> vsrc1;
+
+ let Inst{8-0} = src0;
+ let Inst{16-9} = vsrc1;
+ let Inst{24-17} = op;
+ let Inst{31-25} = 0x3e;
+}
+
+class VINTRPe <bits<2> op> : Enc32 {
+ bits<8> vdst;
+ bits<8> vsrc;
+ bits<2> attrchan;
+ bits<6> attr;
+
+ let Inst{7-0} = vsrc;
+ let Inst{9-8} = attrchan;
+ let Inst{15-10} = attr;
+ let Inst{17-16} = op;
+ let Inst{25-18} = vdst;
+ let Inst{31-26} = 0x32; // encoding
+}
+
+class DSe <bits<8> op> : Enc64 {
+ bits<8> vdst;
+ bits<1> gds;
+ bits<8> addr;
+ bits<8> data0;
+ bits<8> data1;
+ bits<8> offset0;
+ bits<8> offset1;
+
+ let Inst{7-0} = offset0;
+ let Inst{15-8} = offset1;
+ let Inst{17} = gds;
+ let Inst{25-18} = op;
+ let Inst{31-26} = 0x36; //encoding
+ let Inst{39-32} = addr;
+ let Inst{47-40} = data0;
+ let Inst{55-48} = data1;
+ let Inst{63-56} = vdst;
+}
+
+class MUBUFe <bits<7> op> : Enc64 {
+ bits<12> offset;
+ bits<1> offen;
+ bits<1> idxen;
+ bits<1> glc;
+ bits<1> addr64;
+ bits<1> lds;
+ bits<8> vaddr;
+ bits<8> vdata;
+ bits<7> srsrc;
+ bits<1> slc;
+ bits<1> tfe;
+ bits<8> soffset;
+
+ let Inst{11-0} = offset;
+ let Inst{12} = offen;
+ let Inst{13} = idxen;
+ let Inst{14} = glc;
+ let Inst{15} = addr64;
+ let Inst{16} = lds;
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x38; //encoding
+ let Inst{39-32} = vaddr;
+ let Inst{47-40} = vdata;
+ let Inst{52-48} = srsrc{6-2};
+ let Inst{54} = slc;
+ let Inst{55} = tfe;
+ let Inst{63-56} = soffset;
+}
+
+class MTBUFe <bits<3> op> : Enc64 {
+ bits<8> vdata;
+ bits<12> offset;
+ bits<1> offen;
+ bits<1> idxen;
+ bits<1> glc;
+ bits<1> addr64;
+ bits<4> dfmt;
+ bits<3> nfmt;
+ bits<8> vaddr;
+ bits<7> srsrc;
+ bits<1> slc;
+ bits<1> tfe;
+ bits<8> soffset;
+
+ let Inst{11-0} = offset;
+ let Inst{12} = offen;
+ let Inst{13} = idxen;
+ let Inst{14} = glc;
+ let Inst{15} = addr64;
+ let Inst{18-16} = op;
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
+ let Inst{31-26} = 0x3a; //encoding
+ let Inst{39-32} = vaddr;
+ let Inst{47-40} = vdata;
+ let Inst{52-48} = srsrc{6-2};
+ let Inst{54} = slc;
+ let Inst{55} = tfe;
+ let Inst{63-56} = soffset;
+}
+
+class MIMGe <bits<7> op> : Enc64 {
+ bits<8> vdata;
+ bits<4> dmask;
+ bits<1> unorm;
+ bits<1> glc;
+ bits<1> da;
+ bits<1> r128;
+ bits<1> tfe;
+ bits<1> lwe;
+ bits<1> slc;
+ bits<8> vaddr;
+ bits<7> srsrc;
+ bits<7> ssamp;
+
+ let Inst{11-8} = dmask;
+ let Inst{12} = unorm;
+ let Inst{13} = glc;
+ let Inst{14} = da;
+ let Inst{15} = r128;
+ let Inst{16} = tfe;
+ let Inst{17} = lwe;
+ let Inst{24-18} = op;
+ let Inst{25} = slc;
+ let Inst{31-26} = 0x3c;
+ let Inst{39-32} = vaddr;
+ let Inst{47-40} = vdata;
+ let Inst{52-48} = srsrc{6-2};
+ let Inst{57-53} = ssamp{6-2};
+}
+
+class FLATe<bits<7> op> : Enc64 {
+ bits<8> addr;
+ bits<8> data;
+ bits<8> vdst;
+ bits<1> slc;
+ bits<1> glc;
+ bits<1> tfe;
+
+ // 15-0 is reserved.
+ let Inst{16} = glc;
+ let Inst{17} = slc;
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x37; // Encoding.
+ let Inst{39-32} = addr;
+ let Inst{47-40} = data;
+ // 54-48 is reserved.
+ let Inst{55} = tfe;
+ let Inst{63-56} = vdst;
+}
+
+class EXPe : Enc64 {
+ bits<4> en;
+ bits<6> tgt;
+ bits<1> compr;
+ bits<1> done;
+ bits<1> vm;
+ bits<8> vsrc0;
+ bits<8> vsrc1;
+ bits<8> vsrc2;
+ bits<8> vsrc3;
+
+ let Inst{3-0} = en;
+ let Inst{9-4} = tgt;
+ let Inst{10} = compr;
+ let Inst{11} = done;
+ let Inst{12} = vm;
+ let Inst{31-26} = 0x3e;
+ let Inst{39-32} = vsrc0;
+ let Inst{47-40} = vsrc1;
+ let Inst{55-48} = vsrc2;
+ let Inst{63-56} = vsrc3;
+}
+
+let Uses = [EXEC] in {
+
+class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ VOP1Common <outs, ins, asm, pattern>,
+ VOP1e<op> {
+ let isCodeGenOnly = 0;
+}
+
+class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ VOP2Common <outs, ins, asm, pattern>, VOP2e<op> {
+ let isCodeGenOnly = 0;
+}
+
+class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
+ VOPCCommon <ins, asm, pattern>, VOPCe <op>;
+
+class VINTRPCommon <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+} // End Uses = [EXEC]
+
+//===----------------------------------------------------------------------===//
+// Vector I/O operations
+//===----------------------------------------------------------------------===//
+
+let Uses = [EXEC] in {
+
+class DS <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ let LGKM_CNT = 1;
+ let DS = 1;
+ let UseNamedOperandTable = 1;
+ let Uses = [M0];
+
+  // Most instructions load and store data, so set this as the default.
+ let mayLoad = 1;
+ let mayStore = 1;
+
+ let hasSideEffects = 0;
+ let AsmMatchConverter = "cvtDS";
+ let SchedRW = [WriteLDS];
+}
+
+class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern> {
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+ let MUBUF = 1;
+
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let AsmMatchConverter = "cvtMubuf";
+ let SchedRW = [WriteVMEM];
+}
+
+class MTBUF <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern> {
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+ let MTBUF = 1;
+
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let SchedRW = [WriteVMEM];
+}
+
+class FLAT <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern>, FLATe <op> {
+ let FLAT = 1;
+  // Internally, FLAT instructions are executed as both an LDS and a
+ // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
+ // and are not considered done until both have been decremented.
+ let VM_CNT = 1;
+ let LGKM_CNT = 1;
+
+ let Uses = [EXEC, FLAT_SCR]; // M0
+
+ let UseNamedOperandTable = 1;
+ let hasSideEffects = 0;
+ let AsmMatchConverter = "cvtFlat";
+ let SchedRW = [WriteVMEM];
+}
+
+class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern>, MIMGe <op> {
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+ let MIMG = 1;
+
+ let hasSideEffects = 0; // XXX ????
+}
+
+
+} // End Uses = [EXEC]
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
new file mode 100644
index 0000000..47bc178
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -0,0 +1,2723 @@
+//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI Implementation of TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIInstrInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "SIDefines.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
+ : AMDGPUInstrInfo(st), RI() {}
+
+//===----------------------------------------------------------------------===//
+// TargetInstrInfo callbacks
+//===----------------------------------------------------------------------===//
+
+static unsigned getNumOperandsNoGlue(SDNode *Node) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
+ --N;
+ return N;
+}
+
+static SDValue findChainOperand(SDNode *Load) {
+ SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
+ assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
+ return LastOp;
+}
+
+/// \brief Returns true if both nodes have the same value for the given
+/// operand \p OpName, or if both nodes do not have this operand.
+static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
+ unsigned Opc0 = N0->getMachineOpcode();
+ unsigned Opc1 = N1->getMachineOpcode();
+
+ int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
+ int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);
+
+ if (Op0Idx == -1 && Op1Idx == -1)
+ return true;
+
+
+ if ((Op0Idx == -1 && Op1Idx != -1) ||
+ (Op1Idx == -1 && Op0Idx != -1))
+ return false;
+
+ // getNamedOperandIdx returns the index for the MachineInstr's operands,
+ // which includes the result as the first operand. We are indexing into the
+ // MachineSDNode's operands, so we need to skip the result operand to get
+ // the real index.
+ --Op0Idx;
+ --Op1Idx;
+
+ return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
+}
+
+bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ // TODO: The generic check fails for VALU instructions that should be
+ // rematerializable due to implicit reads of exec. We really want all of the
+  // generic logic for this except for that check.
+ switch (MI->getOpcode()) {
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
+ int64_t &Offset0,
+ int64_t &Offset1) const {
+ if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
+ return false;
+
+ unsigned Opc0 = Load0->getMachineOpcode();
+ unsigned Opc1 = Load1->getMachineOpcode();
+
+ // Make sure both are actually loads.
+ if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
+ return false;
+
+ if (isDS(Opc0) && isDS(Opc1)) {
+
+ // FIXME: Handle this case:
+ if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
+ return false;
+
+ // Check base reg.
+ if (Load0->getOperand(1) != Load1->getOperand(1))
+ return false;
+
+ // Check chain.
+ if (findChainOperand(Load0) != findChainOperand(Load1))
+ return false;
+
+ // Skip read2 / write2 variants for simplicity.
+    // TODO: We should report true if the used offsets are adjacent (excluding
+ // st64 versions).
+ if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
+ AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
+ return false;
+
+ Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
+ return true;
+ }
+
+ if (isSMRD(Opc0) && isSMRD(Opc1)) {
+ assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));
+
+ // Check base reg.
+ if (Load0->getOperand(0) != Load1->getOperand(0))
+ return false;
+
+ const ConstantSDNode *Load0Offset =
+ dyn_cast<ConstantSDNode>(Load0->getOperand(1));
+ const ConstantSDNode *Load1Offset =
+ dyn_cast<ConstantSDNode>(Load1->getOperand(1));
+
+ if (!Load0Offset || !Load1Offset)
+ return false;
+
+ // Check chain.
+ if (findChainOperand(Load0) != findChainOperand(Load1))
+ return false;
+
+ Offset0 = Load0Offset->getZExtValue();
+ Offset1 = Load1Offset->getZExtValue();
+ return true;
+ }
+
+ // MUBUF and MTBUF can access the same addresses.
+ if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
+
+ // MUBUF and MTBUF have vaddr at different indices.
+ if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
+ findChainOperand(Load0) != findChainOperand(Load1) ||
+ !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
+ !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
+ return false;
+
+ int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
+ int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
+
+ if (OffIdx0 == -1 || OffIdx1 == -1)
+ return false;
+
+ // getNamedOperandIdx returns the index for MachineInstrs. Since they
+    // include the output in the operand list, but SDNodes don't, so we need to
+ // subtract the index by one.
+ --OffIdx0;
+ --OffIdx1;
+
+ SDValue Off0 = Load0->getOperand(OffIdx0);
+ SDValue Off1 = Load1->getOperand(OffIdx1);
+
+ // The offset might be a FrameIndexSDNode.
+ if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
+ return false;
+
+ Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
+ Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+static bool isStride64(unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::DS_READ2ST64_B32:
+ case AMDGPU::DS_READ2ST64_B64:
+ case AMDGPU::DS_WRITE2ST64_B32:
+ case AMDGPU::DS_WRITE2ST64_B64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
+ unsigned &Offset,
+ const TargetRegisterInfo *TRI) const {
+ unsigned Opc = LdSt->getOpcode();
+ if (isDS(Opc)) {
+ const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
+ AMDGPU::OpName::offset);
+ if (OffsetImm) {
+ // Normal, single offset LDS instruction.
+ const MachineOperand *AddrReg = getNamedOperand(*LdSt,
+ AMDGPU::OpName::addr);
+
+ BaseReg = AddrReg->getReg();
+ Offset = OffsetImm->getImm();
+ return true;
+ }
+
+ // The 2 offset instructions use offset0 and offset1 instead. We can treat
+ // these as a load with a single offset if the 2 offsets are consecutive. We
+ // will use this for some partially aligned loads.
+ const MachineOperand *Offset0Imm = getNamedOperand(*LdSt,
+ AMDGPU::OpName::offset0);
+ const MachineOperand *Offset1Imm = getNamedOperand(*LdSt,
+ AMDGPU::OpName::offset1);
+
+ uint8_t Offset0 = Offset0Imm->getImm();
+ uint8_t Offset1 = Offset1Imm->getImm();
+ assert(Offset1 > Offset0);
+
+ if (Offset1 - Offset0 == 1) {
+ // Each of these offsets is in element sized units, so we need to convert
+ // to bytes of the individual reads.
+
+ unsigned EltSize;
+ if (LdSt->mayLoad())
+ EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
+ else {
+ assert(LdSt->mayStore());
+ int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
+ EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
+ }
+
+ if (isStride64(Opc))
+ EltSize *= 64;
+
+ const MachineOperand *AddrReg = getNamedOperand(*LdSt,
+ AMDGPU::OpName::addr);
+ BaseReg = AddrReg->getReg();
+ Offset = EltSize * Offset0;
+ return true;
+ }
+
+ return false;
+ }
+
+ if (isMUBUF(Opc) || isMTBUF(Opc)) {
+ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
+ return false;
+
+ const MachineOperand *AddrReg = getNamedOperand(*LdSt,
+ AMDGPU::OpName::vaddr);
+ if (!AddrReg)
+ return false;
+
+ const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
+ AMDGPU::OpName::offset);
+ BaseReg = AddrReg->getReg();
+ Offset = OffsetImm->getImm();
+ return true;
+ }
+
+ if (isSMRD(Opc)) {
+ const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
+ AMDGPU::OpName::offset);
+ if (!OffsetImm)
+ return false;
+
+ const MachineOperand *SBaseReg = getNamedOperand(*LdSt,
+ AMDGPU::OpName::sbase);
+ BaseReg = SBaseReg->getReg();
+ Offset = OffsetImm->getImm();
+ return true;
+ }
+
+ return false;
+}
+
+bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
+ MachineInstr *SecondLdSt,
+ unsigned NumLoads) const {
+ unsigned Opc0 = FirstLdSt->getOpcode();
+ unsigned Opc1 = SecondLdSt->getOpcode();
+
+ // TODO: This needs finer tuning
+ if (NumLoads > 4)
+ return false;
+
+ if (isDS(Opc0) && isDS(Opc1))
+ return true;
+
+ if (isSMRD(Opc0) && isSMRD(Opc1))
+ return true;
+
+ if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1)))
+ return true;
+
+ return false;
+}
+
+void
+SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+
+ // If we are trying to copy to or from SCC, there is a bug somewhere else in
+ // the backend. While it may be theoretically possible to do this, it should
+ // never be necessary.
+ assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
+
+ static const int16_t Sub0_15[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
+ };
+
+ static const int16_t Sub0_7[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
+ };
+
+ static const int16_t Sub0_3[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
+ };
+
+ static const int16_t Sub0_2[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
+ };
+
+ static const int16_t Sub0_1[] = {
+ AMDGPU::sub0, AMDGPU::sub1, 0
+ };
+
+ unsigned Opcode;
+ const int16_t *SubIndices;
+
+ if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
+ if (DestReg == AMDGPU::VCC) {
+ if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ // FIXME: Hack until VReg_1 removed.
+ assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC)
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ }
+
+ return;
+ }
+
+ assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_3;
+
+ } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_7;
+
+ } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_15;
+
+ } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
+ assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_64RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_1;
+
+ } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_2;
+
+ } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_128RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_3;
+
+ } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_256RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_7;
+
+ } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_512RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_15;
+
+ } else {
+ llvm_unreachable("Can't copy register!");
+ }
+
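+  // Expand wide copies one 32-bit subregister at a time. All but the final
+  // sub-copy also implicitly define the full destination register so the
+  // super-register's liveness stays visible to later passes.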
+ while (unsigned SubIdx = *SubIndices++) {
+ MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
+ get(Opcode), RI.getSubReg(DestReg, SubIdx));
+
+ Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));
+
+ if (*SubIndices)
+ Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
+ }
+}
+
+unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
+ const unsigned Opcode = MI.getOpcode();
+
+ int NewOpc;
+
+ // Try to map original to commuted opcode
+ NewOpc = AMDGPU::getCommuteRev(Opcode);
+ // Check if the commuted (REV) opcode exists on the target.
+ if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
+ return NewOpc;
+
+ // Try to map commuted to original opcode
+ NewOpc = AMDGPU::getCommuteOrig(Opcode);
+ // Check if the original (non-REV) opcode exists on the target.
+ if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
+ return NewOpc;
+
+ return Opcode;
+}
+
+unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
+
+ if (DstRC->getSize() == 4) {
+ return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
+ } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
+ return AMDGPU::S_MOV_B64;
+ } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
+ return AMDGPU::V_MOV_B64_PSEUDO;
+ }
+ return AMDGPU::COPY;
+}
+
+void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction *MF = MBB.getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ int Opcode = -1;
+
+ if (RI.isSGPRClass(RC)) {
+ // We are only allowed to create one new instruction when spilling
+ // registers, so we need to use pseudo instruction for spilling
+ // SGPRs.
+ switch (RC->getSize() * 8) {
+ case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break;
+ case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
+ }
+ } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
+ MFI->setHasSpilledVGPRs();
+
+ switch(RC->getSize() * 8) {
+ case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break;
+ case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break;
+ case 96: Opcode = AMDGPU::SI_SPILL_V96_SAVE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break;
+ }
+ }
+
+ if (Opcode != -1) {
+ FrameInfo->setObjectAlignment(FrameIndex, 4);
+ BuildMI(MBB, MI, DL, get(Opcode))
+ .addReg(SrcReg)
+ .addFrameIndex(FrameIndex)
+      // Placeholder registers; these will be filled in by
+ // SIPrepareScratchRegs.
+ .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
+ .addReg(AMDGPU::SGPR0, RegState::Undef);
+ } else {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
+ " spill register");
+ BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
+ .addReg(SrcReg);
+ }
+}
+
+void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction *MF = MBB.getParent();
+ const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ int Opcode = -1;
+
+ if (RI.isSGPRClass(RC)){
+ switch(RC->getSize() * 8) {
+ case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
+ case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
+ }
+ } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
+ switch(RC->getSize() * 8) {
+ case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break;
+ case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break;
+ case 96: Opcode = AMDGPU::SI_SPILL_V96_RESTORE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break;
+ }
+ }
+
+ if (Opcode != -1) {
+ FrameInfo->setObjectAlignment(FrameIndex, 4);
+ BuildMI(MBB, MI, DL, get(Opcode), DestReg)
+ .addFrameIndex(FrameIndex)
+      // Placeholder registers; these will be filled in by
+ // SIPrepareScratchRegs.
+ .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
+ .addReg(AMDGPU::SGPR0, RegState::Undef);
+
+ } else {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
+ " restore register");
+ BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
+ }
+}
+
+/// \param @Offset Offset in bytes of the FrameIndex being spilled
+unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ RegScavenger *RS, unsigned TmpReg,
+ unsigned FrameOffset,
+ unsigned Size) const {
+ MachineFunction *MF = MBB.getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
+ unsigned WavefrontSize = ST.getWavefrontSize();
+
+ unsigned TIDReg = MFI->getTIDReg();
+ if (!MFI->hasCalculatedTID()) {
+ MachineBasicBlock &Entry = MBB.getParent()->front();
+ MachineBasicBlock::iterator Insert = Entry.front();
+ DebugLoc DL = Insert->getDebugLoc();
+
+ TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
+ if (TIDReg == AMDGPU::NoRegister)
+ return TIDReg;
+
+
+ if (MFI->getShaderType() == ShaderType::COMPUTE &&
+ WorkGroupSize > WavefrontSize) {
+
+ unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X);
+ unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y);
+ unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
+ unsigned InputPtrReg =
+ TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
+ for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
+ if (!Entry.isLiveIn(Reg))
+ Entry.addLiveIn(Reg);
+ }
+
+ RS->enterBasicBlock(&Entry);
+ unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+ unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+ BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
+ .addReg(InputPtrReg)
+ .addImm(SI::KernelInputOffsets::NGROUPS_Z);
+ BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
+ .addReg(InputPtrReg)
+ .addImm(SI::KernelInputOffsets::NGROUPS_Y);
+
+ // NGROUPS.X * NGROUPS.Y
+ BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
+ .addReg(STmp1)
+ .addReg(STmp0);
+ // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
+ .addReg(STmp1)
+ .addReg(TIDIGXReg);
+      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
+ .addReg(STmp0)
+ .addReg(TIDIGYReg)
+ .addReg(TIDReg);
+      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
+ .addReg(TIDReg)
+ .addReg(TIDIGZReg);
+ } else {
+ // Get the wave id
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
+ TIDReg)
+ .addImm(-1)
+ .addImm(0);
+
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
+ TIDReg)
+ .addImm(-1)
+ .addReg(TIDReg);
+ }
+
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
+ TIDReg)
+ .addImm(2)
+ .addReg(TIDReg);
+ MFI->setTIDReg(TIDReg);
+ }
+
+ // Add FrameIndex to LDS offset
+ unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
+ .addImm(LDSOffset)
+ .addReg(TIDReg);
+
+ return TmpReg;
+}
+
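+// Emit NOPs in batches: an S_NOP immediate of N is assumed to produce N + 1
+// wait states, so each S_NOP covers at most eight of the requested count.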
+void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
+ int Count) const {
+ while (Count > 0) {
+ int Arg;
+ if (Count >= 8)
+ Arg = 7;
+ else
+ Arg = Count - 1;
+ Count -= 8;
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
+ .addImm(Arg);
+ }
+}
+
+bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ switch (MI->getOpcode()) {
+ default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
+
+ case AMDGPU::SI_CONSTDATA_PTR: {
+ unsigned Reg = MI->getOperand(0).getReg();
+ unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
+ unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg);
+
+ // Add 32-bit offset from this instruction to the start of the constant data.
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo)
+ .addReg(RegLo)
+ .addTargetIndex(AMDGPU::TI_CONSTDATA_START)
+ .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit);
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi)
+ .addReg(RegHi)
+ .addImm(0)
+ .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit)
+ .addReg(AMDGPU::SCC, RegState::Implicit);
+ MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::SGPR_USE:
+ // This is just a placeholder for register allocation.
+ MI->eraseFromParent();
+ break;
+
+ case AMDGPU::V_MOV_B64_PSEUDO: {
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
+ unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+
+ const MachineOperand &SrcOp = MI->getOperand(1);
+ // FIXME: Will this work for 64-bit floating point immediates?
+ assert(!SrcOp.isFPImm());
+ if (SrcOp.isImm()) {
+ APInt Imm(64, SrcOp.getImm());
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
+ .addImm(Imm.getLoBits(32).getZExtValue())
+ .addReg(Dst, RegState::Implicit);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
+ .addImm(Imm.getHiBits(32).getZExtValue())
+ .addReg(Dst, RegState::Implicit);
+ } else {
+ assert(SrcOp.isReg());
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
+ .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
+ .addReg(Dst, RegState::Implicit);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
+ .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
+ .addReg(Dst, RegState::Implicit);
+ }
+ MI->eraseFromParent();
+ break;
+ }
+
+ case AMDGPU::V_CNDMASK_B64_PSEUDO: {
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
+ unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+ unsigned Src0 = MI->getOperand(1).getReg();
+ unsigned Src1 = MI->getOperand(2).getReg();
+ const MachineOperand &SrcCond = MI->getOperand(3);
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
+ .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
+ .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
+ .addOperand(SrcCond);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
+ .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
+ .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
+ .addOperand(SrcCond);
+ MI->eraseFromParent();
+ break;
+ }
+ }
+ return true;
+}
+
+MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const {
+
+ if (MI->getNumOperands() < 3)
+ return nullptr;
+
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src0);
+ assert(Src0Idx != -1 && "Should always have src0 operand");
+
+ MachineOperand &Src0 = MI->getOperand(Src0Idx);
+ if (!Src0.isReg())
+ return nullptr;
+
+ int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src1);
+ if (Src1Idx == -1)
+ return nullptr;
+
+ MachineOperand &Src1 = MI->getOperand(Src1Idx);
+
+ // Make sure it's legal to commute operands for VOP2.
+ if (isVOP2(MI->getOpcode()) &&
+ (!isOperandLegal(MI, Src0Idx, &Src1) ||
+ !isOperandLegal(MI, Src1Idx, &Src0))) {
+ return nullptr;
+ }
+
+ if (!Src1.isReg()) {
+ // Allow commuting instructions with Imm operands.
+ if (NewMI || !Src1.isImm() ||
+ (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
+ return nullptr;
+ }
+
+ // Be sure to copy the source modifiers to the right place.
+ if (MachineOperand *Src0Mods
+ = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
+ MachineOperand *Src1Mods
+ = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers);
+
+ int Src0ModsVal = Src0Mods->getImm();
+ if (!Src1Mods && Src0ModsVal != 0)
+ return nullptr;
+
+ // XXX - This assert might be a lie. It might be useful to have a neg
+ // modifier with 0.0.
+ int Src1ModsVal = Src1Mods->getImm();
+ assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");
+
+ Src1Mods->setImm(Src0ModsVal);
+ Src0Mods->setImm(Src1ModsVal);
+ }
+
+ unsigned Reg = Src0.getReg();
+ unsigned SubReg = Src0.getSubReg();
+ if (Src1.isImm())
+ Src0.ChangeToImmediate(Src1.getImm());
+ else
+ llvm_unreachable("Should only have immediates");
+
+ Src1.ChangeToRegister(Reg, false);
+ Src1.setSubReg(SubReg);
+ } else {
+ MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+ }
+
+ if (MI)
+ MI->setDesc(get(commuteOpcode(*MI)));
+
+ return MI;
+}
+
+// This needs to be implemented because the source modifiers may be inserted
+// between the true commutable operands, and the base
+// TargetInstrInfo::commuteInstruction uses it.
+bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isCommutable())
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ if (Src0Idx == -1)
+ return false;
+
+ // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
+ // immediate.
+ if (!MI->getOperand(Src0Idx).isReg())
+ return false;
+
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+ if (Src1Idx == -1)
+ return false;
+
+ if (!MI->getOperand(Src1Idx).isReg())
+ return false;
+
+ // If any source modifiers are set, the generic instruction commuting won't
+ // understand how to copy the source modifiers.
+ if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
+ hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
+ return false;
+
+ SrcOpIdx1 = Src0Idx;
+ SrcOpIdx2 = Src1Idx;
+ return true;
+}
+
+MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ unsigned SrcReg) const {
+ return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
+ DstReg).addReg(SrcReg);
+}
+
+bool SIInstrInfo::isMov(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ return true;
+ }
+}
+
+bool
+SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ return RC != &AMDGPU::EXECRegRegClass;
+}
+
+static void removeModOperands(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
+ AMDGPU::OpName::src0_modifiers);
+ int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
+ AMDGPU::OpName::src1_modifiers);
+ int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
+ AMDGPU::OpName::src2_modifiers);
+
+ MI.RemoveOperand(Src2ModIdx);
+ MI.RemoveOperand(Src1ModIdx);
+ MI.RemoveOperand(Src0ModIdx);
+}
+
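+// Try to fold an immediate defined by DefMI into its single use in UseMI.
+// For V_MAD_F32 this rewrites the instruction to V_MADMK_F32 (constant
+// multiplicand) or V_MADAK_F32 (constant addend) when the remaining operands
+// stay legal.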
+bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const {
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ unsigned Opc = UseMI->getOpcode();
+ if (Opc == AMDGPU::V_MAD_F32) {
+ // Don't fold if we are using source modifiers. The new VOP2 instructions
+ // don't have them.
+ if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) ||
+ hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) ||
+ hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) {
+ return false;
+ }
+
+ MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0);
+ MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1);
+ MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2);
+
+ // Multiplied part is the constant: Use v_madmk_f32
+ // We should only expect these to be on src0 due to canonicalizations.
+ if (Src0->isReg() && Src0->getReg() == Reg) {
+ if (!Src1->isReg() ||
+ (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
+ return false;
+
+ if (!Src2->isReg() ||
+ (Src2->isReg() && RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))))
+ return false;
+
+ // We need to do some weird looking operand shuffling since the madmk
+ // operands are out of the normal expected order with the multiplied
+ // constant as the last operand.
+ //
+ // v_mad_f32 src0, src1, src2 -> v_madmk_f32 src0 * src2K + src1
+ // src0 -> src2 K
+ // src1 -> src0
+ // src2 -> src1
+
+ const int64_t Imm = DefMI->getOperand(1).getImm();
+
+ // FIXME: This would be a lot easier if we could return a new instruction
+ // instead of having to modify in place.
+
+ // Remove these first since they are at the end.
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ AMDGPU::OpName::omod));
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ AMDGPU::OpName::clamp));
+
+ unsigned Src1Reg = Src1->getReg();
+ unsigned Src1SubReg = Src1->getSubReg();
+ unsigned Src2Reg = Src2->getReg();
+ unsigned Src2SubReg = Src2->getSubReg();
+ Src0->setReg(Src1Reg);
+ Src0->setSubReg(Src1SubReg);
+ Src0->setIsKill(Src1->isKill());
+
+ Src1->setReg(Src2Reg);
+ Src1->setSubReg(Src2SubReg);
+ Src1->setIsKill(Src2->isKill());
+
+ Src2->ChangeToImmediate(Imm);
+
+ removeModOperands(*UseMI);
+ UseMI->setDesc(get(AMDGPU::V_MADMK_F32));
+
+ bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ if (DeleteDef)
+ DefMI->eraseFromParent();
+
+ return true;
+ }
+
+ // Added part is the constant: Use v_madak_f32
+ if (Src2->isReg() && Src2->getReg() == Reg) {
+ // Not allowed to use constant bus for another operand.
+ // We can however allow an inline immediate as src0.
+ if (!Src0->isImm() &&
+ (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
+ return false;
+
+ if (!Src1->isReg() ||
+ (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
+ return false;
+
+ const int64_t Imm = DefMI->getOperand(1).getImm();
+
+ // FIXME: This would be a lot easier if we could return a new instruction
+ // instead of having to modify in place.
+
+ // Remove these first since they are at the end.
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ AMDGPU::OpName::omod));
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ AMDGPU::OpName::clamp));
+
+ Src2->ChangeToImmediate(Imm);
+
+ // These come before src2.
+ removeModOperands(*UseMI);
+ UseMI->setDesc(get(AMDGPU::V_MADAK_F32));
+
+ bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ if (DeleteDef)
+ DefMI->eraseFromParent();
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool
+SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ switch(MI->getOpcode()) {
+ default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA);
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::V_MOV_B32_e32:
+ return MI->getOperand(1).isImm();
+ }
+}
+
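+// Return true if two accesses of the given widths starting at the given
+// offsets cannot overlap, e.g. a 4-byte access at offset 0 and a 4-byte
+// access at offset 4 are disjoint because 0 + 4 <= 4.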
+static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
+ int WidthB, int OffsetB) {
+ int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
+ int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
+ int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+ return LowOffset + LowWidth <= HighOffset;
+}
+
+bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
+ MachineInstr *MIb) const {
+ unsigned BaseReg0, Offset0;
+ unsigned BaseReg1, Offset1;
+
+ if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
+ getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
+ assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
+ "read2 / write2 not expected here yet");
+ unsigned Width0 = (*MIa->memoperands_begin())->getSize();
+ unsigned Width1 = (*MIb->memoperands_begin())->getSize();
+ if (BaseReg0 == BaseReg1 &&
+ offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
+ MachineInstr *MIb,
+ AliasAnalysis *AA) const {
+ unsigned Opc0 = MIa->getOpcode();
+ unsigned Opc1 = MIb->getOpcode();
+
+ assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
+ "MIa must load from or modify a memory location");
+ assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
+ "MIb must load from or modify a memory location");
+
+ if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects())
+ return false;
+
+ // XXX - Can we relax this between address spaces?
+ if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
+ return false;
+
+ // TODO: Should we check the address space from the MachineMemOperand? That
+ // would allow us to distinguish objects we know don't alias based on the
+ // underlying address space, even if it was lowered to a different one,
+ // e.g. private accesses lowered to use MUBUF instructions on a scratch
+ // buffer.
+ if (isDS(Opc0)) {
+ if (isDS(Opc1))
+ return checkInstOffsetsDoNotOverlap(MIa, MIb);
+
+ return !isFLAT(Opc1);
+ }
+
+ if (isMUBUF(Opc0) || isMTBUF(Opc0)) {
+ if (isMUBUF(Opc1) || isMTBUF(Opc1))
+ return checkInstOffsetsDoNotOverlap(MIa, MIb);
+
+ return !isFLAT(Opc1) && !isSMRD(Opc1);
+ }
+
+ if (isSMRD(Opc0)) {
+ if (isSMRD(Opc1))
+ return checkInstOffsetsDoNotOverlap(MIa, MIb);
+
+ return !isFLAT(Opc1) && !isMUBUF(Opc1) && !isMTBUF(Opc1);
+ }
+
+ if (isFLAT(Opc0)) {
+ if (isFLAT(Opc1))
+ return checkInstOffsetsDoNotOverlap(MIa, MIb);
+
+ return false;
+ }
+
+ return false;
+}
+
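+// An operand can be encoded as an inline constant if it is a small signed
+// integer in [-16, 64] or the bit pattern of one of the special
+// floating-point values (0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0), checked at
+// the operand's bit width below.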
+bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
+ int64_t SVal = Imm.getSExtValue();
+ if (SVal >= -16 && SVal <= 64)
+ return true;
+
+ if (Imm.getBitWidth() == 64) {
+ uint64_t Val = Imm.getZExtValue();
+ return (DoubleToBits(0.0) == Val) ||
+ (DoubleToBits(1.0) == Val) ||
+ (DoubleToBits(-1.0) == Val) ||
+ (DoubleToBits(0.5) == Val) ||
+ (DoubleToBits(-0.5) == Val) ||
+ (DoubleToBits(2.0) == Val) ||
+ (DoubleToBits(-2.0) == Val) ||
+ (DoubleToBits(4.0) == Val) ||
+ (DoubleToBits(-4.0) == Val);
+ }
+
+ // The actual type of the operand does not seem to matter as long
+ // as the bits match one of the inline immediate values. For example:
+ //
+ // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
+ // so it is a legal inline immediate.
+ //
+ // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
+ // floating-point, so it is a legal inline immediate.
+ uint32_t Val = Imm.getZExtValue();
+
+ return (FloatToBits(0.0f) == Val) ||
+ (FloatToBits(1.0f) == Val) ||
+ (FloatToBits(-1.0f) == Val) ||
+ (FloatToBits(0.5f) == Val) ||
+ (FloatToBits(-0.5f) == Val) ||
+ (FloatToBits(2.0f) == Val) ||
+ (FloatToBits(-2.0f) == Val) ||
+ (FloatToBits(4.0f) == Val) ||
+ (FloatToBits(-4.0f) == Val);
+}
+
+bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
+ unsigned OpSize) const {
+ if (MO.isImm()) {
+ // MachineOperand provides no way to tell the true operand size, since it
+ // only records a 64-bit value. We need to know the size to determine if a
+ // 32-bit floating point immediate bit pattern is legal for an integer
+ // immediate. It would be for any 32-bit integer operand, but would not be
+ // for a 64-bit one.
+
+ unsigned BitSize = 8 * OpSize;
+ return isInlineConstant(APInt(BitSize, MO.getImm(), true));
+ }
+
+ return false;
+}
+
+bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
+ unsigned OpSize) const {
+ return MO.isImm() && !isInlineConstant(MO, OpSize);
+}
+
+static bool compareMachineOp(const MachineOperand &Op0,
+ const MachineOperand &Op1) {
+ if (Op0.getType() != Op1.getType())
+ return false;
+
+ switch (Op0.getType()) {
+ case MachineOperand::MO_Register:
+ return Op0.getReg() == Op1.getReg();
+ case MachineOperand::MO_Immediate:
+ return Op0.getImm() == Op1.getImm();
+ default:
+ llvm_unreachable("Didn't expect to be comparing these operand types");
+ }
+}
+
+bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
+ const MachineOperand &MO) const {
+ const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo];
+
+ assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
+
+ if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
+ return true;
+
+ if (OpInfo.RegClass < 0)
+ return false;
+
+ unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
+ if (isLiteralConstant(MO, OpSize))
+ return RI.opCanUseLiteralConstant(OpInfo.OperandType);
+
+ return RI.opCanUseInlineConstant(OpInfo.OperandType);
+}
+
+bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
+ int Op32 = AMDGPU::getVOPe32(Opcode);
+ if (Op32 == -1)
+ return false;
+
+ return pseudoToMCOpcode(Op32) != -1;
+}
+
+bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
+ // The src0_modifiers operand is present on all instructions
+ // that have modifiers.
+
+ return AMDGPU::getNamedOperandIdx(Opcode,
+ AMDGPU::OpName::src0_modifiers) != -1;
+}
+
+bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
+ unsigned OpName) const {
+ const MachineOperand *Mods = getNamedOperand(MI, OpName);
+ return Mods && Mods->getImm();
+}
+
+bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
+ const MachineOperand &MO,
+ unsigned OpSize) const {
+ // Literal constants use the constant bus.
+ if (isLiteralConstant(MO, OpSize))
+ return true;
+
+ if (!MO.isReg() || !MO.isUse())
+ return false;
+
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
+
+ // FLAT_SCR is just an SGPR pair.
+ if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
+ return true;
+
+ // EXEC register uses the constant bus.
+ if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
+ return true;
+
+ // SGPRs use the constant bus
+ if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
+ (!MO.isImplicit() &&
+ (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
+ AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
+ return true;
+ }
+
+ return false;
+}
+
+bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const {
+ uint16_t Opcode = MI->getOpcode();
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
+ int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+
+ // Make sure the number of operands is correct.
+ const MCInstrDesc &Desc = get(Opcode);
+ if (!Desc.isVariadic() &&
+ Desc.getNumOperands() != MI->getNumExplicitOperands()) {
+ ErrInfo = "Instruction has wrong number of operands.";
+ return false;
+ }
+
+ // Make sure the register classes are correct
+ for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isFPImm()) {
+ ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
+ "all fp values to integers.";
+ return false;
+ }
+
+ int RegClass = Desc.OpInfo[i].RegClass;
+
+ switch (Desc.OpInfo[i].OperandType) {
+ case MCOI::OPERAND_REGISTER:
+ if (MI->getOperand(i).isImm()) {
+ ErrInfo = "Illegal immediate value for operand.";
+ return false;
+ }
+ break;
+ case AMDGPU::OPERAND_REG_IMM32:
+ break;
+ case AMDGPU::OPERAND_REG_INLINE_C:
+ if (isLiteralConstant(MI->getOperand(i),
+ RI.getRegClass(RegClass)->getSize())) {
+ ErrInfo = "Illegal immediate value for operand.";
+ return false;
+ }
+ break;
+ case MCOI::OPERAND_IMMEDIATE:
+ // Check if this operand is an immediate.
+ // FrameIndex operands will be replaced by immediates, so they are
+ // allowed.
+ if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) {
+ ErrInfo = "Expected immediate, but got non-immediate";
+ return false;
+ }
+ // Fall-through
+ default:
+ continue;
+ }
+
+ if (!MI->getOperand(i).isReg())
+ continue;
+
+ if (RegClass != -1) {
+ unsigned Reg = MI->getOperand(i).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ const TargetRegisterClass *RC = RI.getRegClass(RegClass);
+ if (!RC->contains(Reg)) {
+ ErrInfo = "Operand has incorrect register class.";
+ return false;
+ }
+ }
+ }
+
+ // Verify VOP*
+ if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
+ // Only look at the true operands. Only a real operand can use the constant
+ // bus, and we don't want to check pseudo-operands like the source modifier
+ // flags.
+ const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
+
+ unsigned ConstantBusCount = 0;
+ unsigned SGPRUsed = AMDGPU::NoRegister;
+ for (int OpIdx : OpIndices) {
+ if (OpIdx == -1)
+ break;
+ const MachineOperand &MO = MI->getOperand(OpIdx);
+ if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) {
+ if (MO.isReg()) {
+ if (MO.getReg() != SGPRUsed)
+ ++ConstantBusCount;
+ SGPRUsed = MO.getReg();
+ } else {
+ ++ConstantBusCount;
+ }
+ }
+ }
+ if (ConstantBusCount > 1) {
+ ErrInfo = "VOP* instruction uses the constant bus more than once";
+ return false;
+ }
+ }
+
+ // Verify misc. restrictions on specific instructions.
+ if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
+ Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
+ const MachineOperand &Src0 = MI->getOperand(Src0Idx);
+ const MachineOperand &Src1 = MI->getOperand(Src1Idx);
+ const MachineOperand &Src2 = MI->getOperand(Src2Idx);
+ if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
+ if (!compareMachineOp(Src0, Src1) &&
+ !compareMachineOp(Src0, Src2)) {
+ ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
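+// Map a scalar (SALU/SMRD) opcode to the VALU opcode used when the
+// instruction has to be moved off the scalar unit; returns
+// INSTRUCTION_LIST_END when no VALU equivalent exists.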
+unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default: return AMDGPU::INSTRUCTION_LIST_END;
+ case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
+ case AMDGPU::COPY: return AMDGPU::COPY;
+ case AMDGPU::PHI: return AMDGPU::PHI;
+ case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
+ case AMDGPU::S_MOV_B32:
+ return MI.getOperand(1).isReg() ?
+ AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
+ case AMDGPU::S_ADD_I32:
+ case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
+ case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
+ case AMDGPU::S_SUB_I32:
+ case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
+ case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
+ case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
+ case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
+ case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
+ case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
+ case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
+ case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
+ case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
+ case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
+ case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
+ case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
+ case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
+ case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
+ case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
+ case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
+ case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
+ case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
+ case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
+ case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
+ case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
+ case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
+ case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
+ case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
+ case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
+ case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
+ case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
+ case AMDGPU::S_LOAD_DWORD_IMM:
+ case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
+ case AMDGPU::S_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
+ case AMDGPU::S_LOAD_DWORDX4_IMM:
+ case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
+ case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
+ case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
+ case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
+ case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
+ }
+}
+
+bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
+ return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
+}
+
+const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ const MCInstrDesc &Desc = get(MI.getOpcode());
+ if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
+ Desc.OpInfo[OpNo].RegClass == -1) {
+ unsigned Reg = MI.getOperand(OpNo).getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return MRI.getRegClass(Reg);
+ return RI.getPhysRegClass(Reg);
+ }
+
+ unsigned RCID = Desc.OpInfo[OpNo].RegClass;
+ return RI.getRegClass(RCID);
+}
+
+bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::REG_SEQUENCE:
+ case AMDGPU::PHI:
+ case AMDGPU::INSERT_SUBREG:
+ return RI.hasVGPRs(getOpRegClass(MI, 0));
+ default:
+ return RI.hasVGPRs(getOpRegClass(MI, OpNo));
+ }
+}
+
+void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
+ MachineBasicBlock::iterator I = MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
+ const TargetRegisterClass *RC = RI.getRegClass(RCID);
+ unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+ if (MO.isReg())
+ Opcode = AMDGPU::COPY;
+ else if (RI.isSGPRClass(RC))
+ Opcode = AMDGPU::S_MOV_B32;
+
+ const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
+ if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
+ VRC = &AMDGPU::VReg_64RegClass;
+ else
+ VRC = &AMDGPU::VGPR_32RegClass;
+
+ unsigned Reg = MRI.createVirtualRegister(VRC);
+ DebugLoc DL = MBB->findDebugLoc(I);
+ BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg)
+ .addOperand(MO);
+ MO.ChangeToRegister(Reg, false);
+}
+
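+// Copy sub-register SubIdx of SuperReg into a fresh virtual register of
+// class SubRC, inserting the copies before MI, and return the new register.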
+unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
+ MachineRegisterInfo &MRI,
+ MachineOperand &SuperReg,
+ const TargetRegisterClass *SuperRC,
+ unsigned SubIdx,
+ const TargetRegisterClass *SubRC)
+ const {
+ assert(SuperReg.isReg());
+
+ unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
+ unsigned SubReg = MRI.createVirtualRegister(SubRC);
+
+ // Just in case the super register is itself a sub-register, copy it to a new
+ // value so we don't need to worry about merging its subreg index with the
+ // SubIdx passed to this function. The register coalescer should be able to
+ // eliminate this extra copy.
+ MachineBasicBlock *MBB = MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
+ .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
+
+ BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
+ .addReg(NewSuperReg, 0, SubIdx);
+
+ return SubReg;
+}
+
+MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
+ MachineBasicBlock::iterator MII,
+ MachineRegisterInfo &MRI,
+ MachineOperand &Op,
+ const TargetRegisterClass *SuperRC,
+ unsigned SubIdx,
+ const TargetRegisterClass *SubRC) const {
+ if (Op.isImm()) {
+ // XXX - Is there a better way to do this?
+ if (SubIdx == AMDGPU::sub0)
+ return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
+ if (SubIdx == AMDGPU::sub1)
+ return MachineOperand::CreateImm(Op.getImm() >> 32);
+
+ llvm_unreachable("Unhandled register index for immediate");
+ }
+
+ unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
+ SubIdx, SubRC);
+ return MachineOperand::CreateReg(SubReg, false);
+}
+
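+// Materialize a 64-bit immediate as two 32-bit S_MOV_B32 halves combined
+// with a REG_SEQUENCE; the two moves are pushed onto the worklist so they
+// can be moved to the VALU later if needed.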
+unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineBasicBlock::iterator MI,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterClass *RC,
+ const MachineOperand &Op) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned Dst = MRI.createVirtualRegister(RC);
+
+ MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
+ LoDst)
+ .addImm(Op.getImm() & 0xFFFFFFFF);
+ MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
+ HiDst)
+ .addImm(Op.getImm() >> 32);
+
+ BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
+ .addReg(LoDst)
+ .addImm(AMDGPU::sub0)
+ .addReg(HiDst)
+ .addImm(AMDGPU::sub1);
+
+ Worklist.push_back(Lo);
+ Worklist.push_back(Hi);
+
+ return Dst;
+}
+
+// Change the order of operands from (0, 1, 2) to (0, 2, 1)
+void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
+ assert(Inst->getNumExplicitOperands() == 3);
+ MachineOperand Op1 = Inst->getOperand(1);
+ Inst->RemoveOperand(1);
+ Inst->addOperand(Op1);
+}
+
+bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
+ const MachineOperand *MO) const {
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ const MCInstrDesc &InstDesc = get(MI->getOpcode());
+ const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
+ const TargetRegisterClass *DefinedRC =
+ OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
+ if (!MO)
+ MO = &MI->getOperand(OpIdx);
+
+ if (isVALU(InstDesc.Opcode) &&
+ usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
+ unsigned SGPRUsed =
+ MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (i == OpIdx)
+ continue;
+ const MachineOperand &Op = MI->getOperand(i);
+ if (Op.isReg() && Op.getReg() != SGPRUsed &&
+ usesConstantBus(MRI, Op, getOpSize(*MI, i))) {
+ return false;
+ }
+ }
+ }
+
+ if (MO->isReg()) {
+ assert(DefinedRC);
+ const TargetRegisterClass *RC = MRI.getRegClass(MO->getReg());
+
+ // In order to be legal, the common sub-class must be equal to the
+ // class of the current operand. For example:
+ //
+ // v_mov_b32 s0 ; Operand defined as vsrc_32
+ // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
+ //
+ // s_sendmsg 0, s0 ; Operand defined as m0reg
+ // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
+
+ return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
+ }
+
+ // Handle non-register types that are treated like immediates.
+ assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
+
+ if (!DefinedRC) {
+ // This operand expects an immediate.
+ return true;
+ }
+
+ return isImmOperandLegal(MI, OpIdx, *MO);
+}
+
+void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src0);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src1);
+ int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src2);
+
+ // Legalize VOP2
+ if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
+ // Legalize src0
+ if (!isOperandLegal(MI, Src0Idx))
+ legalizeOpWithMove(MI, Src0Idx);
+
+ // Legalize src1
+ if (isOperandLegal(MI, Src1Idx))
+ return;
+
+ // Usually src0 of VOP2 instructions allows more types of inputs
+ // than src1, so try to commute the instruction to decrease our
+ // chances of having to insert a MOV instruction to legalize src1.
+ if (MI->isCommutable()) {
+ if (commuteInstruction(MI))
+ // If we are successful in commuting, then we know MI is legal, so
+ // we are done.
+ return;
+ }
+
+ legalizeOpWithMove(MI, Src1Idx);
+ return;
+ }
+
+ // XXX - Do any VOP3 instructions read VCC?
+ // Legalize VOP3
+ if (isVOP3(MI->getOpcode())) {
+ int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };
+
+ // Find the one SGPR operand we are allowed to use.
+ unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
+
+ for (unsigned i = 0; i < 3; ++i) {
+ int Idx = VOP3Idx[i];
+ if (Idx == -1)
+ break;
+ MachineOperand &MO = MI->getOperand(Idx);
+
+ if (MO.isReg()) {
+ if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
+ continue; // VGPRs are legal
+
+ assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");
+
+ if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
+ SGPRReg = MO.getReg();
+ // We can use one SGPR in each VOP3 instruction.
+ continue;
+ }
+ } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) {
+ // If it is not a register and not a literal constant, then it must be
+ // an inline constant which is always legal.
+ continue;
+ }
+ // If we make it this far, then the operand is not legal and we must
+ // legalize it.
+ legalizeOpWithMove(MI, Idx);
+ }
+ }
+
+ // Legalize REG_SEQUENCE and PHI
+ // The register class of the operands must match the register class of the
+ // output.
+ if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
+ MI->getOpcode() == AMDGPU::PHI) {
+ const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
+ if (!MI->getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
+ continue;
+ const TargetRegisterClass *OpRC =
+ MRI.getRegClass(MI->getOperand(i).getReg());
+ if (RI.hasVGPRs(OpRC)) {
+ VRC = OpRC;
+ } else {
+ SRC = OpRC;
+ }
+ }
+
+ // If any of the operands are VGPR registers, then they all must be;
+ // otherwise we will create illegal VGPR->SGPR copies when legalizing
+ // them.
+ if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
+ if (!VRC) {
+ assert(SRC);
+ VRC = RI.getEquivalentVGPRClass(SRC);
+ }
+ RC = VRC;
+ } else {
+ RC = SRC;
+ }
+
+ // Update all the operands so they have the same type.
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
+ if (!MI->getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
+ continue;
+ unsigned DstReg = MRI.createVirtualRegister(RC);
+ MachineBasicBlock *InsertBB;
+ MachineBasicBlock::iterator Insert;
+ if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+ InsertBB = MI->getParent();
+ Insert = MI;
+ } else {
+ // MI is a PHI instruction.
+ InsertBB = MI->getOperand(i + 1).getMBB();
+ Insert = InsertBB->getFirstTerminator();
+ }
+ BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
+ get(AMDGPU::COPY), DstReg)
+ .addOperand(MI->getOperand(i));
+ MI->getOperand(i).setReg(DstReg);
+ }
+ }
+
+ // Legalize INSERT_SUBREG
+ // src0 must have the same register class as dst
+ if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned Src0 = MI->getOperand(1).getReg();
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+ const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
+ if (DstRC != Src0RC) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
+ .addReg(Src0);
+ MI->getOperand(1).setReg(NewSrc0);
+ }
+ return;
+ }
+
+ // Legalize MUBUF* instructions
+ // FIXME: If we start using the non-addr64 instructions for compute, we
+ // may need to legalize them here.
+ int SRsrcIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
+ if (SRsrcIdx != -1) {
+ // We have an MUBUF instruction
+ MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
+ unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
+ if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
+ RI.getRegClass(SRsrcRC))) {
+ // The operands are legal.
+ // FIXME: We may need to legalize operands besides srsrc.
+ return;
+ }
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ // Extract the ptr from the resource descriptor.
+
+ // SRsrcPtrLo = srsrc:sub0
+ unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass);
+
+ // SRsrcPtrHi = srsrc:sub1
+ unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass);
+
+ // Create an empty resource descriptor
+ unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+ uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
+
+ // Zero64 = 0
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
+ Zero64)
+ .addImm(0);
+
+ // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ SRsrcFormatLo)
+ .addImm(RsrcDataFormat & 0xFFFFFFFF);
+
+ // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ SRsrcFormatHi)
+ .addImm(RsrcDataFormat >> 32);
+
+ // NewSRsrc = {Zero64, SRsrcFormat}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+ NewSRsrc)
+ .addReg(Zero64)
+ .addImm(AMDGPU::sub0_sub1)
+ .addReg(SRsrcFormatLo)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcFormatHi)
+ .addImm(AMDGPU::sub3);
+
+ MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
+ unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ unsigned NewVAddrLo;
+ unsigned NewVAddrHi;
+ if (VAddr) {
+ // This is already an ADDR64 instruction so we need to add the pointer
+ // extracted from the resource descriptor to the current value of VAddr.
+ NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ // NewVaddrLo = SRsrcPtrLo + VAddr:sub0
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
+ NewVAddrLo)
+ .addReg(SRsrcPtrLo)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
+ .addReg(AMDGPU::VCC, RegState::ImplicitDefine);
+
+ // NewVaddrHi = SRsrcPtrHi + VAddr:sub1
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
+ NewVAddrHi)
+ .addReg(SRsrcPtrHi)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
+ .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
+ .addReg(AMDGPU::VCC, RegState::Implicit);
+
+ } else {
+ // This instruction is the _OFFSET variant, so we need to convert it to
+ // ADDR64.
+ MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
+ MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
+ MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
+
+ // Create the new instruction.
+ unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
+ MachineInstr *Addr64 =
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
+ .addOperand(*VData)
+ .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
+ // This will be replaced later
+ // with the new value of vaddr.
+ .addOperand(*SRsrc)
+ .addOperand(*SOffset)
+ .addOperand(*Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0); // tfe
+
+ MI->removeFromParent();
+ MI = Addr64;
+
+ NewVAddrLo = SRsrcPtrLo;
+ NewVAddrHi = SRsrcPtrHi;
+ VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
+ SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
+ }
+
+ // NewVaddr = {NewVaddrHi, NewVaddrLo}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+ NewVAddr)
+ .addReg(NewVAddrLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(NewVAddrHi)
+ .addImm(AMDGPU::sub1);
+
+ // Update the instruction to use NewVaddr
+ VAddr->setReg(NewVAddr);
+ // Update the instruction to use NewSRsrc
+ SRsrc->setReg(NewSRsrc);
+ }
+}
+
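+// Split a wide SMRD load into two loads of half the width, using either the
+// immediate- or SGPR-offset form for the high half depending on whether the
+// new offset still fits, and recombine the results with a REG_SEQUENCE.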
+void SIInstrInfo::splitSMRD(MachineInstr *MI,
+ const TargetRegisterClass *HalfRC,
+ unsigned HalfImmOp, unsigned HalfSGPROp,
+ MachineInstr *&Lo, MachineInstr *&Hi) const {
+
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ unsigned RegLo = MRI.createVirtualRegister(HalfRC);
+ unsigned RegHi = MRI.createVirtualRegister(HalfRC);
+ unsigned HalfSize = HalfRC->getSize();
+ const MachineOperand *OffOp =
+ getNamedOperand(*MI, AMDGPU::OpName::offset);
+ const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);
+
+ // The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes
+ // on VI.
+
+ bool IsKill = SBase->isKill();
+ if (OffOp) {
+ bool isVI =
+ MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
+ AMDGPUSubtarget::VOLCANIC_ISLANDS;
+ unsigned OffScale = isVI ? 1 : 4;
+ // Handle the _IMM variant
+ unsigned LoOffset = OffOp->getImm() * OffScale;
+ unsigned HiOffset = LoOffset + HalfSize;
+ Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
+ // Use addReg instead of addOperand
+ // to make sure kill flag is cleared.
+ .addReg(SBase->getReg(), 0, SBase->getSubReg())
+ .addImm(LoOffset / OffScale);
+
+ if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) {
+ unsigned OffsetSGPR =
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
+ .addImm(HiOffset); // The offset in register is in bytes.
+ Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
+ .addReg(OffsetSGPR);
+ } else {
+ Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
+ .addImm(HiOffset / OffScale);
+ }
+ } else {
+ // Handle the _SGPR variant
+ MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
+ Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
+ .addReg(SBase->getReg(), 0, SBase->getSubReg())
+ .addOperand(*SOff);
+ unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
+ .addOperand(*SOff)
+ .addImm(HalfSize);
+ Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp))
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
+ .addReg(OffsetSGPR);
+ }
+
+ unsigned SubLo, SubHi;
+ switch (HalfSize) {
+ case 4:
+ SubLo = AMDGPU::sub0;
+ SubHi = AMDGPU::sub1;
+ break;
+ case 8:
+ SubLo = AMDGPU::sub0_sub1;
+ SubHi = AMDGPU::sub2_sub3;
+ break;
+ case 16:
+ SubLo = AMDGPU::sub0_sub1_sub2_sub3;
+ SubHi = AMDGPU::sub4_sub5_sub6_sub7;
+ break;
+ case 32:
+ SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
+ SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15;
+ break;
+ default:
+ llvm_unreachable("Unhandled HalfSize");
+ }
+
+ BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE))
+ .addOperand(MI->getOperand(0))
+ .addReg(RegLo)
+ .addImm(SubLo)
+ .addReg(RegHi)
+ .addImm(SubHi);
+}
+
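+// Rewrite an SMRD load as a MUBUF ADDR64 load by building a resource
+// descriptor and a byte offset the buffer instruction can use; loads wider
+// than four dwords are first split in half and handled recursively.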
+void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ switch (MI->getOpcode()) {
+ case AMDGPU::S_LOAD_DWORD_IMM:
+ case AMDGPU::S_LOAD_DWORD_SGPR:
+ case AMDGPU::S_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_LOAD_DWORDX2_SGPR:
+ case AMDGPU::S_LOAD_DWORDX4_IMM:
+ case AMDGPU::S_LOAD_DWORDX4_SGPR: {
+ unsigned NewOpcode = getVALUOp(*MI);
+ unsigned RegOffset;
+ unsigned ImmOffset;
+
+ if (MI->getOperand(2).isReg()) {
+ RegOffset = MI->getOperand(2).getReg();
+ ImmOffset = 0;
+ } else {
+ assert(MI->getOperand(2).isImm());
+ // SMRD instructions take a dword offset on SI and a byte offset on VI,
+ // while MUBUF instructions always take a byte offset.
+ ImmOffset = MI->getOperand(2).getImm();
+ if (MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <=
+ AMDGPUSubtarget::SEA_ISLANDS)
+ ImmOffset <<= 2;
+ RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+
+ if (isUInt<12>(ImmOffset)) {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ RegOffset)
+ .addImm(0);
+ } else {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ RegOffset)
+ .addImm(ImmOffset);
+ ImmOffset = 0;
+ }
+ }
+
+ unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+ unsigned DWord0 = RegOffset;
+ unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
+
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
+ .addImm(0);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
+ .addImm(RsrcDataFormat & 0xFFFFFFFF);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
+ .addImm(RsrcDataFormat >> 32);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
+ .addReg(DWord0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DWord1)
+ .addImm(AMDGPU::sub1)
+ .addReg(DWord2)
+ .addImm(AMDGPU::sub2)
+ .addReg(DWord3)
+ .addImm(AMDGPU::sub3);
+ MI->setDesc(get(NewOpcode));
+ if (MI->getOperand(2).isReg()) {
+ MI->getOperand(2).setReg(SRsrc);
+ } else {
+ MI->getOperand(2).ChangeToRegister(SRsrc, false);
+ }
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0));
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe
+
+ const TargetRegisterClass *NewDstRC =
+ RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
+ MRI.replaceRegWith(DstReg, NewDstReg);
+ break;
+ }
+ case AMDGPU::S_LOAD_DWORDX8_IMM:
+ case AMDGPU::S_LOAD_DWORDX8_SGPR: {
+ MachineInstr *Lo, *Hi;
+ splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM,
+ AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi);
+ MI->eraseFromParent();
+ moveSMRDToVALU(Lo, MRI);
+ moveSMRDToVALU(Hi, MRI);
+ break;
+ }
+
+ case AMDGPU::S_LOAD_DWORDX16_IMM:
+ case AMDGPU::S_LOAD_DWORDX16_SGPR: {
+ MachineInstr *Lo, *Hi;
+ splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM,
+ AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi);
+ MI->eraseFromParent();
+ moveSMRDToVALU(Lo, MRI);
+ moveSMRDToVALU(Hi, MRI);
+ break;
+ }
+ }
+}
+
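+// Worklist-driven conversion of a scalar instruction (and any users that
+// cannot read VGPRs) to the vector unit: each instruction is either split,
+// given its VALU opcode, or has its operands legalized, and its users are
+// revisited after the destination register is replaced.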
+void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
+ SmallVector<MachineInstr *, 128> Worklist;
+ Worklist.push_back(&TopInst);
+
+ while (!Worklist.empty()) {
+ MachineInstr *Inst = Worklist.pop_back_val();
+ MachineBasicBlock *MBB = Inst->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ unsigned Opcode = Inst->getOpcode();
+ unsigned NewOpcode = getVALUOp(*Inst);
+
+ // Handle some special cases
+ switch (Opcode) {
+ default:
+ if (isSMRD(Inst->getOpcode())) {
+ moveSMRDToVALU(Inst, MRI);
+ }
+ break;
+ case AMDGPU::S_MOV_B64: {
+ DebugLoc DL = Inst->getDebugLoc();
+
+ // If the source operand is a register we can replace this with a
+ // copy.
+ if (Inst->getOperand(1).isReg()) {
+ MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
+ .addOperand(Inst->getOperand(0))
+ .addOperand(Inst->getOperand(1));
+ Worklist.push_back(Copy);
+ } else {
+ // Otherwise, we need to split this into two movs, because there is
+ // no 64-bit VALU move instruction.
+ unsigned Reg = Inst->getOperand(0).getReg();
+ unsigned Dst = split64BitImm(Worklist,
+ Inst,
+ MRI,
+ MRI.getRegClass(Reg),
+ Inst->getOperand(1));
+ MRI.replaceRegWith(Reg, Dst);
+ }
+ Inst->eraseFromParent();
+ continue;
+ }
+ case AMDGPU::S_AND_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_OR_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_XOR_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_NOT_B64:
+ splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_BCNT1_I32_B64:
+ splitScalar64BitBCNT(Worklist, Inst);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_BFE_I64: {
+ splitScalar64BitBFE(Worklist, Inst);
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::S_LSHL_B32:
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
+ swapOperands(Inst);
+ }
+ break;
+ case AMDGPU::S_ASHR_I32:
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
+ swapOperands(Inst);
+ }
+ break;
+ case AMDGPU::S_LSHR_B32:
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
+ swapOperands(Inst);
+ }
+ break;
+ case AMDGPU::S_LSHL_B64:
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ NewOpcode = AMDGPU::V_LSHLREV_B64;
+ swapOperands(Inst);
+ }
+ break;
+ case AMDGPU::S_ASHR_I64:
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ NewOpcode = AMDGPU::V_ASHRREV_I64;
+ swapOperands(Inst);
+ }
+ break;
+ case AMDGPU::S_LSHR_B64:
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ NewOpcode = AMDGPU::V_LSHRREV_B64;
+ swapOperands(Inst);
+ }
+ break;
+
+ case AMDGPU::S_BFE_U64:
+ case AMDGPU::S_BFM_B64:
+ llvm_unreachable("Moving this op to VALU not implemented");
+ }
+
+ if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
+ // We cannot move this instruction to the VALU, so we should try to
+ // legalize its operands instead.
+ legalizeOperands(Inst);
+ continue;
+ }
+
+ // Use the new VALU Opcode.
+ const MCInstrDesc &NewDesc = get(NewOpcode);
+ Inst->setDesc(NewDesc);
+
+ // Remove any references to SCC. Vector instructions can't read from it, and
+ // we're just about to add the implicit use / defs of VCC, and we don't want
+ // both.
+ for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
+ MachineOperand &Op = Inst->getOperand(i);
+ if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
+ Inst->RemoveOperand(i);
+ }
+
+ if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
+ // We are converting these to a BFE, so we need to add the missing
+ // operands for the size and offset.
+ unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(Size));
+
+ } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
+ // The VALU version adds the second operand to the result, so insert an
+ // extra 0 operand.
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ }
+
+ addDescImplicitUseDef(NewDesc, Inst);
+
+ if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
+ const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
+ // If we need to move this to VGPRs, we need to unpack the second operand
+ // back into the 2 separate ones for bit offset and width.
+ assert(OffsetWidthOp.isImm() &&
+ "Scalar BFE is only implemented for constant width and offset");
+ uint32_t Imm = OffsetWidthOp.getImm();
+
+ uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
+ uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
+ Inst->RemoveOperand(2); // Remove old immediate.
+ Inst->addOperand(MachineOperand::CreateImm(Offset));
+ Inst->addOperand(MachineOperand::CreateImm(BitWidth));
+ }
+
+ // Update the destination register class.
+
+ const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
+
+ switch (Opcode) {
+ // For target instructions, getOpRegClass just returns the virtual
+ // register class associated with the operand, so we need to find an
+ // equivalent VGPR register class in order to move the instruction to the
+ // VALU.
+ case AMDGPU::COPY:
+ case AMDGPU::PHI:
+ case AMDGPU::REG_SEQUENCE:
+ case AMDGPU::INSERT_SUBREG:
+ if (RI.hasVGPRs(NewDstRC))
+ continue;
+ NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
+ if (!NewDstRC)
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ unsigned DstReg = Inst->getOperand(0).getReg();
+ unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
+ MRI.replaceRegWith(DstReg, NewDstReg);
+
+ // Legalize the operands
+ legalizeOperands(Inst);
+
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
+ E = MRI.use_end(); I != E; ++I) {
+ MachineInstr &UseMI = *I->getParent();
+ if (!canReadVGPR(UseMI, I.getOperandNo())) {
+ Worklist.push_back(&UseMI);
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Indirect addressing callbacks
+//===----------------------------------------------------------------------===//
+
+unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const {
+ assert(Channel == 0);
+ return RegIndex;
+}
+
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
+ return &AMDGPU::VGPR_32RegClass;
+}
+
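+// Expand a 64-bit scalar unary operation into two 32-bit operations on the
+// sub0/sub1 halves and recombine the results with a REG_SEQUENCE; the two
+// halves are queued so their operands can be legalized afterwards.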
+void SIInstrInfo::splitScalar64BitUnaryOp(
+ SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst,
+ unsigned Opcode) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ MachineOperand &Src0 = Inst->getOperand(1);
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineBasicBlock::iterator MII = Inst;
+
+ const MCInstrDesc &InstDesc = get(Opcode);
+ const TargetRegisterClass *Src0RC = Src0.isReg() ?
+ MRI.getRegClass(Src0.getReg()) :
+ &AMDGPU::SGPR_32RegClass;
+
+ const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
+
+ MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+ AMDGPU::sub0, Src0SubRC);
+
+ const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
+ const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+
+ unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
+ MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+ .addOperand(SrcReg0Sub0);
+
+ MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+ AMDGPU::sub1, Src0SubRC);
+
+ unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
+ MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+ .addOperand(SrcReg0Sub1);
+
+ unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+ BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
+
+ MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+
+ // Try to legalize the operands in case we need to swap the order to keep it
+ // valid.
+ Worklist.push_back(LoHalf);
+ Worklist.push_back(HiHalf);
+}
+
+void SIInstrInfo::splitScalar64BitBinaryOp(
+ SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst,
+ unsigned Opcode) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ MachineOperand &Src0 = Inst->getOperand(1);
+ MachineOperand &Src1 = Inst->getOperand(2);
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineBasicBlock::iterator MII = Inst;
+
+ const MCInstrDesc &InstDesc = get(Opcode);
+ const TargetRegisterClass *Src0RC = Src0.isReg() ?
+ MRI.getRegClass(Src0.getReg()) :
+ &AMDGPU::SGPR_32RegClass;
+
+ const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
+ const TargetRegisterClass *Src1RC = Src1.isReg() ?
+ MRI.getRegClass(Src1.getReg()) :
+ &AMDGPU::SGPR_32RegClass;
+
+ const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
+
+ MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+ AMDGPU::sub0, Src0SubRC);
+ MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
+ AMDGPU::sub0, Src1SubRC);
+
+ const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
+ const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+
+ unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
+ MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+ .addOperand(SrcReg0Sub0)
+ .addOperand(SrcReg1Sub0);
+
+ MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+ AMDGPU::sub1, Src0SubRC);
+ MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
+ AMDGPU::sub1, Src1SubRC);
+
+ unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
+ MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+ .addOperand(SrcReg0Sub1)
+ .addOperand(SrcReg1Sub1);
+
+ unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+ BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
+
+ MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+
+ // Try to legalize the operands in case we need to swap the order to keep it
+ // valid.
+ Worklist.push_back(LoHalf);
+ Worklist.push_back(HiHalf);
+}
+
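+// Expand a 64-bit scalar population count into two V_BCNT_U32_B32 ops, with
+// the count of the low half fed into the second op as its accumulator.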
+void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ MachineBasicBlock::iterator MII = Inst;
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ MachineOperand &Src = Inst->getOperand(1);
+
+ const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
+ const TargetRegisterClass *SrcRC = Src.isReg() ?
+ MRI.getRegClass(Src.getReg()) :
+ &AMDGPU::SGPR_32RegClass;
+
+ unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
+
+ MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
+ AMDGPU::sub0, SrcSubRC);
+ MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
+ AMDGPU::sub1, SrcSubRC);
+
+ MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
+ .addOperand(SrcRegSub0)
+ .addImm(0);
+
+ MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
+ .addOperand(SrcRegSub1)
+ .addReg(MidReg);
+
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+
+ Worklist.push_back(First);
+ Worklist.push_back(Second);
+}
+
+void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineBasicBlock::iterator MII = Inst;
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ uint32_t Imm = Inst->getOperand(2).getImm();
+ uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
+ uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
+
+ (void) Offset;
+
+ // Only sext_inreg cases handled.
+ assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&
+ BitWidth <= 32 &&
+ Offset == 0 &&
+ "Not implemented");
+
+ if (BitWidth < 32) {
+ unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
+ .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
+ .addImm(0)
+ .addImm(BitWidth);
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
+ .addImm(31)
+ .addReg(MidRegLo);
+
+ BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
+ .addReg(MidRegLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(MidRegHi)
+ .addImm(AMDGPU::sub1);
+
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ return;
+ }
+
+ MachineOperand &Src = Inst->getOperand(1);
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
+ .addImm(31)
+ .addReg(Src.getReg(), 0, AMDGPU::sub0);
+
+ BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
+ .addReg(Src.getReg(), 0, AMDGPU::sub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(TmpReg)
+ .addImm(AMDGPU::sub1);
+
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+}
+
+void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
+ MachineInstr *Inst) const {
+  // Add the implicit register uses and definitions from the new opcode's description.

+ if (NewDesc.ImplicitUses) {
+ for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
+ unsigned Reg = NewDesc.ImplicitUses[i];
+ Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
+ }
+ }
+
+ if (NewDesc.ImplicitDefs) {
+ for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
+ unsigned Reg = NewDesc.ImplicitDefs[i];
+ Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+}
+
+unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
+ int OpIndices[3]) const {
+ const MCInstrDesc &Desc = get(MI->getOpcode());
+
+ // Find the one SGPR operand we are allowed to use.
+ unsigned SGPRReg = AMDGPU::NoRegister;
+
+ // First we need to consider the instruction's operand requirements before
+ // legalizing. Some operands are required to be SGPRs, such as implicit uses
+ // of VCC, but we are still bound by the constant bus requirement to only use
+ // one.
+ //
+ // If the operand's class is an SGPR, we can never move it.
+
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ // We only care about reads.
+ if (MO.isDef())
+ continue;
+
+ if (MO.getReg() == AMDGPU::VCC)
+ return AMDGPU::VCC;
+
+ if (MO.getReg() == AMDGPU::FLAT_SCR)
+ return AMDGPU::FLAT_SCR;
+ }
+
+ unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ for (unsigned i = 0; i < 3; ++i) {
+ int Idx = OpIndices[i];
+ if (Idx == -1)
+ break;
+
+ const MachineOperand &MO = MI->getOperand(Idx);
+ if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass))
+ SGPRReg = MO.getReg();
+
+ if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
+ UsedSGPRs[i] = MO.getReg();
+ }
+
+ if (SGPRReg != AMDGPU::NoRegister)
+ return SGPRReg;
+
+ // We don't have a required SGPR operand, so we have a bit more freedom in
+ // selecting operands to move.
+
+ // Try to select the most used SGPR. If an SGPR is equal to one of the
+ // others, we choose that.
+ //
+ // e.g.
+ // V_FMA_F32 v0, s0, s0, s0 -> No moves
+ // V_FMA_F32 v0, s0, s1, s0 -> Move s1
+
+ if (UsedSGPRs[0] != AMDGPU::NoRegister) {
+ if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
+ SGPRReg = UsedSGPRs[0];
+ }
+
+ if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
+ if (UsedSGPRs[1] == UsedSGPRs[2])
+ SGPRReg = UsedSGPRs[1];
+ }
+
+ return SGPRReg;
+}
+
+MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
+ MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address, unsigned OffsetReg) const {
+ const DebugLoc &DL = MBB->findDebugLoc(I);
+ unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
+ getIndirectIndexBegin(*MBB->getParent()));
+
+ return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
+ .addReg(IndirectBaseReg, RegState::Define)
+ .addOperand(I->getOperand(0))
+ .addReg(IndirectBaseReg)
+ .addReg(OffsetReg)
+ .addImm(0)
+ .addReg(ValueReg);
+}
+
+MachineInstrBuilder SIInstrInfo::buildIndirectRead(
+ MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address, unsigned OffsetReg) const {
+ const DebugLoc &DL = MBB->findDebugLoc(I);
+ unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
+ getIndirectIndexBegin(*MBB->getParent()));
+
+ return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
+ .addOperand(I->getOperand(0))
+ .addOperand(I->getOperand(1))
+ .addReg(IndirectBaseReg)
+ .addReg(OffsetReg)
+ .addImm(0);
+
+}
+
+void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
+ const MachineFunction &MF) const {
+ int End = getIndirectIndexEnd(MF);
+ int Begin = getIndirectIndexBegin(MF);
+
+ if (End == -1)
+ return;
+
+
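+  // A wider register tuple overlaps the reserved range if any of its lanes
+  // does, so each larger class starts (number of 32-bit lanes - 1) registers
+  // below Begin; e.g. the VReg_64 pair starting at Begin - 1 still covers
+  // VGPR Begin and must be reserved too.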
+ for (int Index = Begin; Index <= End; ++Index)
+ Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
+}
+
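+// Typical use (illustrative sketch; the operand name is whatever the tablegen
+// definition declares, e.g. an soffset operand on a buffer instruction):
+//   if (MachineOperand *Soff = TII->getNamedOperand(MI, AMDGPU::OpName::soffset))
+//     Soff->setReg(NewSOffsetReg);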
+MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
+ unsigned OperandName) const {
+ int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
+ if (Idx == -1)
+ return nullptr;
+
+ return &MI.getOperand(Idx);
+}
+
+uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
+ uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
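+  // Note (assumption, not stated by the surrounding code): bit 56, set below
+  // for HSA targets, is believed to correspond to the ATC bit of the buffer
+  // resource descriptor, matching RSRC_TID_ENABLE at bit 55.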
+ if (ST.isAmdHsaOS())
+ RsrcDataFormat |= (1ULL << 56);
+
+ return RsrcDataFormat;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
new file mode 100644
index 0000000..6fafb94
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -0,0 +1,391 @@
+//===-- SIInstrInfo.h - SI Instruction Info Interface -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for SIInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_LIB_TARGET_R600_SIINSTRINFO_H
+#define LLVM_LIB_TARGET_R600_SIINSTRINFO_H
+
+#include "AMDGPUInstrInfo.h"
+#include "SIDefines.h"
+#include "SIRegisterInfo.h"
+
+namespace llvm {
+
+class SIInstrInfo : public AMDGPUInstrInfo {
+private:
+ const SIRegisterInfo RI;
+
+ unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
+ MachineRegisterInfo &MRI,
+ MachineOperand &SuperReg,
+ const TargetRegisterClass *SuperRC,
+ unsigned SubIdx,
+ const TargetRegisterClass *SubRC) const;
+ MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
+ MachineRegisterInfo &MRI,
+ MachineOperand &SuperReg,
+ const TargetRegisterClass *SuperRC,
+ unsigned SubIdx,
+ const TargetRegisterClass *SubRC) const;
+
+ unsigned split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineBasicBlock::iterator MI,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterClass *RC,
+ const MachineOperand &Op) const;
+
+ void swapOperands(MachineBasicBlock::iterator Inst) const;
+
+ void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst, unsigned Opcode) const;
+
+ void splitScalar64BitBinaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst, unsigned Opcode) const;
+
+ void splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const;
+ void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const;
+
+ void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const;
+
+ bool checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
+ MachineInstr *MIb) const;
+
+ unsigned findUsedSGPR(const MachineInstr *MI, int OpIndices[3]) const;
+
+public:
+ explicit SIInstrInfo(const AMDGPUSubtarget &st);
+
+ const SIRegisterInfo &getRegisterInfo() const override {
+ return RI;
+ }
+
+ bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const override;
+
+ bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const override;
+
+ bool getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
+ unsigned &Offset,
+ const TargetRegisterInfo *TRI) const final;
+
+ bool shouldClusterLoads(MachineInstr *FirstLdSt,
+ MachineInstr *SecondLdSt,
+ unsigned NumLoads) const final;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+
+ unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ RegScavenger *RS,
+ unsigned TmpReg,
+ unsigned Offset,
+ unsigned Size) const;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+
+  /// \brief Returns an opcode that can be used to move a value to a \p DstRC
+  /// register. If there is no hardware instruction that can store to \p
+  /// DstRC, then AMDGPU::COPY is returned.
+ unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
+ unsigned commuteOpcode(const MachineInstr &MI) const;
+
+ MachineInstr *commuteInstruction(MachineInstr *MI,
+ bool NewMI = false) const override;
+ bool findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const override;
+
+ bool isTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA = nullptr) const;
+
+ bool areMemAccessesTriviallyDisjoint(
+ MachineInstr *MIa, MachineInstr *MIb,
+ AliasAnalysis *AA = nullptr) const override;
+
+ MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg) const override;
+ bool isMov(unsigned Opcode) const override;
+
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
+
+ bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const final;
+
+ unsigned getMachineCSELookAheadLimit() const override { return 500; }
+
+ bool isSALU(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SALU;
+ }
+
+ bool isVALU(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VALU;
+ }
+
+ bool isSOP1(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SOP1;
+ }
+
+ bool isSOP2(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SOP2;
+ }
+
+ bool isSOPC(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SOPC;
+ }
+
+ bool isSOPK(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SOPK;
+ }
+
+ bool isSOPP(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SOPP;
+ }
+
+ bool isVOP1(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP1;
+ }
+
+ bool isVOP2(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP2;
+ }
+
+ bool isVOP3(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP3;
+ }
+
+ bool isVOPC(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOPC;
+ }
+
+ bool isMUBUF(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
+ }
+
+ bool isMTBUF(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
+ }
+
+ bool isSMRD(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SMRD;
+ }
+
+ bool isDS(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::DS;
+ }
+
+ bool isMIMG(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::MIMG;
+ }
+
+ bool isFLAT(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::FLAT;
+ }
+
+ bool isWQM(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::WQM;
+ }
+
+ bool isVGPRSpill(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
+ }
+
+ bool isInlineConstant(const APInt &Imm) const;
+ bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const;
+ bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const;
+
+ bool isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
+ const MachineOperand &MO) const;
+
+ /// \brief Return true if this 64-bit VALU instruction has a 32-bit encoding.
+ /// This function will return false if you pass it a 32-bit instruction.
+ bool hasVALU32BitEncoding(unsigned Opcode) const;
+
+ /// \brief Returns true if this operand uses the constant bus.
+ bool usesConstantBus(const MachineRegisterInfo &MRI,
+ const MachineOperand &MO,
+ unsigned OpSize) const;
+
+ /// \brief Return true if this instruction has any modifiers.
+ /// e.g. src[012]_mod, omod, clamp.
+ bool hasModifiers(unsigned Opcode) const;
+
+ bool hasModifiersSet(const MachineInstr &MI,
+ unsigned OpName) const;
+
+ bool verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const override;
+
+ static unsigned getVALUOp(const MachineInstr &MI);
+
+ bool isSALUOpSupportedOnVALU(const MachineInstr &MI) const;
+
+  /// \brief Return the correct register class for \p OpNo. For target-specific
+  /// instructions, this will return the register class that has been defined
+  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
+  /// the register class of its machine operand, using the other operands to
+  /// infer the correct register class.
+ const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
+ unsigned OpNo) const;
+
+  /// \brief Return the size in bytes of the operand \p OpNo on the given
+  /// instruction opcode.
+ unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
+ const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo];
+
+ if (OpInfo.RegClass == -1) {
+ // If this is an immediate operand, this must be a 32-bit literal.
+ assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
+ return 4;
+ }
+
+ return RI.getRegClass(OpInfo.RegClass)->getSize();
+ }
+
+ /// \brief This form should usually be preferred since it handles operands
+ /// with unknown register classes.
+ unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
+ return getOpRegClass(MI, OpNo)->getSize();
+ }
+
+ /// \returns true if it is legal for the operand at index \p OpNo
+ /// to read a VGPR.
+ bool canReadVGPR(const MachineInstr &MI, unsigned OpNo) const;
+
+ /// \brief Legalize the \p OpIndex operand of this instruction by inserting
+ /// a MOV. For example:
+ /// ADD_I32_e32 VGPR0, 15
+ /// to
+ /// MOV VGPR1, 15
+ /// ADD_I32_e32 VGPR0, VGPR1
+ ///
+ /// If the operand being legalized is a register, then a COPY will be used
+ /// instead of MOV.
+ void legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const;
+
+  /// \brief Check if \p MO would be a legal operand at index \p OpIdx of \p MI.
+ bool isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
+ const MachineOperand *MO = nullptr) const;
+
+  /// \brief Legalize all operands in this instruction. This function may
+  /// create new instructions and insert them before \p MI.
+ void legalizeOperands(MachineInstr *MI) const;
+
+  /// \brief Split an SMRD instruction into two smaller loads of half the
+  /// size, storing the results in \p Lo and \p Hi.
+ void splitSMRD(MachineInstr *MI, const TargetRegisterClass *HalfRC,
+ unsigned HalfImmOp, unsigned HalfSGPROp,
+ MachineInstr *&Lo, MachineInstr *&Hi) const;
+
+ void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const;
+
+ /// \brief Replace this instruction's opcode with the equivalent VALU
+ /// opcode. This function will also move the users of \p MI to the
+ /// VALU if necessary.
+ void moveToVALU(MachineInstr &MI) const;
+
+ unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const override;
+
+ const TargetRegisterClass *getIndirectAddrRegClass() const override;
+
+ MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address,
+ unsigned OffsetReg) const override;
+
+ MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address,
+ unsigned OffsetReg) const override;
+ void reserveIndirectRegisters(BitVector &Reserved,
+ const MachineFunction &MF) const;
+
+ void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I,
+ unsigned SavReg, unsigned IndexReg) const;
+
+ void insertNOPs(MachineBasicBlock::iterator MI, int Count) const;
+
+  /// \brief Returns the operand named \p OperandName. If \p MI does not have
+  /// an operand with that name, this function returns nullptr.
+ MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
+
+ const MachineOperand *getNamedOperand(const MachineInstr &MI,
+ unsigned OpName) const {
+ return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
+ }
+
+ uint64_t getDefaultRsrcDataFormat() const;
+
+};
+
+namespace AMDGPU {
+
+ int getVOPe64(uint16_t Opcode);
+ int getVOPe32(uint16_t Opcode);
+ int getCommuteRev(uint16_t Opcode);
+ int getCommuteOrig(uint16_t Opcode);
+ int getAddr64Inst(uint16_t Opcode);
+ int getAtomicRetOp(uint16_t Opcode);
+ int getAtomicNoRetOp(uint16_t Opcode);
+
+ const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
+ const uint64_t RSRC_TID_ENABLE = 1LL << 55;
+
+} // End namespace AMDGPU
+
+namespace SI {
+namespace KernelInputOffsets {
+
+/// Offsets in bytes from the start of the input buffer
+enum Offsets {
+ NGROUPS_X = 0,
+ NGROUPS_Y = 4,
+ NGROUPS_Z = 8,
+ GLOBAL_SIZE_X = 12,
+ GLOBAL_SIZE_Y = 16,
+ GLOBAL_SIZE_Z = 20,
+ LOCAL_SIZE_X = 24,
+ LOCAL_SIZE_Y = 28,
+ LOCAL_SIZE_Z = 32
+};
+
+} // End namespace KernelInputOffsets
+} // End namespace SI
+
+} // End namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
new file mode 100644
index 0000000..93e4ca7
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -0,0 +1,2647 @@
+//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+def isCI : Predicate<"Subtarget->getGeneration() "
+ ">= AMDGPUSubtarget::SEA_ISLANDS">;
+def isVI : Predicate <
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
+ AssemblerPredicate<"FeatureGCN3Encoding">;
+
+def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
+
+class vop {
+ field bits<9> SI3;
+ field bits<10> VI3;
+}
+
+class vopc <bits<8> si, bits<8> vi = !add(0x40, si)> : vop {
+ field bits<8> SI = si;
+ field bits<8> VI = vi;
+
+ field bits<9> SI3 = {0, si{7-0}};
+ field bits<10> VI3 = {0, 0, vi{7-0}};
+}
+
+class vop1 <bits<8> si, bits<8> vi = si> : vop {
+ field bits<8> SI = si;
+ field bits<8> VI = vi;
+
+ field bits<9> SI3 = {1, 1, si{6-0}};
+ field bits<10> VI3 = !add(0x140, vi);
+}
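+// Worked example of the shared-encoding scheme above: a vop1 with SI opcode
+// 0x03 gets the 9-bit VOP3 opcode {1, 1, 0x03} = 0x183 on SI, and the 10-bit
+// VOP3 opcode 0x140 + 0x03 = 0x143 on VI.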
+
+class vop2 <bits<6> si, bits<6> vi = si> : vop {
+ field bits<6> SI = si;
+ field bits<6> VI = vi;
+
+ field bits<9> SI3 = {1, 0, 0, si{5-0}};
+ field bits<10> VI3 = {0, 1, 0, 0, vi{5-0}};
+}
+
+// Specify a VOP2 opcode for SI and a VOP3 opcode for VI, for an instruction
+// that has no VOP2 encoding on VI.
+class vop23 <bits<6> si, bits<10> vi> : vop2 <si> {
+ let VI3 = vi;
+}
+
+class vop3 <bits<9> si, bits<10> vi = {0, si}> : vop {
+ let SI3 = si;
+ let VI3 = vi;
+}
+
+class sop1 <bits<8> si, bits<8> vi = si> {
+ field bits<8> SI = si;
+ field bits<8> VI = vi;
+}
+
+class sop2 <bits<7> si, bits<7> vi = si> {
+ field bits<7> SI = si;
+ field bits<7> VI = vi;
+}
+
+class sopk <bits<5> si, bits<5> vi = si> {
+ field bits<5> SI = si;
+ field bits<5> VI = vi;
+}
+
+// Except for the NONE field, this must be kept in sync with the SISubtarget enum
+// in AMDGPUInstrInfo.cpp
+def SISubtarget {
+ int NONE = -1;
+ int SI = 0;
+ int VI = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// SI DAG Nodes
+//===----------------------------------------------------------------------===//
+
+def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
+ SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>,
+ [SDNPMayLoad, SDNPMemOperand]
+>;
+
+def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
+ SDTypeProfile<0, 13,
+ [SDTCisVT<0, v4i32>, // rsrc(SGPR)
+ SDTCisVT<1, iAny>, // vdata(VGPR)
+ SDTCisVT<2, i32>, // num_channels(imm)
+ SDTCisVT<3, i32>, // vaddr(VGPR)
+ SDTCisVT<4, i32>, // soffset(SGPR)
+ SDTCisVT<5, i32>, // inst_offset(imm)
+ SDTCisVT<6, i32>, // dfmt(imm)
+ SDTCisVT<7, i32>, // nfmt(imm)
+ SDTCisVT<8, i32>, // offen(imm)
+ SDTCisVT<9, i32>, // idxen(imm)
+ SDTCisVT<10, i32>, // glc(imm)
+ SDTCisVT<11, i32>, // slc(imm)
+ SDTCisVT<12, i32> // tfe(imm)
+ ]>,
+ [SDNPMayStore, SDNPMemOperand, SDNPHasChain]
+>;
+
+def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i16>,
+ SDTCisVT<3, i32>]>
+>;
+
+class SDSample<string opcode> : SDNode <opcode,
+ SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>,
+ SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
+>;
+
+def SIsample : SDSample<"AMDGPUISD::SAMPLE">;
+def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
+def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
+def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
+
+def SIconstdata_ptr : SDNode<
+ "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]>
+>;
+
+//===----------------------------------------------------------------------===//
+// SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1
+// to be glued to the memory instructions.
+//===----------------------------------------------------------------------===//
+
+def SIld_local : SDNode <"ISD::LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
+>;
+
+def si_ld_local : PatFrag <(ops node:$ptr), (SIld_local node:$ptr), [{
+ return isLocalLoad(cast<LoadSDNode>(N));
+}]>;
+
+def si_load_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED &&
+ cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+}]>;
+
+def si_load_local_align8 : Aligned8Bytes <
+ (ops node:$ptr), (si_load_local node:$ptr)
+>;
+
+def si_sextload_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
+ return cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
+}]>;
+def si_az_extload_local : AZExtLoadBase <si_ld_local>;
+
+multiclass SIExtLoadLocal <PatFrag ld_node> {
+
+ def _i8 : PatFrag <(ops node:$ptr), (ld_node node:$ptr),
+ [{return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;}]
+ >;
+
+ def _i16 : PatFrag <(ops node:$ptr), (ld_node node:$ptr),
+ [{return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;}]
+ >;
+}
+
+defm si_sextload_local : SIExtLoadLocal <si_sextload_local>;
+defm si_az_extload_local : SIExtLoadLocal <si_az_extload_local>;
+
+def SIst_local : SDNode <"ISD::STORE", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
+>;
+
+def si_st_local : PatFrag <
+ (ops node:$val, node:$ptr), (SIst_local node:$val, node:$ptr), [{
+ return isLocalStore(cast<StoreSDNode>(N));
+}]>;
+
+def si_store_local : PatFrag <
+ (ops node:$val, node:$ptr), (si_st_local node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED &&
+ !cast<StoreSDNode>(N)->isTruncatingStore();
+}]>;
+
+def si_store_local_align8 : Aligned8Bytes <
+ (ops node:$val, node:$ptr), (si_store_local node:$val, node:$ptr)
+>;
+
+def si_truncstore_local : PatFrag <
+ (ops node:$val, node:$ptr), (si_st_local node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->isTruncatingStore();
+}]>;
+
+def si_truncstore_local_i8 : PatFrag <
+ (ops node:$val, node:$ptr), (si_truncstore_local node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def si_truncstore_local_i16 : PatFrag <
+ (ops node:$val, node:$ptr), (si_truncstore_local node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+multiclass SIAtomicM0Glue2 <string op_name> {
+
+ def _glue : SDNode <"ISD::ATOMIC_"#op_name, SDTAtomic2,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
+ >;
+
+ def _local : local_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
+}
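+// For instance, the si_atomic_load_add definition below expands to
+// si_atomic_load_add_glue (an ISD::ATOMIC_LOAD_ADD node glued to the m0 setup)
+// and si_atomic_load_add_local (that node restricted to the local address space).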
+
+defm si_atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
+defm si_atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
+defm si_atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
+defm si_atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
+defm si_atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
+defm si_atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
+defm si_atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
+defm si_atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
+defm si_atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
+defm si_atomic_swap : SIAtomicM0Glue2 <"SWAP">;
+
+def si_atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
+>;
+
+defm si_atomic_cmp_swap : AtomicCmpSwapLocal <si_atomic_cmp_swap_glue>;
+
+// Transformation function, extract the lower 32 bits of a 64-bit immediate
+def LO32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, SDLoc(N),
+ MVT::i32);
+}]>;
+
+def LO32f : SDNodeXForm<fpimm, [{
+ APInt V = N->getValueAPF().bitcastToAPInt().trunc(32);
+  return CurDAG->getTargetConstantFP(APFloat(APFloat::IEEEsingle, V), SDLoc(N),
+                                     MVT::f32);
+}]>;
+
+// Transformation function, extract the upper 32 bits of a 64-bit immediate
+def HI32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() >> 32, SDLoc(N), MVT::i32);
+}]>;
+
+def HI32f : SDNodeXForm<fpimm, [{
+ APInt V = N->getValueAPF().bitcastToAPInt().lshr(32).trunc(32);
+ return CurDAG->getTargetConstantFP(APFloat(APFloat::IEEEsingle, V), SDLoc(N),
+ MVT::f32);
+}]>;
+
+def IMM8bitDWORD : PatLeaf <(imm),
+ [{return (N->getZExtValue() & ~0x3FC) == 0;}]
+>;
+
+def as_dword_i32imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() >> 2, SDLoc(N), MVT::i32);
+}]>;
+
+def as_i1imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
+}]>;
+
+def as_i8imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
+}]>;
+
+def as_i16imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
+}]>;
+
+def as_i32imm: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+def as_i64imm: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
+}]>;
+
+// Copied from the AArch64 backend:
+def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+// Copied from the AArch64 backend:
+def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
+}]>;
+
+def IMM8bit : PatLeaf <(imm),
+ [{return isUInt<8>(N->getZExtValue());}]
+>;
+
+def IMM12bit : PatLeaf <(imm),
+ [{return isUInt<12>(N->getZExtValue());}]
+>;
+
+def IMM16bit : PatLeaf <(imm),
+ [{return isUInt<16>(N->getZExtValue());}]
+>;
+
+def IMM20bit : PatLeaf <(imm),
+ [{return isUInt<20>(N->getZExtValue());}]
+>;
+
+def IMM32bit : PatLeaf <(imm),
+ [{return isUInt<32>(N->getZExtValue());}]
+>;
+
+def mubuf_vaddr_offset : PatFrag<
+ (ops node:$ptr, node:$offset, node:$imm_offset),
+ (add (add node:$ptr, node:$offset), node:$imm_offset)
+>;
+
+class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
+ return isInlineImmediate(N);
+}]>;
+
+class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{
+ return isInlineImmediate(N);
+}]>;
+
+class SGPRImm <dag frag> : PatLeaf<frag, [{
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ return false;
+ }
+ const SIRegisterInfo *SIRI =
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+ for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
+ U != E; ++U) {
+ if (SIRI->isSGPRClass(getOperandRegClass(*U, U.getOperandNo()))) {
+ return true;
+ }
+ }
+ return false;
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Custom Operands
+//===----------------------------------------------------------------------===//
+
+def FRAMEri32 : Operand<iPTR> {
+ let MIOperandInfo = (ops i32:$ptr, i32imm:$index);
+}
+
+def SoppBrTarget : AsmOperandClass {
+ let Name = "SoppBrTarget";
+ let ParserMethod = "parseSOppBrTarget";
+}
+
+def sopp_brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getSOPPBrEncoding";
+ let OperandType = "OPERAND_PCREL";
+ let ParserMatchClass = SoppBrTarget;
+}
+
+include "SIInstrFormats.td"
+include "VIInstrFormats.td"
+
+def MubufOffsetMatchClass : AsmOperandClass {
+ let Name = "MubufOffset";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+class DSOffsetBaseMatchClass <string parser> : AsmOperandClass {
+ let Name = "DSOffset"#parser;
+ let ParserMethod = parser;
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isDSOffset";
+}
+
+def DSOffsetMatchClass : DSOffsetBaseMatchClass <"parseDSOptionalOps">;
+def DSOffsetGDSMatchClass : DSOffsetBaseMatchClass <"parseDSOffsetOptional">;
+
+def DSOffset01MatchClass : AsmOperandClass {
+ let Name = "DSOffset1";
+ let ParserMethod = "parseDSOff01OptionalOps";
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isDSOffset01";
+}
+
+class GDSBaseMatchClass <string parser> : AsmOperandClass {
+ let Name = "GDS"#parser;
+ let PredicateMethod = "isImm";
+ let ParserMethod = parser;
+ let RenderMethod = "addImmOperands";
+}
+
+def GDSMatchClass : GDSBaseMatchClass <"parseDSOptionalOps">;
+def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">;
+
+class GLCBaseMatchClass <string parser> : AsmOperandClass {
+ let Name = "GLC"#parser;
+ let PredicateMethod = "isImm";
+ let ParserMethod = parser;
+ let RenderMethod = "addImmOperands";
+}
+
+def GLCMubufMatchClass : GLCBaseMatchClass <"parseMubufOptionalOps">;
+def GLCFlatMatchClass : GLCBaseMatchClass <"parseFlatOptionalOps">;
+
+class SLCBaseMatchClass <string parser> : AsmOperandClass {
+ let Name = "SLC"#parser;
+ let PredicateMethod = "isImm";
+ let ParserMethod = parser;
+ let RenderMethod = "addImmOperands";
+}
+
+def SLCMubufMatchClass : SLCBaseMatchClass <"parseMubufOptionalOps">;
+def SLCFlatMatchClass : SLCBaseMatchClass <"parseFlatOptionalOps">;
+def SLCFlatAtomicMatchClass : SLCBaseMatchClass <"parseFlatAtomicOptionalOps">;
+
+class TFEBaseMatchClass <string parser> : AsmOperandClass {
+ let Name = "TFE"#parser;
+ let PredicateMethod = "isImm";
+ let ParserMethod = parser;
+ let RenderMethod = "addImmOperands";
+}
+
+def TFEMubufMatchClass : TFEBaseMatchClass <"parseMubufOptionalOps">;
+def TFEFlatMatchClass : TFEBaseMatchClass <"parseFlatOptionalOps">;
+def TFEFlatAtomicMatchClass : TFEBaseMatchClass <"parseFlatAtomicOptionalOps">;
+
+def OModMatchClass : AsmOperandClass {
+ let Name = "OMod";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseVOP3OptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def ClampMatchClass : AsmOperandClass {
+ let Name = "Clamp";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseVOP3OptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+let OperandType = "OPERAND_IMMEDIATE" in {
+
+def offen : Operand<i1> {
+ let PrintMethod = "printOffen";
+}
+def idxen : Operand<i1> {
+ let PrintMethod = "printIdxen";
+}
+def addr64 : Operand<i1> {
+ let PrintMethod = "printAddr64";
+}
+def mbuf_offset : Operand<i16> {
+ let PrintMethod = "printMBUFOffset";
+ let ParserMatchClass = MubufOffsetMatchClass;
+}
+class ds_offset_base <AsmOperandClass mc> : Operand<i16> {
+ let PrintMethod = "printDSOffset";
+ let ParserMatchClass = mc;
+}
+def ds_offset : ds_offset_base <DSOffsetMatchClass>;
+def ds_offset_gds : ds_offset_base <DSOffsetGDSMatchClass>;
+
+def ds_offset0 : Operand<i8> {
+ let PrintMethod = "printDSOffset0";
+ let ParserMatchClass = DSOffset01MatchClass;
+}
+def ds_offset1 : Operand<i8> {
+ let PrintMethod = "printDSOffset1";
+ let ParserMatchClass = DSOffset01MatchClass;
+}
+class gds_base <AsmOperandClass mc> : Operand <i1> {
+ let PrintMethod = "printGDS";
+ let ParserMatchClass = mc;
+}
+def gds : gds_base <GDSMatchClass>;
+
+def gds01 : gds_base <GDS01MatchClass>;
+
+class glc_base <AsmOperandClass mc> : Operand <i1> {
+ let PrintMethod = "printGLC";
+ let ParserMatchClass = mc;
+}
+
+def glc : glc_base <GLCMubufMatchClass>;
+def glc_flat : glc_base <GLCFlatMatchClass>;
+
+class slc_base <AsmOperandClass mc> : Operand <i1> {
+ let PrintMethod = "printSLC";
+ let ParserMatchClass = mc;
+}
+
+def slc : slc_base <SLCMubufMatchClass>;
+def slc_flat : slc_base <SLCFlatMatchClass>;
+def slc_flat_atomic : slc_base <SLCFlatAtomicMatchClass>;
+
+class tfe_base <AsmOperandClass mc> : Operand <i1> {
+ let PrintMethod = "printTFE";
+ let ParserMatchClass = mc;
+}
+
+def tfe : tfe_base <TFEMubufMatchClass>;
+def tfe_flat : tfe_base <TFEFlatMatchClass>;
+def tfe_flat_atomic : tfe_base <TFEFlatAtomicMatchClass>;
+
+def omod : Operand <i32> {
+ let PrintMethod = "printOModSI";
+ let ParserMatchClass = OModMatchClass;
+}
+
+def ClampMod : Operand <i1> {
+ let PrintMethod = "printClampSI";
+ let ParserMatchClass = ClampMatchClass;
+}
+
+} // End OperandType = "OPERAND_IMMEDIATE"
+
+def VOPDstS64 : VOPDstOperand <SReg_64>;
+
+//===----------------------------------------------------------------------===//
+// Complex patterns
+//===----------------------------------------------------------------------===//
+
+def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
+def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
+
+def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
+def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
+def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
+def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
+def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
+def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
+
+def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
+def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
+def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
+def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
+
+//===----------------------------------------------------------------------===//
+// SI assembler operands
+//===----------------------------------------------------------------------===//
+
+def SIOperand {
+ int ZERO = 0x80;
+ int VCC = 0x6A;
+ int FLAT_SCR = 0x68;
+}
+
+def SRCMODS {
+ int NONE = 0;
+ int NEG = 1;
+}
+
+def DSTCLAMP {
+ int NONE = 0;
+}
+
+def DSTOMOD {
+ int NONE = 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// SI Instruction multiclass helpers.
+//
+// Instructions with _32 take 32-bit operands.
+// Instructions with _64 take 64-bit operands.
+//
+// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
+// encoding is the standard encoding, but instructions that make use of
+// any of the instruction modifiers must use the 64-bit encoding.
+//
+// Instructions with _e32 use the 32-bit encoding.
+// Instructions with _e64 use the 64-bit encoding.
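+//
+// For example, V_ADD_F32_e32 is the VOP2 (32-bit) encoding of v_add_f32, while
+// V_ADD_F32_e64 is the VOP3 (64-bit) encoding that can additionally carry
+// source modifiers, clamp and omod.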
+//
+//===----------------------------------------------------------------------===//
+
+class SIMCInstr <string pseudo, int subtarget> {
+ string PseudoInstr = pseudo;
+ int Subtarget = subtarget;
+}
+
+//===----------------------------------------------------------------------===//
+// EXP classes
+//===----------------------------------------------------------------------===//
+
+class EXPCommon : InstSI<
+ (outs),
+ (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
+ VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3),
+ "exp $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
+ [] > {
+
+ let EXP_CNT = 1;
+ let Uses = [EXEC];
+}
+
+multiclass EXP_m {
+
+ let isPseudo = 1, isCodeGenOnly = 1 in {
+ def "" : EXPCommon, SIMCInstr <"exp", SISubtarget.NONE> ;
+ }
+
+ def _si : EXPCommon, SIMCInstr <"exp", SISubtarget.SI>, EXPe;
+
+ def _vi : EXPCommon, SIMCInstr <"exp", SISubtarget.VI>, EXPe_vi;
+}
+
+//===----------------------------------------------------------------------===//
+// Scalar classes
+//===----------------------------------------------------------------------===//
+
+class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ SOP1 <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> :
+ SOP1 <outs, ins, asm, []>,
+ SOP1e <op.SI>,
+ SIMCInstr<opName, SISubtarget.SI> {
+ let isCodeGenOnly = 0;
+ let AssemblerPredicates = [isSICI];
+}
+
+class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> :
+ SOP1 <outs, ins, asm, []>,
+ SOP1e <op.VI>,
+ SIMCInstr<opName, SISubtarget.VI> {
+ let isCodeGenOnly = 0;
+ let AssemblerPredicates = [isVI];
+}
+
+multiclass SOP1_m <sop1 op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> {
+
+ def "" : SOP1_Pseudo <opName, outs, ins, pattern>;
+
+ def _si : SOP1_Real_si <op, opName, outs, ins, asm>;
+
+ def _vi : SOP1_Real_vi <op, opName, outs, ins, asm>;
+
+}
+
+multiclass SOP1_32 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
+ op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+multiclass SOP1_64 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
+ op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+// no input, 64-bit output.
+multiclass SOP1_64_0 <sop1 op, string opName, list<dag> pattern> {
+ def "" : SOP1_Pseudo <opName, (outs SReg_64:$dst), (ins), pattern>;
+
+ def _si : SOP1_Real_si <op, opName, (outs SReg_64:$dst), (ins),
+ opName#" $dst"> {
+ let ssrc0 = 0;
+ }
+
+ def _vi : SOP1_Real_vi <op, opName, (outs SReg_64:$dst), (ins),
+ opName#" $dst"> {
+ let ssrc0 = 0;
+ }
+}
+
+// 64-bit input, no output
+multiclass SOP1_1 <sop1 op, string opName, list<dag> pattern> {
+ def "" : SOP1_Pseudo <opName, (outs), (ins SReg_64:$src0), pattern>;
+
+ def _si : SOP1_Real_si <op, opName, (outs), (ins SReg_64:$src0),
+ opName#" $src0"> {
+ let sdst = 0;
+ }
+
+ def _vi : SOP1_Real_vi <op, opName, (outs), (ins SReg_64:$src0),
+ opName#" $src0"> {
+ let sdst = 0;
+ }
+}
+
+// 64-bit input, 32-bit output.
+multiclass SOP1_32_64 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
+ op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> :
+ SOP2<outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+ let Size = 4;
+
+  // Pseudo instructions have no encodings, but adding this field here allows
+  // us to write:
+  //   let sdst = xxx in { ... }
+  // around multiclasses that include both real and pseudo instructions.
+ field bits<7> sdst = 0;
+}
+
+class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> :
+ SOP2<outs, ins, asm, []>,
+ SOP2e<op.SI>,
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
+
+class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> :
+ SOP2<outs, ins, asm, []>,
+ SOP2e<op.VI>,
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
+
+multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> {
+ def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst),
+ (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc), pattern>;
+
+ def _si : SOP2_Real_si <op, opName, (outs SReg_32:$dst),
+ (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc),
+ opName#" $dst, $src0, $src1 [$scc]">;
+
+ def _vi : SOP2_Real_vi <op, opName, (outs SReg_32:$dst),
+ (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc),
+ opName#" $dst, $src0, $src1 [$scc]">;
+}
+
+multiclass SOP2_m <sop2 op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> {
+
+ def "" : SOP2_Pseudo <opName, outs, ins, pattern>;
+
+ def _si : SOP2_Real_si <op, opName, outs, ins, asm>;
+
+ def _vi : SOP2_Real_vi <op, opName, outs, ins, asm>;
+
+}
+
+multiclass SOP2_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
+ op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
+multiclass SOP2_64 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
+ op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
+multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
+ op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
+class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt,
+ string opName, PatLeaf cond> : SOPC <
+ op, (outs SCCReg:$dst), (ins rc:$src0, rc:$src1),
+ opName#" $src0, $src1", []>;
+
+class SOPC_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
+ : SOPC_Helper<op, SSrc_32, i32, opName, cond>;
+
+class SOPC_64<bits<7> op, string opName, PatLeaf cond = COND_NULL>
+ : SOPC_Helper<op, SSrc_64, i64, opName, cond>;
+
+class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ SOPK <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> :
+ SOPK <outs, ins, asm, []>,
+ SOPKe <op.SI>,
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+ let isCodeGenOnly = 0;
+}
+
+class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> :
+ SOPK <outs, ins, asm, []>,
+ SOPKe <op.VI>,
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+ let isCodeGenOnly = 0;
+}
+
+multiclass SOPK_m <sopk op, string opName, dag outs, dag ins, string opAsm,
+ string asm = opName#opAsm> {
+ def "" : SOPK_Pseudo <opName, outs, ins, []>;
+
+ def _si : SOPK_Real_si <op, opName, outs, ins, asm>;
+
+ def _vi : SOPK_Real_vi <op, opName, outs, ins, asm>;
+
+}
+
+multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> {
+ def "" : SOPK_Pseudo <opName, (outs SReg_32:$dst), (ins u16imm:$src0),
+ pattern>;
+
+ def _si : SOPK_Real_si <op, opName, (outs SReg_32:$dst), (ins u16imm:$src0),
+ opName#" $dst, $src0">;
+
+ def _vi : SOPK_Real_vi <op, opName, (outs SReg_32:$dst), (ins u16imm:$src0),
+ opName#" $dst, $src0">;
+}
+
+multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> {
+ def "" : SOPK_Pseudo <opName, (outs SCCReg:$dst),
+ (ins SReg_32:$src0, u16imm:$src1), pattern>;
+
+ let DisableEncoding = "$dst" in {
+ def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
+ (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
+
+ def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
+ (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
+ }
+}
+
+multiclass SOPK_32TIE <sopk op, string opName, list<dag> pattern> : SOPK_m <
+ op, opName, (outs SReg_32:$sdst), (ins SReg_32:$src0, u16imm:$simm16),
+ " $sdst, $simm16"
+>;
+
+multiclass SOPK_IMM32 <sopk op, string opName, dag outs, dag ins,
+ string argAsm, string asm = opName#argAsm> {
+
+ def "" : SOPK_Pseudo <opName, outs, ins, []>;
+
+ def _si : SOPK <outs, ins, asm, []>,
+ SOPK64e <op.SI>,
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+ let isCodeGenOnly = 0;
+ }
+
+ def _vi : SOPK <outs, ins, asm, []>,
+ SOPK64e <op.VI>,
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+ let isCodeGenOnly = 0;
+ }
+}
+//===----------------------------------------------------------------------===//
+// SMRD classes
+//===----------------------------------------------------------------------===//
+
+class SMRD_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ SMRD <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins,
+ string asm> :
+ SMRD <outs, ins, asm, []>,
+ SMRDe <op, imm>,
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
+
+class SMRD_Real_vi <bits<8> op, string opName, bit imm, dag outs, dag ins,
+ string asm> :
+ SMRD <outs, ins, asm, []>,
+ SMEMe_vi <op, imm>,
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
+
+multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
+ string asm, list<dag> pattern> {
+
+ def "" : SMRD_Pseudo <opName, outs, ins, pattern>;
+
+ def _si : SMRD_Real_si <op, opName, imm, outs, ins, asm>;
+
+ // glc is only applicable to scalar stores, which are not yet
+ // implemented.
+ let glc = 0 in {
+ def _vi : SMRD_Real_vi <{0, 0, 0, op}, opName, imm, outs, ins, asm>;
+ }
+}
+
+multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass,
+ RegisterClass dstClass> {
+ defm _IMM : SMRD_m <
+ op, opName#"_IMM", 1, (outs dstClass:$dst),
+ (ins baseClass:$sbase, u32imm:$offset),
+ opName#" $dst, $sbase, $offset", []
+ >;
+
+ defm _SGPR : SMRD_m <
+ op, opName#"_SGPR", 0, (outs dstClass:$dst),
+ (ins baseClass:$sbase, SReg_32:$soff),
+ opName#" $dst, $sbase, $soff", []
+ >;
+}
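+// As used elsewhere in this target (e.g. the s_load_dword definitions), a
+// single SMRD_Helper instantiation yields both an _IMM variant (immediate
+// offset) and an _SGPR variant (offset held in an SGPR).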
+
+//===----------------------------------------------------------------------===//
+// Vector ALU classes
+//===----------------------------------------------------------------------===//
+
+// This must always come immediately before the input operand it modifies.
+def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
+ let PrintMethod = "printOperandAndMods";
+}
+
+def InputModsMatchClass : AsmOperandClass {
+ let Name = "RegWithInputMods";
+}
+
+def InputModsNoDefault : Operand <i32> {
+ let PrintMethod = "printOperandAndMods";
+ let ParserMatchClass = InputModsMatchClass;
+}
+
+class getNumSrcArgs<ValueType Src1, ValueType Src2> {
+ int ret =
+ !if (!eq(Src1.Value, untyped.Value), 1, // VOP1
+ !if (!eq(Src2.Value, untyped.Value), 2, // VOP2
+ 3)); // VOP3
+}
+
+// Returns the register class to use for the destination of VOP[123C]
+// instructions for the given VT.
+class getVALUDstForVT<ValueType VT> {
+ RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
+ !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
+ VOPDstOperand<SReg_64>)); // else VT == i1
+}
+
+// Returns the register class to use for source 0 of VOP[12C]
+// instructions for the given VT.
+class getVOPSrc0ForVT<ValueType VT> {
+ RegisterOperand ret = !if(!eq(VT.Size, 32), VSrc_32, VSrc_64);
+}
+
+// Returns the register class to use for source 1 of VOP[12C] for the
+// given VT.
+class getVOPSrc1ForVT<ValueType VT> {
+ RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32, VReg_64);
+}
+
+// Returns the register class to use for sources of VOP3 instructions for the
+// given VT.
+class getVOP3SrcForVT<ValueType VT> {
+ RegisterOperand ret = !if(!eq(VT.Size, 32), VCSrc_32, VCSrc_64);
+}
+
+// Returns 1 if the source arguments have modifiers, 0 if they do not.
+class hasModifiers<ValueType SrcVT> {
+ bit ret = !if(!eq(SrcVT.Value, f32.Value), 1,
+ !if(!eq(SrcVT.Value, f64.Value), 1, 0));
+}
+
+// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
+class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
+ dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1
+ !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
+ (ins)));
+}
+
+// Returns the input arguments for VOP3 instructions for the given SrcVT.
+class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
+ RegisterOperand Src2RC, int NumSrcArgs,
+ bit HasModifiers> {
+
+ dag ret =
+ !if (!eq(NumSrcArgs, 1),
+ !if (!eq(HasModifiers, 1),
+ // VOP1 with modifiers
+ (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+ ClampMod:$clamp, omod:$omod)
+ /* else */,
+ // VOP1 without modifiers
+ (ins Src0RC:$src0)
+ /* endif */ ),
+ !if (!eq(NumSrcArgs, 2),
+ !if (!eq(HasModifiers, 1),
+ // VOP 2 with modifiers
+ (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+ InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
+ ClampMod:$clamp, omod:$omod)
+ /* else */,
+ // VOP2 without modifiers
+ (ins Src0RC:$src0, Src1RC:$src1)
+ /* endif */ )
+ /* NumSrcArgs == 3 */,
+ !if (!eq(HasModifiers, 1),
+ // VOP3 with modifiers
+ (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+ InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
+ InputModsNoDefault:$src2_modifiers, Src2RC:$src2,
+ ClampMod:$clamp, omod:$omod)
+ /* else */,
+ // VOP3 without modifiers
+ (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)
+ /* endif */ )));
+}
+
+// Returns the assembly string for the inputs and outputs of a VOP[12C]
+// instruction. This does not add the _e32 suffix, so it can be reused
+// by getAsm64.
+class getAsm32 <int NumSrcArgs> {
+ string src1 = ", $src1";
+ string src2 = ", $src2";
+ string ret = "$dst, $src0"#
+ !if(!eq(NumSrcArgs, 1), "", src1)#
+ !if(!eq(NumSrcArgs, 3), src2, "");
+}
+
+// Returns the assembly string for the inputs and outputs of a VOP3
+// instruction.
+class getAsm64 <int NumSrcArgs, bit HasModifiers> {
+ string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
+ string src1 = !if(!eq(NumSrcArgs, 1), "",
+ !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+ " $src1_modifiers,"));
+ string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
+ string ret =
+ !if(!eq(HasModifiers, 0),
+ getAsm32<NumSrcArgs>.ret,
+ "$dst, "#src0#src1#src2#"$clamp"#"$omod");
+}
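+// For example, a two-source instruction with modifiers produces the string
+// "$dst, $src0_modifiers, $src1_modifiers$clamp$omod".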
+
+
+class VOPProfile <list<ValueType> _ArgVT> {
+
+ field list<ValueType> ArgVT = _ArgVT;
+
+ field ValueType DstVT = ArgVT[0];
+ field ValueType Src0VT = ArgVT[1];
+ field ValueType Src1VT = ArgVT[2];
+ field ValueType Src2VT = ArgVT[3];
+ field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
+ field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
+ field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
+ field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
+ field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
+ field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
+
+ field int NumSrcArgs = getNumSrcArgs<Src1VT, Src2VT>.ret;
+ field bit HasModifiers = hasModifiers<Src0VT>.ret;
+
+ field dag Outs = (outs DstRC:$dst);
+
+ field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
+ field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
+ HasModifiers>.ret;
+
+ field string Asm32 = getAsm32<NumSrcArgs>.ret;
+ field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret;
+}
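+// Illustrative instantiation: VOP_F32_F32_F32 below resolves to
+// DstRC = VOPDstOperand<VGPR_32>, NumSrcArgs = 2, HasModifiers = 1 and
+// Asm32 = "$dst, $src0, $src1".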
+
+// FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order
+// for the instruction patterns to work.
+def VOP_F16_F16 : VOPProfile <[f32, f32, untyped, untyped]>;
+def VOP_F16_I16 : VOPProfile <[f32, i32, untyped, untyped]>;
+def VOP_I16_F16 : VOPProfile <[i32, f32, untyped, untyped]>;
+
+def VOP_F16_F16_F16 : VOPProfile <[f32, f32, f32, untyped]>;
+def VOP_F16_F16_I16 : VOPProfile <[f32, f32, i32, untyped]>;
+def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>;
+
+def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
+def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
+def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
+def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
+def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
+def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
+def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
+def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
+def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
+
+def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
+def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
+def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
+def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
+def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
+def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
+def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
+def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
+ let Src0RC32 = VCSrc_32;
+}
+
+def VOP_I1_F32_I32 : VOPProfile <[i1, f32, i32, untyped]> {
+ let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
+ let Asm64 = "$dst, $src0_modifiers, $src1";
+}
+
+def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> {
+ let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
+ let Asm64 = "$dst, $src0_modifiers, $src1";
+}
+
+def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
+def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
+def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
+def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> {
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VCCReg:$src2);
+ let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2);
+ let Asm64 = "$dst, $src0, $src1, $src2";
+}
+
+def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
+def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> {
+ field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2);
+ field string Asm = "$dst, $src0, $vsrc1, $src2";
+}
+def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
+def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
+def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
+
+
+class VOP <string opName> {
+ string OpName = opName;
+}
+
+class VOP2_REV <string revOp, bit isOrig> {
+ string RevOp = revOp;
+ bit IsOrig = isOrig;
+}
+
+class AtomicNoRet <string noRetOp, bit isRet> {
+ string NoRetOp = noRetOp;
+ bit IsRet = isRet;
+}
+
+class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
+ VOP1Common <outs, ins, "", pattern>,
+ VOP <opName>,
+ SIMCInstr <opName#"_e32", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+
+ field bits<8> vdst;
+ field bits<9> src0;
+}
+
+class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> :
+ VOP1<op.SI, outs, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ let AssemblerPredicate = SIAssemblerPredicate;
+}
+
+class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
+ VOP1<op.VI, outs, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
+
+multiclass VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName> {
+ def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
+
+ def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
+
+ def _vi : VOP1_Real_vi <opName, op, outs, ins, asm>;
+}
+
+multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName> {
+ def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
+
+ def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
+}
+
+class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
+ VOP2Common <outs, ins, "", pattern>,
+ VOP <opName>,
+ SIMCInstr<opName#"_e32", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
+ VOP2 <op.SI, outs, ins, opName#asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
+
+class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
+ VOP2 <op.VI, outs, ins, opName#asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
+
+multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName, string revOp> {
+ def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+
+ def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
+}
+
+multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName, string revOp> {
+ def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+
+ def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
+
+ def _vi : VOP2_Real_vi <opName, op, outs, ins, asm>;
+
+}
+
+class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
+
+ bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
+ bits<2> src1_modifiers = !if(HasModifiers, !if(HasSrc1, ?, 0), 0);
+ bits<2> src2_modifiers = !if(HasModifiers, !if(HasSrc2, ?, 0), 0);
+ bits<2> omod = !if(HasModifiers, ?, 0);
+ bits<1> clamp = !if(HasModifiers, ?, 0);
+ bits<9> src1 = !if(HasSrc1, ?, 0);
+ bits<9> src2 = !if(HasSrc2, ?, 0);
+}
+
+class VOP3DisableModFields <bit HasSrc0Mods,
+ bit HasSrc1Mods = 0,
+ bit HasSrc2Mods = 0,
+ bit HasOutputMods = 0> {
+ bits<2> src0_modifiers = !if(HasSrc0Mods, ?, 0);
+ bits<2> src1_modifiers = !if(HasSrc1Mods, ?, 0);
+ bits<2> src2_modifiers = !if(HasSrc2Mods, ?, 0);
+ bits<2> omod = !if(HasOutputMods, ?, 0);
+ bits<1> clamp = !if(HasOutputMods, ?, 0);
+}
+
+class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
+ VOP3Common <outs, ins, "", pattern>,
+ VOP <opName>,
+ SIMCInstr<opName#"_e64", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e64", opName> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
+ VOP3Common <outs, ins, asm, []>,
+ VOP3e <op>,
+ SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
+
+class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
+ VOP3Common <outs, ins, asm, []>,
+ VOP3e_vi <op>,
+ SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
+
+class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
+ VOP3Common <outs, ins, asm, []>,
+ VOP3be <op>,
+ SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
+
+class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
+ VOP3Common <outs, ins, asm, []>,
+ VOP3be_vi <op>,
+ SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
+
+multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName, int NumSrcArgs, bit HasMods = 1> {
+
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
+
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
+ !if(!eq(NumSrcArgs, 2), 0, 1),
+ HasMods>;
+ def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
+ VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
+ !if(!eq(NumSrcArgs, 2), 0, 1),
+ HasMods>;
+}
+
+// VOP3_m without source modifiers
+multiclass VOP3_m_nomods <vop op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName, int NumSrcArgs, bit HasMods = 1> {
+
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
+
+ let src0_modifiers = 0,
+ src1_modifiers = 0,
+ src2_modifiers = 0,
+ clamp = 0,
+ omod = 0 in {
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>;
+ def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>;
+ }
+}
+
+multiclass VOP3_1_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, bit HasMods = 1> {
+
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
+
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<0, 0, HasMods>;
+
+ def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
+ VOP3DisableFields<0, 0, HasMods>;
+}
+
+multiclass VOP3SI_1_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, bit HasMods = 1> {
+
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
+
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<0, 0, HasMods>;
+ // No VI instruction. This class is for SI only.
+}
+
+multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, string revOp,
+ bit HasMods = 1, bit UseFullOp = 0> {
+
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 0, HasMods>;
+
+ def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 0, HasMods>;
+}
+
+multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, string revOp,
+ bit HasMods = 1, bit UseFullOp = 0> {
+
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 0, HasMods>;
+
+ // No VI instruction. This class is for SI only.
+}
+
+// XXX - Is v_div_scale_{f32|f64} only available in vop3b without
+// option of implicit vcc use?
+multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, string revOp,
+ bit HasMods = 1, bit UseFullOp = 0> {
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+
+ // The VOP2 variant puts the carry out into VCC, the VOP3 variant
+ // can write it into any SGPR. We currently don't use the carry out,
+ // so for now hardcode it to VCC as well.
+ let sdst = SIOperand.VCC, Defs = [VCC] in {
+ def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 0, HasMods>;
+
+ def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 0, HasMods>;
+ } // End sdst = SIOperand.VCC, Defs = [VCC]
+}
+
+multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, string revOp,
+ bit HasMods = 1, bit UseFullOp = 0> {
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
+
+
+ def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 1, HasMods>;
+
+ def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 1, HasMods>;
+}
+
+multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName,
+ bit HasMods, bit defExec, string revOp> {
+
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 0, HasMods> {
+ let Defs = !if(defExec, [EXEC], []);
+ }
+
+ def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 0, HasMods> {
+ let Defs = !if(defExec, [EXEC], []);
+ }
+}
+
+// An instruction that is VOP2 on SI and VOP3 on VI, no modifiers.
+multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
+ string asm, list<dag> pattern = []> {
+ let isPseudo = 1, isCodeGenOnly = 1 in {
+ def "" : VOPAnyCommon <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE>;
+ }
+
+ def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
+ SIMCInstr <opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+ }
+
+ def _vi : VOP3Common <outs, ins, asm, []>,
+ VOP3e_vi <op.VI3>,
+ VOP3DisableFields <1, 0, 0>,
+ SIMCInstr <opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+ }
+}
+
+multiclass VOP1_Helper <vop1 op, string opName, dag outs,
+ dag ins32, string asm32, list<dag> pat32,
+ dag ins64, string asm64, list<dag> pat64,
+ bit HasMods> {
+
+ defm _e32 : VOP1_m <op, outs, ins32, opName#asm32, pat32, opName>;
+
+ defm _e64 : VOP3_1_m <op, outs, ins64, opName#asm64, pat64, opName, HasMods>;
+}
+
+multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag> : VOP1_Helper <
+ op, opName, P.Outs,
+ P.Ins32, P.Asm32, [],
+ P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
+ i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0))]),
+ P.HasModifiers
+>;
+
+multiclass VOP1InstSI <vop1 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag> {
+
+ defm _e32 : VOP1SI_m <op, P.Outs, P.Ins32, opName#P.Asm32, [], opName>;
+
+ defm _e64 : VOP3SI_1_m <op, P.Outs, P.Ins64, opName#P.Asm64,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
+ i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0))]),
+ opName, P.HasModifiers>;
+}
+
+multiclass VOP2_Helper <vop2 op, string opName, dag outs,
+ dag ins32, string asm32, list<dag> pat32,
+ dag ins64, string asm64, list<dag> pat64,
+ string revOp, bit HasMods> {
+ defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
+
+ defm _e64 : VOP3_2_m <op,
+ outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
+ >;
+}
+
+multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName> : VOP2_Helper <
+ op, opName, P.Outs,
+ P.Ins32, P.Asm32, [],
+ P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
+ revOp, P.HasModifiers
+>;
+
+multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName> {
+ defm _e32 : VOP2SI_m <op, P.Outs, P.Ins32, P.Asm32, [], opName, revOp>;
+
+ defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#P.Asm64,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
+ opName, revOp, P.HasModifiers>;
+}
+
+multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
+ dag ins32, string asm32, list<dag> pat32,
+ dag ins64, string asm64, list<dag> pat64,
+ string revOp, bit HasMods> {
+
+ defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
+
+ defm _e64 : VOP3b_2_m <op,
+ outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
+ >;
+}
+
+multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName> : VOP2b_Helper <
+ op, opName, P.Outs,
+ P.Ins32, P.Asm32, [],
+ P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
+ revOp, P.HasModifiers
+>;
+
+// A VOP2 instruction that is VOP3-only on VI.
+multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs,
+ dag ins32, string asm32, list<dag> pat32,
+ dag ins64, string asm64, list<dag> pat64,
+ string revOp, bit HasMods> {
+ defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOp>;
+
+ defm _e64 : VOP3_2_m <op, outs, ins64, opName#asm64, pat64, opName,
+ revOp, HasMods>;
+}
+
+multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName>
+ : VOP2_VI3_Helper <
+ op, opName, P.Outs,
+ P.Ins32, P.Asm32, [],
+ P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
+ revOp, P.HasModifiers
+>;
+
+multiclass VOP2MADK <vop2 op, string opName, list<dag> pattern = []> {
+
+ def "" : VOP2_Pseudo <VOP_MADK.Outs, VOP_MADK.Ins, pattern, opName>;
+
+let isCodeGenOnly = 0 in {
+ def _si : VOP2Common <VOP_MADK.Outs, VOP_MADK.Ins,
+ !strconcat(opName, VOP_MADK.Asm), []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI>,
+ VOP2_MADKe <op.SI> {
+ let AssemblerPredicates = [isSICI];
+ }
+
+ def _vi : VOP2Common <VOP_MADK.Outs, VOP_MADK.Ins,
+ !strconcat(opName, VOP_MADK.Asm), []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI>,
+ VOP2_MADKe <op.VI> {
+ let AssemblerPredicates = [isVI];
+ }
+} // End isCodeGenOnly = 0
+}
+
+class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
+ VOPCCommon <ins, "", pattern>,
+ VOP <opName>,
+ SIMCInstr<opName#"_e32", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+multiclass VOPC_m <vopc op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName, bit DefExec, string revOpName = ""> {
+ def "" : VOPC_Pseudo <outs, ins, pattern, opName>;
+
+ def _si : VOPC<op.SI, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ let Defs = !if(DefExec, [EXEC], []);
+ let hasSideEffects = DefExec;
+ }
+
+ def _vi : VOPC<op.VI, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ let Defs = !if(DefExec, [EXEC], []);
+ let hasSideEffects = DefExec;
+ }
+}
+
+multiclass VOPC_Helper <vopc op, string opName,
+ dag ins32, string asm32, list<dag> pat32,
+ dag out64, dag ins64, string asm64, list<dag> pat64,
+ bit HasMods, bit DefExec, string revOp> {
+ defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
+
+ defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
+ opName, HasMods, DefExec, revOp>;
+}
+
+// Special case for class instructions which only have modifiers on
+// the 1st source operand.
+multiclass VOPC_Class_Helper <vopc op, string opName,
+ dag ins32, string asm32, list<dag> pat32,
+ dag out64, dag ins64, string asm64, list<dag> pat64,
+ bit HasMods, bit DefExec, string revOp> {
+ defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
+
+ defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
+ opName, HasMods, DefExec, revOp>,
+ VOP3DisableModFields<1, 0, 0>;
+}
+
+multiclass VOPCInst <vopc op, string opName,
+ VOPProfile P, PatLeaf cond = COND_NULL,
+ string revOp = opName,
+ bit DefExec = 0> : VOPC_Helper <
+ op, opName,
+ P.Ins32, P.Asm32, [],
+ (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set i1:$dst,
+ (setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ cond))],
+ [(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
+ P.HasModifiers, DefExec, revOp
+>;
+
+multiclass VOPCClassInst <vopc op, string opName, VOPProfile P,
+ bit DefExec = 0> : VOPC_Class_Helper <
+ op, opName,
+ P.Ins32, P.Asm32, [],
+ (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set i1:$dst,
+ (AMDGPUfp_class (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)), P.Src1VT:$src1))],
+ [(set i1:$dst, (AMDGPUfp_class P.Src0VT:$src0, P.Src1VT:$src1))]),
+ P.HasModifiers, DefExec, opName
+>;
+
+
+multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_F32_F32_F32, cond, revOp>;
+
+multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_F64_F64_F64, cond, revOp>;
+
+multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_I32_I32_I32, cond, revOp>;
+
+multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_I64_I64_I64, cond, revOp>;
+
+
+multiclass VOPCX <vopc op, string opName, VOPProfile P,
+ PatLeaf cond = COND_NULL,
+ string revOp = "">
+ : VOPCInst <op, opName, P, cond, revOp, 1>;
+
+multiclass VOPCX_F32 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_F32_F32_F32, COND_NULL, revOp>;
+
+multiclass VOPCX_F64 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_F64_F64_F64, COND_NULL, revOp>;
+
+multiclass VOPCX_I32 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_I32_I32_I32, COND_NULL, revOp>;
+
+multiclass VOPCX_I64 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_I64_I64_I64, COND_NULL, revOp>;
+
+multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
+ list<dag> pat, int NumSrcArgs, bit HasMods> : VOP3_m <
+ op, outs, ins, opName#" "#asm, pat, opName, NumSrcArgs, HasMods
+>;
+
+multiclass VOPC_CLASS_F32 <vopc op, string opName> :
+ VOPCClassInst <op, opName, VOP_I1_F32_I32, 0>;
+
+multiclass VOPCX_CLASS_F32 <vopc op, string opName> :
+ VOPCClassInst <op, opName, VOP_I1_F32_I32, 1>;
+
+multiclass VOPC_CLASS_F64 <vopc op, string opName> :
+ VOPCClassInst <op, opName, VOP_I1_F64_I32, 0>;
+
+multiclass VOPCX_CLASS_F64 <vopc op, string opName> :
+ VOPCClassInst <op, opName, VOP_I1_F64_I32, 1>;
+
+multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag> : VOP3_Helper <
+ op, opName, (outs P.DstRC.RegClass:$dst), P.Ins64, P.Asm64,
+ !if(!eq(P.NumSrcArgs, 3),
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1,
+ P.Src2VT:$src2))]),
+ !if(!eq(P.NumSrcArgs, 2),
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))])
+ /* P.NumSrcArgs == 1 */,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0))]))),
+ P.NumSrcArgs, P.HasModifiers
+>;
+
+// Special case for v_div_fmas_{f32|f64}, since it seems to be the
+// only VOP instruction that implicitly reads VCC.
+multiclass VOP3_VCC_Inst <vop3 op, string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag> : VOP3_Helper <
+ op, opName,
+ (outs P.DstRC.RegClass:$dst),
+ (ins InputModsNoDefault:$src0_modifiers, P.Src0RC64:$src0,
+ InputModsNoDefault:$src1_modifiers, P.Src1RC64:$src1,
+ InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2,
+ ClampMod:$clamp,
+ omod:$omod),
+ " $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod",
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers)),
+ (i1 VCC)))],
+ 3, 1
+>;
+
+multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc,
+ string opName, list<dag> pattern> :
+ VOP3b_3_m <
+ op, (outs vrc:$vdst, SReg_64:$sdst),
+ (ins InputModsNoDefault:$src0_modifiers, arc:$src0,
+ InputModsNoDefault:$src1_modifiers, arc:$src1,
+ InputModsNoDefault:$src2_modifiers, arc:$src2,
+ ClampMod:$clamp, omod:$omod),
+ opName#" $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern,
+ opName, opName, 1, 1
+>;
+
+multiclass VOP3b_64 <vop3 op, string opName, list<dag> pattern> :
+ VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>;
+
+multiclass VOP3b_32 <vop3 op, string opName, list<dag> pattern> :
+ VOP3b_Helper <op, VGPR_32, VSrc_32, opName, pattern>;
+
+
+class Vop3ModPat<Instruction Inst, VOPProfile P, SDPatternOperator node> : Pat<
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))),
+ (Inst i32:$src0_modifiers, P.Src0VT:$src0,
+ i32:$src1_modifiers, P.Src1VT:$src1,
+ i32:$src2_modifiers, P.Src2VT:$src2,
+ i1:$clamp,
+ i32:$omod)>;
+
+//===----------------------------------------------------------------------===//
+// Interpolation opcodes
+//===----------------------------------------------------------------------===//
+
+class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ VINTRPCommon <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
+ string asm> :
+ VINTRPCommon <outs, ins, asm, []>,
+ VINTRPe <op>,
+ SIMCInstr<opName, SISubtarget.SI>;
+
+class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
+ string asm> :
+ VINTRPCommon <outs, ins, asm, []>,
+ VINTRPe_vi <op>,
+ SIMCInstr<opName, SISubtarget.VI>;
+
+multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
+ list<dag> pattern = []> {
+ def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;
+
+ def _si : VINTRP_Real_si <op, NAME, outs, ins, asm>;
+
+ def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector I/O classes
+//===----------------------------------------------------------------------===//
+
+class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ DS <outs, ins, "", pattern>,
+ SIMCInstr <opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS <outs, ins, asm, []>,
+ DSe <op>,
+ SIMCInstr <opName, SISubtarget.SI> {
+ let isCodeGenOnly = 0;
+}
+
+class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS <outs, ins, asm, []>,
+ DSe_vi <op>,
+ SIMCInstr <opName, SISubtarget.VI>;
+
+class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS_Real_si <op,opName, outs, ins, asm> {
+
+ // Single loads interpret the two i8imm operands as a single i16 offset.
+ bits<16> offset;
+ let offset0 = offset{7-0};
+ let offset1 = offset{15-8};
+ let isCodeGenOnly = 0;
+}
+
+class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS_Real_vi <op, opName, outs, ins, asm> {
+
+ // Single loads interpret the two i8imm operands as a single i16 offset.
+ bits<16> offset;
+ let offset0 = offset{7-0};
+ let offset1 = offset{15-8};
+}
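+
+// As a purely illustrative sketch of the slicing above (values and the C++
+// spelling chosen only for exposition, assuming nothing beyond the bit
+// slices themselves):
+//   uint16_t Offset  = 0x0104;           // example value
+//   uint8_t  Offset0 = Offset & 0xff;    // 0x04, i.e. offset{7-0}
+//   uint8_t  Offset1 = Offset >> 8;      // 0x01, i.e. offset{15-8}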
+
+multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds),
+ string asm = opName#" $vdst, $addr"#"$offset$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ }
+}
+
+multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
+ gds01:$gds),
+ string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0, AsmMatchConverter = "cvtDSOffset01" in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
+ }
+}
+
+multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds),
+ string asm = opName#" $addr, $data0"#"$offset$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<opName, 0>;
+
+ let data1 = 0, vdst = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ }
+}
+
+multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+ ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds),
+ string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0, AsmMatchConverter = "cvtDSOffset01" in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
+ }
+}
+
+multiclass DS_1A1D_RET <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = "",
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds),
+ string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
+
+ let data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ }
+}
+
+multiclass DS_1A2D_RET_m <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = "", dag ins,
+ dag outs = (outs rc:$vdst),
+ string asm = opName#" $vdst, $addr, $data0, $data1"#"$offset"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
+
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+}
+
+multiclass DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc,
+ string noRetOp = "", RegisterClass src = rc> :
+ DS_1A2D_RET_m <op, asm, rc, noRetOp,
+ (ins VGPR_32:$addr, src:$data0, src:$data1,
+ ds_offset:$offset, gds:$gds)
+>;
+
+multiclass DS_1A2D_NORET <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+ ds_offset:$offset, gds:$gds),
+ string asm = opName#" $addr, $data0, $data1"#"$offset"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 0>;
+
+ let vdst = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ }
+}
+
+multiclass DS_0A_RET <bits<8> op, string opName,
+ dag outs = (outs VGPR_32:$vdst),
+ dag ins = (ins ds_offset:$offset, gds:$gds),
+ string asm = opName#" $vdst"#"$offset"#"$gds"> {
+
+ let mayLoad = 1, mayStore = 1 in {
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let addr = 0, data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ } // end addr = 0, data0 = 0, data1 = 0
+ } // end mayLoad = 1, mayStore = 1
+}
+
+multiclass DS_1A_RET_GDS <bits<8> op, string opName,
+ dag outs = (outs VGPR_32:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset),
+ string asm = opName#" $vdst, $addr"#"$offset gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0, gds = 1 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ } // end data0 = 0, data1 = 0, gds = 1
+}
+
+multiclass DS_1A_GDS <bits<8> op, string opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr),
+ string asm = opName#" $addr gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1 in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
+ } // end vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1
+}
+
+multiclass DS_1A <bits<8> op, string opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds),
+ string asm = opName#" $addr"#"$offset"#"$gds"> {
+
+ let mayLoad = 1, mayStore = 1 in {
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0, data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ } // let vdst = 0, data0 = 0, data1 = 0
+ } // end mayLoad = 1, mayStore = 1
+}
+
+//===----------------------------------------------------------------------===//
+// MTBUF classes
+//===----------------------------------------------------------------------===//
+
+class MTBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ MTBUF <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+class MTBUF_Real_si <bits<3> op, string opName, dag outs, dag ins,
+ string asm> :
+ MTBUF <outs, ins, asm, []>,
+ MTBUFe <op>,
+ SIMCInstr<opName, SISubtarget.SI>;
+
+class MTBUF_Real_vi <bits<4> op, string opName, dag outs, dag ins, string asm> :
+ MTBUF <outs, ins, asm, []>,
+ MTBUFe_vi <op>,
+ SIMCInstr <opName, SISubtarget.VI>;
+
+multiclass MTBUF_m <bits<3> op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> {
+
+ def "" : MTBUF_Pseudo <opName, outs, ins, pattern>;
+
+ def _si : MTBUF_Real_si <op, opName, outs, ins, asm>;
+
+ def _vi : MTBUF_Real_vi <{0, op{2}, op{1}, op{0}}, opName, outs, ins, asm>;
+
+}
+
+let mayStore = 1, mayLoad = 0 in {
+
+multiclass MTBUF_Store_Helper <bits<3> op, string opName,
+ RegisterClass regClass> : MTBUF_m <
+ op, opName, (outs),
+ (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
+ i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr,
+ SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_32:$soffset),
+ opName#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
+ #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []
+>;
+
+} // mayStore = 1, mayLoad = 0
+
+let mayLoad = 1, mayStore = 0 in {
+
+multiclass MTBUF_Load_Helper <bits<3> op, string opName,
+ RegisterClass regClass> : MTBUF_m <
+ op, opName, (outs regClass:$dst),
+ (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+ i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, SReg_128:$srsrc,
+ i1imm:$slc, i1imm:$tfe, SCSrc_32:$soffset),
+ opName#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
+ #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []
+>;
+
+} // mayLoad = 1, mayStore = 0
+
+//===----------------------------------------------------------------------===//
+// MUBUF classes
+//===----------------------------------------------------------------------===//
+
+class mubuf <bits<7> si, bits<7> vi = si> {
+ field bits<7> SI = si;
+ field bits<7> VI = vi;
+}
+
+let isCodeGenOnly = 0 in {
+
+class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
+ let lds = 0;
+}
+
+} // End let isCodeGenOnly = 0
+
+class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> {
+ let lds = 0;
+}
+
+class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
+ bit IsAddr64 = is_addr64;
+ string OpName = NAME # suffix;
+}
+
+class MUBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ MUBUF <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+
+ // dummy fields, so that we can use let statements around multiclasses
+ bits<1> offen;
+ bits<1> idxen;
+ bits<8> vaddr;
+ bits<1> glc;
+ bits<1> slc;
+ bits<1> tfe;
+ bits<8> soffset;
+}
+
+class MUBUF_Real_si <mubuf op, string opName, dag outs, dag ins,
+ string asm> :
+ MUBUF <outs, ins, asm, []>,
+ MUBUFe <op.SI>,
+ SIMCInstr<opName, SISubtarget.SI> {
+ let lds = 0;
+}
+
+class MUBUF_Real_vi <mubuf op, string opName, dag outs, dag ins,
+ string asm> :
+ MUBUF <outs, ins, asm, []>,
+ MUBUFe_vi <op.VI>,
+ SIMCInstr<opName, SISubtarget.VI> {
+ let lds = 0;
+}
+
+multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> {
+
+ def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
+ MUBUFAddr64Table <0>;
+
+ let addr64 = 0, isCodeGenOnly = 0 in {
+ def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
+ }
+
+ def _vi : MUBUF_Real_vi <op, opName, outs, ins, asm>;
+}
+
+multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
+ dag ins, string asm, list<dag> pattern> {
+
+ def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
+ MUBUFAddr64Table <1>;
+
+ let addr64 = 1, isCodeGenOnly = 0 in {
+ def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
+ }
+
+ // There is no VI version. If the pseudo is selected, it should be lowered
+ // for VI appropriately.
+}
+
+multiclass MUBUFAtomicOffset_m <mubuf op, string opName, dag outs, dag ins,
+ string asm, list<dag> pattern, bit is_return> {
+
+ def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
+ MUBUFAddr64Table <0, !if(is_return, "_RTN", "")>,
+ AtomicNoRet<NAME#"_OFFSET", is_return>;
+
+ let offen = 0, idxen = 0, tfe = 0, vaddr = 0 in {
+ let addr64 = 0 in {
+ def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
+ }
+
+ def _vi : MUBUF_Real_vi <op, opName, outs, ins, asm>;
+ }
+}
+
+multiclass MUBUFAtomicAddr64_m <mubuf op, string opName, dag outs, dag ins,
+ string asm, list<dag> pattern, bit is_return> {
+
+ def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
+ MUBUFAddr64Table <1, !if(is_return, "_RTN", "")>,
+ AtomicNoRet<NAME#"_ADDR64", is_return>;
+
+ let offen = 0, idxen = 0, addr64 = 1, tfe = 0 in {
+ def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
+ }
+
+ // There is no VI version. If the pseudo is selected, it should be lowered
+ // for VI appropriately.
+}
+
+multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
+ ValueType vt, SDPatternOperator atomic> {
+
+ let mayStore = 1, mayLoad = 1, hasPostISelHook = 1 in {
+
+ // No return variants
+ let glc = 0 in {
+
+ defm _ADDR64 : MUBUFAtomicAddr64_m <
+ op, name#"_addr64", (outs),
+ (ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
+ SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#"$slc", [], 0
+ >;
+
+ defm _OFFSET : MUBUFAtomicOffset_m <
+ op, name#"_offset", (outs),
+ (ins rc:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset, mbuf_offset:$offset,
+ slc:$slc),
+ name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [], 0
+ >;
+ } // glc = 0
+
+ // Variants that return values
+ let glc = 1, Constraints = "$vdata = $vdata_in",
+ DisableEncoding = "$vdata_in" in {
+
+ defm _RTN_ADDR64 : MUBUFAtomicAddr64_m <
+ op, name#"_rtn_addr64", (outs rc:$vdata),
+ (ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
+ SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#" glc"#"$slc",
+ [(set vt:$vdata,
+ (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$slc), vt:$vdata_in))], 1
+ >;
+
+ defm _RTN_OFFSET : MUBUFAtomicOffset_m <
+ op, name#"_rtn_offset", (outs rc:$vdata),
+ (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
+ [(set vt:$vdata,
+ (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
+ i1:$slc), vt:$vdata_in))], 1
+ >;
+
+ } // glc = 1
+
+ } // mayStore = 1, mayLoad = 1, hasPostISelHook = 1
+}
+
+multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
+ ValueType load_vt = i32,
+ SDPatternOperator ld = null_frag> {
+
+ let mayLoad = 1, mayStore = 0 in {
+ let offen = 0, idxen = 0, vaddr = 0 in {
+ defm _OFFSET : MUBUF_m <op, name#"_offset", (outs regClass:$vdata),
+ (ins SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
+ [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
+ i32:$soffset, i16:$offset,
+ i1:$glc, i1:$slc, i1:$tfe)))]>;
+ }
+
+ let offen = 1, idxen = 0 in {
+ defm _OFFEN : MUBUF_m <op, name#"_offen", (outs regClass:$vdata),
+ (ins VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc,
+ tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 0, idxen = 1 in {
+ defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs regClass:$vdata),
+ (ins VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+ slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 1, idxen = 1 in {
+ defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs regClass:$vdata),
+ (ins VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 0, idxen = 0 in {
+ defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs regClass:$vdata),
+ (ins VReg_64:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset,
+ glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#
+ "$glc"#"$slc"#"$tfe",
+ [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
+ i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc,
+ i1:$tfe)))]>;
+ }
+ }
+}
+
+multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
+ ValueType store_vt = i32, SDPatternOperator st = null_frag> {
+ let mayLoad = 0, mayStore = 1 in {
+ defm : MUBUF_m <op, name, (outs),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc,
+ tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#
+ "$glc"#"$slc"#"$tfe", []>;
+
+ let offen = 0, idxen = 0, vaddr = 0 in {
+ defm _OFFSET : MUBUF_m <op, name#"_offset",(outs),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
+ [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>;
+ } // offen = 0, idxen = 0, vaddr = 0
+
+ let offen = 1, idxen = 0 in {
+ defm _OFFEN : MUBUF_m <op, name#"_offen", (outs),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+ slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#
+ "$glc"#"$slc"#"$tfe", []>;
+ } // end offen = 1, idxen = 0
+
+ let offen = 0, idxen = 1 in {
+ defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+ slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 1, idxen = 1 in {
+ defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs),
+ (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 0, idxen = 0 in {
+ defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs),
+ (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc,
+ tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#
+ "$offset"#"$glc"#"$slc"#"$tfe",
+ [(st store_vt:$vdata,
+ (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr,
+ i32:$soffset, i16:$offset,
+ i1:$glc, i1:$slc, i1:$tfe))]>;
+ }
+ } // End mayLoad = 0, mayStore = 1
+}
+
+class FLAT_Load_Helper <bits<7> op, string asm, RegisterClass regClass> :
+ FLAT <op, (outs regClass:$vdst),
+ (ins VReg_64:$addr, glc_flat:$glc, slc_flat:$slc, tfe_flat:$tfe),
+ asm#" $vdst, $addr"#"$glc"#"$slc"#"$tfe", []> {
+ let data = 0;
+ let mayLoad = 1;
+}
+
+class FLAT_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
+ FLAT <op, (outs), (ins vdataClass:$data, VReg_64:$addr,
+ glc_flat:$glc, slc_flat:$slc, tfe_flat:$tfe),
+ name#" $data, $addr"#"$glc"#"$slc"#"$tfe",
+ []> {
+
+ let mayLoad = 0;
+ let mayStore = 1;
+
+ // Encoding
+ let vdst = 0;
+}
+
+multiclass FLAT_ATOMIC <bits<7> op, string name, RegisterClass vdst_rc,
+ RegisterClass data_rc = vdst_rc> {
+
+ let mayLoad = 1, mayStore = 1 in {
+ def "" : FLAT <op, (outs),
+ (ins VReg_64:$addr, data_rc:$data, slc_flat_atomic:$slc,
+ tfe_flat_atomic:$tfe),
+ name#" $addr, $data"#"$slc"#"$tfe", []>,
+ AtomicNoRet <NAME, 0> {
+ let glc = 0;
+ let vdst = 0;
+ }
+
+ def _RTN : FLAT <op, (outs vdst_rc:$vdst),
+ (ins VReg_64:$addr, data_rc:$data, slc_flat_atomic:$slc,
+ tfe_flat_atomic:$tfe),
+ name#" $vdst, $addr, $data glc"#"$slc"#"$tfe", []>,
+ AtomicNoRet <NAME, 1> {
+ let glc = 1;
+ }
+ }
+}
+
+class MIMG_Mask <string op, int channels> {
+ string Op = op;
+ int Channels = channels;
+}
+
+class MIMG_NoSampler_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ RegisterClass src_rc> : MIMG <
+ op,
+ (outs dst_rc:$vdata),
+ (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
+ SReg_256:$srsrc),
+ asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
+ #" $tfe, $lwe, $slc, $vaddr, $srsrc",
+ []> {
+ let ssamp = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasPostISelHook = 1;
+}
+
+multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ int channels> {
+ def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>,
+ MIMG_Mask<asm#"_V4", channels>;
+}
+
+multiclass MIMG_NoSampler <bits<7> op, string asm> {
+ defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VGPR_32, 1>;
+ defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2>;
+ defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3>;
+ defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4>;
+}
+
+class MIMG_Sampler_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ RegisterClass src_rc, int wqm> : MIMG <
+ op,
+ (outs dst_rc:$vdata),
+ (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
+ SReg_256:$srsrc, SReg_128:$ssamp),
+ asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
+ #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasPostISelHook = 1;
+ let WQM = wqm;
+}
+
+multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ int channels, int wqm> {
+ def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm>,
+ MIMG_Mask<asm#"_V4", channels>;
+ def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm>,
+ MIMG_Mask<asm#"_V8", channels>;
+ def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm>,
+ MIMG_Mask<asm#"_V16", channels>;
+}
+
+multiclass MIMG_Sampler <bits<7> op, string asm> {
+ defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, 0>;
+ defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, 0>;
+ defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, 0>;
+ defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, 0>;
+}
+
+multiclass MIMG_Sampler_WQM <bits<7> op, string asm> {
+ defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, 1>;
+ defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, 1>;
+ defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, 1>;
+ defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, 1>;
+}
+
+class MIMG_Gather_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ RegisterClass src_rc, int wqm> : MIMG <
+ op,
+ (outs dst_rc:$vdata),
+ (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
+ SReg_256:$srsrc, SReg_128:$ssamp),
+ asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
+ #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+
+ // DMASK was repurposed for GATHER4. 4 components are always
+ // returned and DMASK works like a swizzle - it selects
+ // the component to fetch. The only useful DMASK values are
+ // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
+ // (red,red,red,red) etc.) The ISA document doesn't mention
+ // this.
+ // Therefore, disable all code which updates DMASK by setting these two:
+ let MIMG = 0;
+ let hasPostISelHook = 0;
+ let WQM = wqm;
+}
+
+multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ int channels, int wqm> {
+ def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm>,
+ MIMG_Mask<asm#"_V4", channels>;
+ def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm>,
+ MIMG_Mask<asm#"_V8", channels>;
+ def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm>,
+ MIMG_Mask<asm#"_V16", channels>;
+}
+
+multiclass MIMG_Gather <bits<7> op, string asm> {
+ defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1, 0>;
+ defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, 0>;
+ defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3, 0>;
+ defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, 0>;
+}
+
+multiclass MIMG_Gather_WQM <bits<7> op, string asm> {
+ defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1, 1>;
+ defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, 1>;
+ defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3, 1>;
+ defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, 1>;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector instruction mappings
+//===----------------------------------------------------------------------===//
+
+// Maps an opcode in e32 form to its e64 equivalent
+def getVOPe64 : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"];
+ let ColFields = ["Size"];
+ let KeyCol = ["4"];
+ let ValueCols = [["8"]];
+}
+
+// Maps an opcode in e64 form to its e32 equivalent
+def getVOPe32 : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"];
+ let ColFields = ["Size"];
+ let KeyCol = ["8"];
+ let ValueCols = [["4"]];
+}
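+
+// A minimal usage sketch, assuming the conventional TableGen InstrMapping
+// output (one generated lookup per mapping, returning -1 when no entry
+// exists); shown in C++ for exposition only:
+//   int Op64 = AMDGPU::getVOPe64(MI.getOpcode());
+//   if (Op64 != -1) {
+//     // An e64 (VOP3) encoding exists for this e32 opcode.
+//   }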
+
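+// Maps a 4-channel MIMG opcode to its 1-, 2- or 3-channel variant.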
+def getMaskedMIMGOp : InstrMapping {
+ let FilterClass = "MIMG_Mask";
+ let RowFields = ["Op"];
+ let ColFields = ["Channels"];
+ let KeyCol = ["4"];
+ let ValueCols = [["1"], ["2"], ["3"] ];
+}
+
+// Maps a commuted opcode to its original version
+def getCommuteOrig : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+// Maps an original opcode to its commuted version
+def getCommuteRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
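+// Maps a commuted compare opcode to its original version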
+def getCommuteCmpOrig : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+// Maps an original compare opcode to its commuted version
+def getCommuteCmpRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
+
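+// Maps a pseudo instruction to the real instruction for the given subtarget.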
+def getMCOpcodeGen : InstrMapping {
+ let FilterClass = "SIMCInstr";
+ let RowFields = ["PseudoInstr"];
+ let ColFields = ["Subtarget"];
+ let KeyCol = [!cast<string>(SISubtarget.NONE)];
+ let ValueCols = [[!cast<string>(SISubtarget.SI)],[!cast<string>(SISubtarget.VI)]];
+}
+
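+// Maps a MUBUF opcode without the addr64 addressing mode to its addr64 variant.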
+def getAddr64Inst : InstrMapping {
+ let FilterClass = "MUBUFAddr64Table";
+ let RowFields = ["OpName"];
+ let ColFields = ["IsAddr64"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+// Maps an atomic opcode to its version with a return value.
+def getAtomicRetOp : InstrMapping {
+ let FilterClass = "AtomicNoRet";
+ let RowFields = ["NoRetOp"];
+ let ColFields = ["IsRet"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+// Maps an atomic opcode to its returnless version.
+def getAtomicNoRetOp : InstrMapping {
+ let FilterClass = "AtomicNoRet";
+ let RowFields = ["NoRetOp"];
+ let ColFields = ["IsRet"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
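+
+// Usage sketch under the same assumption as for the VOP mappings above
+// (generated lookups that return -1 on a miss); the call site shown is
+// illustrative, not quoted from the backend sources:
+//   // Switch a returning atomic to its returnless twin when the result
+//   // is unused and such a twin is defined.
+//   int NoRetOp = AMDGPU::getAtomicNoRetOp(MI.getOpcode());
+//   if (NoRetOp != -1)
+//     MI.setDesc(TII->get(NoRetOp));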
+
+include "SIInstructions.td"
+include "CIInstructions.td"
+include "VIInstructions.td"
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
new file mode 100644
index 0000000..8c8d836
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -0,0 +1,3327 @@
+//===-- SIInstructions.td - SI Instruction Definitions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file was originally auto-generated from a GPU register header file and
+// all the instruction definitions were originally commented out. Instructions
+// that are not yet supported remain commented out.
+//===----------------------------------------------------------------------===//
+
+class InterpSlots {
+ int P0 = 2;
+ int P10 = 0;
+ int P20 = 1;
+}
+def INTERP : InterpSlots;
+
+def InterpSlot : Operand<i32> {
+ let PrintMethod = "printInterpSlot";
+}
+
+def SendMsgImm : Operand<i32> {
+ let PrintMethod = "printSendMsg";
+}
+
+def isGCN : Predicate<"Subtarget->getGeneration() "
+ ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
+ AssemblerPredicate<"FeatureGCN">;
+def isSI : Predicate<"Subtarget->getGeneration() "
+ "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+
+def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
+def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
+
+def SWaitMatchClass : AsmOperandClass {
+ let Name = "SWaitCnt";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "parseSWaitCntOps";
+}
+
+def WAIT_FLAG : InstFlag<"printWaitFlag"> {
+ let ParserMatchClass = SWaitMatchClass;
+}
+
+let SubtargetPredicate = isGCN in {
+
+//===----------------------------------------------------------------------===//
+// EXP Instructions
+//===----------------------------------------------------------------------===//
+
+defm EXP : EXP_m;
+
+//===----------------------------------------------------------------------===//
+// SMRD Instructions
+//===----------------------------------------------------------------------===//
+
+let mayLoad = 1 in {
+
+// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
+// SMRD instructions, because the SGPR_32 register class does not include M0
+// and writing to M0 from an SMRD instruction will hang the GPU.
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "s_load_dword", SReg_64, SGPR_32>;
+defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "s_load_dwordx2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "s_load_dwordx4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "s_load_dwordx8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "s_load_dwordx16", SReg_64, SReg_512>;
+
+defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
+ 0x08, "s_buffer_load_dword", SReg_128, SGPR_32
+>;
+
+defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
+ 0x09, "s_buffer_load_dwordx2", SReg_128, SReg_64
+>;
+
+defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
+ 0x0a, "s_buffer_load_dwordx4", SReg_128, SReg_128
+>;
+
+defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
+ 0x0b, "s_buffer_load_dwordx8", SReg_128, SReg_256
+>;
+
+defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
+ 0x0c, "s_buffer_load_dwordx16", SReg_128, SReg_512
+>;
+
+} // mayLoad = 1
+
+//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>;
+//def S_DCACHE_INV : SMRD_ <0x0000001f, "s_dcache_inv", []>;
+
+//===----------------------------------------------------------------------===//
+// SOP1 Instructions
+//===----------------------------------------------------------------------===//
+
+let isMoveImm = 1 in {
+ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ defm S_MOV_B32 : SOP1_32 <sop1<0x03, 0x00>, "s_mov_b32", []>;
+ defm S_MOV_B64 : SOP1_64 <sop1<0x04, 0x01>, "s_mov_b64", []>;
+ } // End isReMaterializable = 1, isAsCheapAsAMove = 1
+
+ let Uses = [SCC] in {
+ defm S_CMOV_B32 : SOP1_32 <sop1<0x05, 0x02>, "s_cmov_b32", []>;
+ defm S_CMOV_B64 : SOP1_64 <sop1<0x06, 0x03>, "s_cmov_b64", []>;
+ } // End Uses = [SCC]
+} // End isMoveImm = 1
+
+let Defs = [SCC] in {
+ defm S_NOT_B32 : SOP1_32 <sop1<0x07, 0x04>, "s_not_b32",
+ [(set i32:$dst, (not i32:$src0))]
+ >;
+
+ defm S_NOT_B64 : SOP1_64 <sop1<0x08, 0x05>, "s_not_b64",
+ [(set i64:$dst, (not i64:$src0))]
+ >;
+ defm S_WQM_B32 : SOP1_32 <sop1<0x09, 0x06>, "s_wqm_b32", []>;
+ defm S_WQM_B64 : SOP1_64 <sop1<0x0a, 0x07>, "s_wqm_b64", []>;
+} // End Defs = [SCC]
+
+
+defm S_BREV_B32 : SOP1_32 <sop1<0x0b, 0x08>, "s_brev_b32",
+ [(set i32:$dst, (AMDGPUbrev i32:$src0))]
+>;
+defm S_BREV_B64 : SOP1_64 <sop1<0x0c, 0x09>, "s_brev_b64", []>;
+
+let Defs = [SCC] in {
+ defm S_BCNT0_I32_B32 : SOP1_32 <sop1<0x0d, 0x0a>, "s_bcnt0_i32_b32", []>;
+ defm S_BCNT0_I32_B64 : SOP1_32_64 <sop1<0x0e, 0x0b>, "s_bcnt0_i32_b64", []>;
+ defm S_BCNT1_I32_B32 : SOP1_32 <sop1<0x0f, 0x0c>, "s_bcnt1_i32_b32",
+ [(set i32:$dst, (ctpop i32:$src0))]
+ >;
+ defm S_BCNT1_I32_B64 : SOP1_32_64 <sop1<0x10, 0x0d>, "s_bcnt1_i32_b64", []>;
+} // End Defs = [SCC]
+
+defm S_FF0_I32_B32 : SOP1_32 <sop1<0x11, 0x0e>, "s_ff0_i32_b32", []>;
+defm S_FF0_I32_B64 : SOP1_32_64 <sop1<0x12, 0x0f>, "s_ff0_i32_b64", []>;
+defm S_FF1_I32_B32 : SOP1_32 <sop1<0x13, 0x10>, "s_ff1_i32_b32",
+ [(set i32:$dst, (cttz_zero_undef i32:$src0))]
+>;
+defm S_FF1_I32_B64 : SOP1_32_64 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>;
+
+defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
+ [(set i32:$dst, (ctlz_zero_undef i32:$src0))]
+>;
+
+defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
+defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32",
+ [(set i32:$dst, (int_AMDGPU_flbit_i32 i32:$src0))]
+>;
+defm S_FLBIT_I32_I64 : SOP1_32_64 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
+defm S_SEXT_I32_I8 : SOP1_32 <sop1<0x19, 0x16>, "s_sext_i32_i8",
+ [(set i32:$dst, (sext_inreg i32:$src0, i8))]
+>;
+defm S_SEXT_I32_I16 : SOP1_32 <sop1<0x1a, 0x17>, "s_sext_i32_i16",
+ [(set i32:$dst, (sext_inreg i32:$src0, i16))]
+>;
+
+defm S_BITSET0_B32 : SOP1_32 <sop1<0x1b, 0x18>, "s_bitset0_b32", []>;
+defm S_BITSET0_B64 : SOP1_64 <sop1<0x1c, 0x19>, "s_bitset0_b64", []>;
+defm S_BITSET1_B32 : SOP1_32 <sop1<0x1d, 0x1a>, "s_bitset1_b32", []>;
+defm S_BITSET1_B64 : SOP1_64 <sop1<0x1e, 0x1b>, "s_bitset1_b64", []>;
+defm S_GETPC_B64 : SOP1_64_0 <sop1<0x1f, 0x1c>, "s_getpc_b64", []>;
+defm S_SETPC_B64 : SOP1_64 <sop1<0x20, 0x1d>, "s_setpc_b64", []>;
+defm S_SWAPPC_B64 : SOP1_64 <sop1<0x21, 0x1e>, "s_swappc_b64", []>;
+defm S_RFE_B64 : SOP1_64 <sop1<0x22, 0x1f>, "s_rfe_b64", []>;
+
+let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC] in {
+
+defm S_AND_SAVEEXEC_B64 : SOP1_64 <sop1<0x24, 0x20>, "s_and_saveexec_b64", []>;
+defm S_OR_SAVEEXEC_B64 : SOP1_64 <sop1<0x25, 0x21>, "s_or_saveexec_b64", []>;
+defm S_XOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x26, 0x22>, "s_xor_saveexec_b64", []>;
+defm S_ANDN2_SAVEEXEC_B64 : SOP1_64 <sop1<0x27, 0x23>, "s_andn2_saveexec_b64", []>;
+defm S_ORN2_SAVEEXEC_B64 : SOP1_64 <sop1<0x28, 0x24>, "s_orn2_saveexec_b64", []>;
+defm S_NAND_SAVEEXEC_B64 : SOP1_64 <sop1<0x29, 0x25>, "s_nand_saveexec_b64", []>;
+defm S_NOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x2a, 0x26>, "s_nor_saveexec_b64", []>;
+defm S_XNOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x2b, 0x27>, "s_xnor_saveexec_b64", []>;
+
+} // End hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC]
+
+defm S_QUADMASK_B32 : SOP1_32 <sop1<0x2c, 0x28>, "s_quadmask_b32", []>;
+defm S_QUADMASK_B64 : SOP1_64 <sop1<0x2d, 0x29>, "s_quadmask_b64", []>;
+defm S_MOVRELS_B32 : SOP1_32 <sop1<0x2e, 0x2a>, "s_movrels_b32", []>;
+defm S_MOVRELS_B64 : SOP1_64 <sop1<0x2f, 0x2b>, "s_movrels_b64", []>;
+defm S_MOVRELD_B32 : SOP1_32 <sop1<0x30, 0x2c>, "s_movreld_b32", []>;
+defm S_MOVRELD_B64 : SOP1_64 <sop1<0x31, 0x2d>, "s_movreld_b64", []>;
+defm S_CBRANCH_JOIN : SOP1_1 <sop1<0x32, 0x2e>, "s_cbranch_join", []>;
+defm S_MOV_REGRD_B32 : SOP1_32 <sop1<0x33, 0x2f>, "s_mov_regrd_b32", []>;
+let Defs = [SCC] in {
+ defm S_ABS_I32 : SOP1_32 <sop1<0x34, 0x30>, "s_abs_i32", []>;
+} // End Defs = [SCC]
+defm S_MOV_FED_B32 : SOP1_32 <sop1<0x35, 0x31>, "s_mov_fed_b32", []>;
+
+//===----------------------------------------------------------------------===//
+// SOP2 Instructions
+//===----------------------------------------------------------------------===//
+
+let Defs = [SCC] in { // Carry out goes to SCC
+let isCommutable = 1 in {
+defm S_ADD_U32 : SOP2_32 <sop2<0x00>, "s_add_u32", []>;
+defm S_ADD_I32 : SOP2_32 <sop2<0x02>, "s_add_i32",
+ [(set i32:$dst, (add SSrc_32:$src0, SSrc_32:$src1))]
+>;
+} // End isCommutable = 1
+
+defm S_SUB_U32 : SOP2_32 <sop2<0x01>, "s_sub_u32", []>;
+defm S_SUB_I32 : SOP2_32 <sop2<0x03>, "s_sub_i32",
+ [(set i32:$dst, (sub SSrc_32:$src0, SSrc_32:$src1))]
+>;
+
+let Uses = [SCC] in { // Carry in comes from SCC
+let isCommutable = 1 in {
+defm S_ADDC_U32 : SOP2_32 <sop2<0x04>, "s_addc_u32",
+ [(set i32:$dst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
+} // End isCommutable = 1
+
+defm S_SUBB_U32 : SOP2_32 <sop2<0x05>, "s_subb_u32",
+ [(set i32:$dst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
+} // End Uses = [SCC]
+
+defm S_MIN_I32 : SOP2_32 <sop2<0x06>, "s_min_i32",
+ [(set i32:$dst, (smin i32:$src0, i32:$src1))]
+>;
+defm S_MIN_U32 : SOP2_32 <sop2<0x07>, "s_min_u32",
+ [(set i32:$dst, (umin i32:$src0, i32:$src1))]
+>;
+defm S_MAX_I32 : SOP2_32 <sop2<0x08>, "s_max_i32",
+ [(set i32:$dst, (smax i32:$src0, i32:$src1))]
+>;
+defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32",
+ [(set i32:$dst, (umax i32:$src0, i32:$src1))]
+>;
+} // End Defs = [SCC]
+
+
+let Uses = [SCC] in {
+ defm S_CSELECT_B32 : SOP2_32 <sop2<0x0a>, "s_cselect_b32", []>;
+ defm S_CSELECT_B64 : SOP2_64 <sop2<0x0b>, "s_cselect_b64", []>;
+} // End Uses = [SCC]
+
+let Defs = [SCC] in {
+defm S_AND_B32 : SOP2_32 <sop2<0x0e, 0x0c>, "s_and_b32",
+ [(set i32:$dst, (and i32:$src0, i32:$src1))]
+>;
+
+defm S_AND_B64 : SOP2_64 <sop2<0x0f, 0x0d>, "s_and_b64",
+ [(set i64:$dst, (and i64:$src0, i64:$src1))]
+>;
+
+defm S_OR_B32 : SOP2_32 <sop2<0x10, 0x0e>, "s_or_b32",
+ [(set i32:$dst, (or i32:$src0, i32:$src1))]
+>;
+
+defm S_OR_B64 : SOP2_64 <sop2<0x11, 0x0f>, "s_or_b64",
+ [(set i64:$dst, (or i64:$src0, i64:$src1))]
+>;
+
+defm S_XOR_B32 : SOP2_32 <sop2<0x12, 0x10>, "s_xor_b32",
+ [(set i32:$dst, (xor i32:$src0, i32:$src1))]
+>;
+
+defm S_XOR_B64 : SOP2_64 <sop2<0x13, 0x11>, "s_xor_b64",
+ [(set i64:$dst, (xor i64:$src0, i64:$src1))]
+>;
+defm S_ANDN2_B32 : SOP2_32 <sop2<0x14, 0x12>, "s_andn2_b32", []>;
+defm S_ANDN2_B64 : SOP2_64 <sop2<0x15, 0x13>, "s_andn2_b64", []>;
+defm S_ORN2_B32 : SOP2_32 <sop2<0x16, 0x14>, "s_orn2_b32", []>;
+defm S_ORN2_B64 : SOP2_64 <sop2<0x17, 0x15>, "s_orn2_b64", []>;
+defm S_NAND_B32 : SOP2_32 <sop2<0x18, 0x16>, "s_nand_b32", []>;
+defm S_NAND_B64 : SOP2_64 <sop2<0x19, 0x17>, "s_nand_b64", []>;
+defm S_NOR_B32 : SOP2_32 <sop2<0x1a, 0x18>, "s_nor_b32", []>;
+defm S_NOR_B64 : SOP2_64 <sop2<0x1b, 0x19>, "s_nor_b64", []>;
+defm S_XNOR_B32 : SOP2_32 <sop2<0x1c, 0x1a>, "s_xnor_b32", []>;
+defm S_XNOR_B64 : SOP2_64 <sop2<0x1d, 0x1b>, "s_xnor_b64", []>;
+} // End Defs = [SCC]
+
+// Use added complexity so these patterns are preferred to the VALU patterns.
+let AddedComplexity = 1 in {
+let Defs = [SCC] in {
+
+defm S_LSHL_B32 : SOP2_32 <sop2<0x1e, 0x1c>, "s_lshl_b32",
+ [(set i32:$dst, (shl i32:$src0, i32:$src1))]
+>;
+defm S_LSHL_B64 : SOP2_64_32 <sop2<0x1f, 0x1d>, "s_lshl_b64",
+ [(set i64:$dst, (shl i64:$src0, i32:$src1))]
+>;
+defm S_LSHR_B32 : SOP2_32 <sop2<0x20, 0x1e>, "s_lshr_b32",
+ [(set i32:$dst, (srl i32:$src0, i32:$src1))]
+>;
+defm S_LSHR_B64 : SOP2_64_32 <sop2<0x21, 0x1f>, "s_lshr_b64",
+ [(set i64:$dst, (srl i64:$src0, i32:$src1))]
+>;
+defm S_ASHR_I32 : SOP2_32 <sop2<0x22, 0x20>, "s_ashr_i32",
+ [(set i32:$dst, (sra i32:$src0, i32:$src1))]
+>;
+defm S_ASHR_I64 : SOP2_64_32 <sop2<0x23, 0x21>, "s_ashr_i64",
+ [(set i64:$dst, (sra i64:$src0, i32:$src1))]
+>;
+} // End Defs = [SCC]
+
+defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32",
+ [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
+defm S_BFM_B64 : SOP2_64 <sop2<0x25, 0x23>, "s_bfm_b64", []>;
+defm S_MUL_I32 : SOP2_32 <sop2<0x26, 0x24>, "s_mul_i32",
+ [(set i32:$dst, (mul i32:$src0, i32:$src1))]
+>;
+
+} // End AddedComplexity = 1
+
+let Defs = [SCC] in {
+defm S_BFE_U32 : SOP2_32 <sop2<0x27, 0x25>, "s_bfe_u32", []>;
+defm S_BFE_I32 : SOP2_32 <sop2<0x28, 0x26>, "s_bfe_i32", []>;
+defm S_BFE_U64 : SOP2_64 <sop2<0x29, 0x27>, "s_bfe_u64", []>;
+defm S_BFE_I64 : SOP2_64_32 <sop2<0x2a, 0x28>, "s_bfe_i64", []>;
+} // End Defs = [SCC]
+
+let sdst = 0 in {
+defm S_CBRANCH_G_FORK : SOP2_m <
+ sop2<0x2b, 0x29>, "s_cbranch_g_fork", (outs),
+ (ins SReg_64:$src0, SReg_64:$src1), "s_cbranch_g_fork $src0, $src1", []
+>;
+}
+
+let Defs = [SCC] in {
+defm S_ABSDIFF_I32 : SOP2_32 <sop2<0x2c, 0x2a>, "s_absdiff_i32", []>;
+} // End Defs = [SCC]
+
+//===----------------------------------------------------------------------===//
+// SOPC Instructions
+//===----------------------------------------------------------------------===//
+
+def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "s_cmp_eq_i32">;
+def S_CMP_LG_I32 : SOPC_32 <0x00000001, "s_cmp_lg_i32">;
+def S_CMP_GT_I32 : SOPC_32 <0x00000002, "s_cmp_gt_i32">;
+def S_CMP_GE_I32 : SOPC_32 <0x00000003, "s_cmp_ge_i32">;
+def S_CMP_LT_I32 : SOPC_32 <0x00000004, "s_cmp_lt_i32">;
+def S_CMP_LE_I32 : SOPC_32 <0x00000005, "s_cmp_le_i32">;
+def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "s_cmp_eq_u32">;
+def S_CMP_LG_U32 : SOPC_32 <0x00000007, "s_cmp_lg_u32">;
+def S_CMP_GT_U32 : SOPC_32 <0x00000008, "s_cmp_gt_u32">;
+def S_CMP_GE_U32 : SOPC_32 <0x00000009, "s_cmp_ge_u32">;
+def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "s_cmp_lt_u32">;
+def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "s_cmp_le_u32">;
+////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "s_bitcmp0_b32", []>;
+////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "s_bitcmp1_b32", []>;
+////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "s_bitcmp0_b64", []>;
+////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "s_bitcmp1_b64", []>;
+//def S_SETVSKIP : SOPC_ <0x00000010, "s_setvskip", []>;
+
+//===----------------------------------------------------------------------===//
+// SOPK Instructions
+//===----------------------------------------------------------------------===//
+
+let isReMaterializable = 1 in {
+defm S_MOVK_I32 : SOPK_32 <sopk<0x00>, "s_movk_i32", []>;
+} // End isReMaterializable = 1
+let Uses = [SCC] in {
+ defm S_CMOVK_I32 : SOPK_32 <sopk<0x02, 0x01>, "s_cmovk_i32", []>;
+}
+
+let isCompare = 1 in {
+
+/*
+This instruction is disabled for now until we can figure out how to teach
+the instruction selector to correctly use the S_CMP* vs V_CMP*
+instructions.
+
+When this instruction is enabled, the code generator sometimes produces this
+invalid sequence:
+
+SCC = S_CMPK_EQ_I32 SGPR0, imm
+VCC = COPY SCC
+VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
+
+defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32",
+ [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
+>;
+*/
+
+defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", []>;
+defm S_CMPK_LG_I32 : SOPK_SCC <sopk<0x04, 0x03>, "s_cmpk_lg_i32", []>;
+defm S_CMPK_GT_I32 : SOPK_SCC <sopk<0x05, 0x04>, "s_cmpk_gt_i32", []>;
+defm S_CMPK_GE_I32 : SOPK_SCC <sopk<0x06, 0x05>, "s_cmpk_ge_i32", []>;
+defm S_CMPK_LT_I32 : SOPK_SCC <sopk<0x07, 0x06>, "s_cmpk_lt_i32", []>;
+defm S_CMPK_LE_I32 : SOPK_SCC <sopk<0x08, 0x07>, "s_cmpk_le_i32", []>;
+defm S_CMPK_EQ_U32 : SOPK_SCC <sopk<0x09, 0x08>, "s_cmpk_eq_u32", []>;
+defm S_CMPK_LG_U32 : SOPK_SCC <sopk<0x0a, 0x09>, "s_cmpk_lg_u32", []>;
+defm S_CMPK_GT_U32 : SOPK_SCC <sopk<0x0b, 0x0a>, "s_cmpk_gt_u32", []>;
+defm S_CMPK_GE_U32 : SOPK_SCC <sopk<0x0c, 0x0b>, "s_cmpk_ge_u32", []>;
+defm S_CMPK_LT_U32 : SOPK_SCC <sopk<0x0d, 0x0c>, "s_cmpk_lt_u32", []>;
+defm S_CMPK_LE_U32 : SOPK_SCC <sopk<0x0e, 0x0d>, "s_cmpk_le_u32", []>;
+} // End isCompare = 1
+
+let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
+ Constraints = "$sdst = $src0" in {
+ defm S_ADDK_I32 : SOPK_32TIE <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
+ defm S_MULK_I32 : SOPK_32TIE <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
+}
+
+defm S_CBRANCH_I_FORK : SOPK_m <
+ sopk<0x11, 0x10>, "s_cbranch_i_fork", (outs),
+ (ins SReg_64:$sdst, u16imm:$simm16), " $sdst, $simm16"
+>;
+defm S_GETREG_B32 : SOPK_32 <sopk<0x12, 0x11>, "s_getreg_b32", []>;
+defm S_SETREG_B32 : SOPK_m <
+ sopk<0x13, 0x12>, "s_setreg_b32", (outs),
+ (ins SReg_32:$sdst, u16imm:$simm16), " $sdst, $simm16"
+>;
+// FIXME: Not on SI?
+//defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
+defm S_SETREG_IMM32_B32 : SOPK_IMM32 <
+ sopk<0x15, 0x14>, "s_setreg_imm32_b32", (outs),
+ (ins i32imm:$imm, u16imm:$simm16), " $imm, $simm16"
+>;
+
+//===----------------------------------------------------------------------===//
+// SOPP Instructions
+//===----------------------------------------------------------------------===//
+
+def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16">;
+
+let isTerminator = 1 in {
+
+def S_ENDPGM : SOPP <0x00000001, (ins), "s_endpgm",
+ [(IL_retflag)]> {
+ let simm16 = 0;
+ let isBarrier = 1;
+ let hasCtrlDep = 1;
+}
+
+let isBranch = 1 in {
+def S_BRANCH : SOPP <
+ 0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
+ [(br bb:$simm16)]> {
+ let isBarrier = 1;
+}
+
+let DisableEncoding = "$scc" in {
+def S_CBRANCH_SCC0 : SOPP <
+ 0x00000004, (ins sopp_brtarget:$simm16, SCCReg:$scc),
+ "s_cbranch_scc0 $simm16"
+>;
+def S_CBRANCH_SCC1 : SOPP <
+ 0x00000005, (ins sopp_brtarget:$simm16, SCCReg:$scc),
+ "s_cbranch_scc1 $simm16"
+>;
+} // End DisableEncoding = "$scc"
+
+def S_CBRANCH_VCCZ : SOPP <
+ 0x00000006, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
+ "s_cbranch_vccz $simm16"
+>;
+def S_CBRANCH_VCCNZ : SOPP <
+ 0x00000007, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
+ "s_cbranch_vccnz $simm16"
+>;
+
+let DisableEncoding = "$exec" in {
+def S_CBRANCH_EXECZ : SOPP <
+ 0x00000008, (ins sopp_brtarget:$simm16, EXECReg:$exec),
+ "s_cbranch_execz $simm16"
+>;
+def S_CBRANCH_EXECNZ : SOPP <
+ 0x00000009, (ins sopp_brtarget:$simm16, EXECReg:$exec),
+ "s_cbranch_execnz $simm16"
+>;
+} // End DisableEncoding = "$exec"
+
+
+} // End isBranch = 1
+} // End isTerminator = 1
+
+let hasSideEffects = 1 in {
+def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
+ [(int_AMDGPU_barrier_local)]
+> {
+ let simm16 = 0;
+ let isBarrier = 1;
+ let hasCtrlDep = 1;
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">;
+def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
+def S_SLEEP : SOPP <0x0000000e, (ins i16imm:$simm16), "s_sleep $simm16">;
+def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
+
+let Uses = [EXEC, M0] in {
+ def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
+ [(AMDGPUsendmsg (i32 imm:$simm16))]
+ >;
+} // End Uses = [EXEC, M0]
+
+def S_SENDMSGHALT : SOPP <0x00000011, (ins i16imm:$simm16), "s_sendmsghalt $simm16">;
+def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
+def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
+ let simm16 = 0;
+}
+def S_INCPERFLEVEL : SOPP <0x00000014, (ins i16imm:$simm16), "s_incperflevel $simm16">;
+def S_DECPERFLEVEL : SOPP <0x00000015, (ins i16imm:$simm16), "s_decperflevel $simm16">;
+def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
+ let simm16 = 0;
+}
+} // End hasSideEffects
+
+//===----------------------------------------------------------------------===//
+// VOPC Instructions
+//===----------------------------------------------------------------------===//
+
+let isCompare = 1, isCommutable = 1 in {
+
+defm V_CMP_F_F32 : VOPC_F32 <vopc<0x0, 0x40>, "v_cmp_f_f32">;
+defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT, "v_cmp_gt_f32">;
+defm V_CMP_EQ_F32 : VOPC_F32 <vopc<0x2, 0x42>, "v_cmp_eq_f32", COND_OEQ>;
+defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE, "v_cmp_ge_f32">;
+defm V_CMP_GT_F32 : VOPC_F32 <vopc<0x4, 0x44>, "v_cmp_gt_f32", COND_OGT>;
+defm V_CMP_LG_F32 : VOPC_F32 <vopc<0x5, 0x45>, "v_cmp_lg_f32", COND_ONE>;
+defm V_CMP_GE_F32 : VOPC_F32 <vopc<0x6, 0x46>, "v_cmp_ge_f32", COND_OGE>;
+defm V_CMP_O_F32 : VOPC_F32 <vopc<0x7, 0x47>, "v_cmp_o_f32", COND_O>;
+defm V_CMP_U_F32 : VOPC_F32 <vopc<0x8, 0x48>, "v_cmp_u_f32", COND_UO>;
+defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT, "v_cmp_nle_f32">;
+defm V_CMP_NLG_F32 : VOPC_F32 <vopc<0xa, 0x4a>, "v_cmp_nlg_f32", COND_UEQ>;
+defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE, "v_cmp_nlt_f32">;
+defm V_CMP_NLE_F32 : VOPC_F32 <vopc<0xc, 0x4c>, "v_cmp_nle_f32", COND_UGT>;
+defm V_CMP_NEQ_F32 : VOPC_F32 <vopc<0xd, 0x4d>, "v_cmp_neq_f32", COND_UNE>;
+defm V_CMP_NLT_F32 : VOPC_F32 <vopc<0xe, 0x4e>, "v_cmp_nlt_f32", COND_UGE>;
+defm V_CMP_TRU_F32 : VOPC_F32 <vopc<0xf, 0x4f>, "v_cmp_tru_f32">;
+
+
+defm V_CMPX_F_F32 : VOPCX_F32 <vopc<0x10, 0x50>, "v_cmpx_f_f32">;
+defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32", "v_cmpx_gt_f32">;
+defm V_CMPX_EQ_F32 : VOPCX_F32 <vopc<0x12, 0x52>, "v_cmpx_eq_f32">;
+defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32", "v_cmpx_ge_f32">;
+defm V_CMPX_GT_F32 : VOPCX_F32 <vopc<0x14, 0x54>, "v_cmpx_gt_f32">;
+defm V_CMPX_LG_F32 : VOPCX_F32 <vopc<0x15, 0x55>, "v_cmpx_lg_f32">;
+defm V_CMPX_GE_F32 : VOPCX_F32 <vopc<0x16, 0x56>, "v_cmpx_ge_f32">;
+defm V_CMPX_O_F32 : VOPCX_F32 <vopc<0x17, 0x57>, "v_cmpx_o_f32">;
+defm V_CMPX_U_F32 : VOPCX_F32 <vopc<0x18, 0x58>, "v_cmpx_u_f32">;
+defm V_CMPX_NGE_F32 : VOPCX_F32 <vopc<0x19, 0x59>, "v_cmpx_nge_f32">;
+defm V_CMPX_NLG_F32 : VOPCX_F32 <vopc<0x1a, 0x5a>, "v_cmpx_nlg_f32">;
+defm V_CMPX_NGT_F32 : VOPCX_F32 <vopc<0x1b, 0x5b>, "v_cmpx_ngt_f32">;
+defm V_CMPX_NLE_F32 : VOPCX_F32 <vopc<0x1c, 0x5c>, "v_cmpx_nle_f32">;
+defm V_CMPX_NEQ_F32 : VOPCX_F32 <vopc<0x1d, 0x5d>, "v_cmpx_neq_f32">;
+defm V_CMPX_NLT_F32 : VOPCX_F32 <vopc<0x1e, 0x5e>, "v_cmpx_nlt_f32">;
+defm V_CMPX_TRU_F32 : VOPCX_F32 <vopc<0x1f, 0x5f>, "v_cmpx_tru_f32">;
+
+
+defm V_CMP_F_F64 : VOPC_F64 <vopc<0x20, 0x60>, "v_cmp_f_f64">;
+defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT, "v_cmp_gt_f64">;
+defm V_CMP_EQ_F64 : VOPC_F64 <vopc<0x22, 0x62>, "v_cmp_eq_f64", COND_OEQ>;
+defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE, "v_cmp_ge_f64">;
+defm V_CMP_GT_F64 : VOPC_F64 <vopc<0x24, 0x64>, "v_cmp_gt_f64", COND_OGT>;
+defm V_CMP_LG_F64 : VOPC_F64 <vopc<0x25, 0x65>, "v_cmp_lg_f64", COND_ONE>;
+defm V_CMP_GE_F64 : VOPC_F64 <vopc<0x26, 0x66>, "v_cmp_ge_f64", COND_OGE>;
+defm V_CMP_O_F64 : VOPC_F64 <vopc<0x27, 0x67>, "v_cmp_o_f64", COND_O>;
+defm V_CMP_U_F64 : VOPC_F64 <vopc<0x28, 0x68>, "v_cmp_u_f64", COND_UO>;
+defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT, "v_cmp_nle_f64">;
+defm V_CMP_NLG_F64 : VOPC_F64 <vopc<0x2a, 0x6a>, "v_cmp_nlg_f64", COND_UEQ>;
+defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE, "v_cmp_nlt_f64">;
+defm V_CMP_NLE_F64 : VOPC_F64 <vopc<0x2c, 0x6c>, "v_cmp_nle_f64", COND_UGT>;
+defm V_CMP_NEQ_F64 : VOPC_F64 <vopc<0x2d, 0x6d>, "v_cmp_neq_f64", COND_UNE>;
+defm V_CMP_NLT_F64 : VOPC_F64 <vopc<0x2e, 0x6e>, "v_cmp_nlt_f64", COND_UGE>;
+defm V_CMP_TRU_F64 : VOPC_F64 <vopc<0x2f, 0x6f>, "v_cmp_tru_f64">;
+
+
+defm V_CMPX_F_F64 : VOPCX_F64 <vopc<0x30, 0x70>, "v_cmpx_f_f64">;
+defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64", "v_cmpx_gt_f64">;
+defm V_CMPX_EQ_F64 : VOPCX_F64 <vopc<0x32, 0x72>, "v_cmpx_eq_f64">;
+defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64", "v_cmpx_ge_f64">;
+defm V_CMPX_GT_F64 : VOPCX_F64 <vopc<0x34, 0x74>, "v_cmpx_gt_f64">;
+defm V_CMPX_LG_F64 : VOPCX_F64 <vopc<0x35, 0x75>, "v_cmpx_lg_f64">;
+defm V_CMPX_GE_F64 : VOPCX_F64 <vopc<0x36, 0x76>, "v_cmpx_ge_f64">;
+defm V_CMPX_O_F64 : VOPCX_F64 <vopc<0x37, 0x77>, "v_cmpx_o_f64">;
+defm V_CMPX_U_F64 : VOPCX_F64 <vopc<0x38, 0x78>, "v_cmpx_u_f64">;
+defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64", "v_cmpx_nle_f64">;
+defm V_CMPX_NLG_F64 : VOPCX_F64 <vopc<0x3a, 0x7a>, "v_cmpx_nlg_f64">;
+defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64", "v_cmpx_nlt_f64">;
+defm V_CMPX_NLE_F64 : VOPCX_F64 <vopc<0x3c, 0x7c>, "v_cmpx_nle_f64">;
+defm V_CMPX_NEQ_F64 : VOPCX_F64 <vopc<0x3d, 0x7d>, "v_cmpx_neq_f64">;
+defm V_CMPX_NLT_F64 : VOPCX_F64 <vopc<0x3e, 0x7e>, "v_cmpx_nlt_f64">;
+defm V_CMPX_TRU_F64 : VOPCX_F64 <vopc<0x3f, 0x7f>, "v_cmpx_tru_f64">;
+
+
+let SubtargetPredicate = isSICI in {
+
+defm V_CMPS_F_F32 : VOPC_F32 <vopc<0x40>, "v_cmps_f_f32">;
+defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32", COND_NULL, "v_cmps_gt_f32">;
+defm V_CMPS_EQ_F32 : VOPC_F32 <vopc<0x42>, "v_cmps_eq_f32">;
+defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32", COND_NULL, "v_cmps_ge_f32">;
+defm V_CMPS_GT_F32 : VOPC_F32 <vopc<0x44>, "v_cmps_gt_f32">;
+defm V_CMPS_LG_F32 : VOPC_F32 <vopc<0x45>, "v_cmps_lg_f32">;
+defm V_CMPS_GE_F32 : VOPC_F32 <vopc<0x46>, "v_cmps_ge_f32">;
+defm V_CMPS_O_F32 : VOPC_F32 <vopc<0x47>, "v_cmps_o_f32">;
+defm V_CMPS_U_F32 : VOPC_F32 <vopc<0x48>, "v_cmps_u_f32">;
+defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32", COND_NULL, "v_cmps_nle_f32">;
+defm V_CMPS_NLG_F32 : VOPC_F32 <vopc<0x4a>, "v_cmps_nlg_f32">;
+defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32", COND_NULL, "v_cmps_nlt_f32">;
+defm V_CMPS_NLE_F32 : VOPC_F32 <vopc<0x4c>, "v_cmps_nle_f32">;
+defm V_CMPS_NEQ_F32 : VOPC_F32 <vopc<0x4d>, "v_cmps_neq_f32">;
+defm V_CMPS_NLT_F32 : VOPC_F32 <vopc<0x4e>, "v_cmps_nlt_f32">;
+defm V_CMPS_TRU_F32 : VOPC_F32 <vopc<0x4f>, "v_cmps_tru_f32">;
+
+
+defm V_CMPSX_F_F32 : VOPCX_F32 <vopc<0x50>, "v_cmpsx_f_f32">;
+defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32", "v_cmpsx_gt_f32">;
+defm V_CMPSX_EQ_F32 : VOPCX_F32 <vopc<0x52>, "v_cmpsx_eq_f32">;
+defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32", "v_cmpsx_ge_f32">;
+defm V_CMPSX_GT_F32 : VOPCX_F32 <vopc<0x54>, "v_cmpsx_gt_f32">;
+defm V_CMPSX_LG_F32 : VOPCX_F32 <vopc<0x55>, "v_cmpsx_lg_f32">;
+defm V_CMPSX_GE_F32 : VOPCX_F32 <vopc<0x56>, "v_cmpsx_ge_f32">;
+defm V_CMPSX_O_F32 : VOPCX_F32 <vopc<0x57>, "v_cmpsx_o_f32">;
+defm V_CMPSX_U_F32 : VOPCX_F32 <vopc<0x58>, "v_cmpsx_u_f32">;
+defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32", "v_cmpsx_nle_f32">;
+defm V_CMPSX_NLG_F32 : VOPCX_F32 <vopc<0x5a>, "v_cmpsx_nlg_f32">;
+defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32", "v_cmpsx_nlt_f32">;
+defm V_CMPSX_NLE_F32 : VOPCX_F32 <vopc<0x5c>, "v_cmpsx_nle_f32">;
+defm V_CMPSX_NEQ_F32 : VOPCX_F32 <vopc<0x5d>, "v_cmpsx_neq_f32">;
+defm V_CMPSX_NLT_F32 : VOPCX_F32 <vopc<0x5e>, "v_cmpsx_nlt_f32">;
+defm V_CMPSX_TRU_F32 : VOPCX_F32 <vopc<0x5f>, "v_cmpsx_tru_f32">;
+
+
+defm V_CMPS_F_F64 : VOPC_F64 <vopc<0x60>, "v_cmps_f_f64">;
+defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64", COND_NULL, "v_cmps_gt_f64">;
+defm V_CMPS_EQ_F64 : VOPC_F64 <vopc<0x62>, "v_cmps_eq_f64">;
+defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64", COND_NULL, "v_cmps_ge_f64">;
+defm V_CMPS_GT_F64 : VOPC_F64 <vopc<0x64>, "v_cmps_gt_f64">;
+defm V_CMPS_LG_F64 : VOPC_F64 <vopc<0x65>, "v_cmps_lg_f64">;
+defm V_CMPS_GE_F64 : VOPC_F64 <vopc<0x66>, "v_cmps_ge_f64">;
+defm V_CMPS_O_F64 : VOPC_F64 <vopc<0x67>, "v_cmps_o_f64">;
+defm V_CMPS_U_F64 : VOPC_F64 <vopc<0x68>, "v_cmps_u_f64">;
+defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64", COND_NULL, "v_cmps_nle_f64">;
+defm V_CMPS_NLG_F64 : VOPC_F64 <vopc<0x6a>, "v_cmps_nlg_f64">;
+defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64", COND_NULL, "v_cmps_nlt_f64">;
+defm V_CMPS_NLE_F64 : VOPC_F64 <vopc<0x6c>, "v_cmps_nle_f64">;
+defm V_CMPS_NEQ_F64 : VOPC_F64 <vopc<0x6d>, "v_cmps_neq_f64">;
+defm V_CMPS_NLT_F64 : VOPC_F64 <vopc<0x6e>, "v_cmps_nlt_f64">;
+defm V_CMPS_TRU_F64 : VOPC_F64 <vopc<0x6f>, "v_cmps_tru_f64">;
+
+
+defm V_CMPSX_F_F64 : VOPCX_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
+defm V_CMPSX_LT_F64 : VOPCX_F64 <vopc<0x71>, "v_cmpsx_lt_f64", "v_cmpsx_gt_f64">;
+defm V_CMPSX_EQ_F64 : VOPCX_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
+defm V_CMPSX_LE_F64 : VOPCX_F64 <vopc<0x73>, "v_cmpsx_le_f64", "v_cmpsx_ge_f64">;
+defm V_CMPSX_GT_F64 : VOPCX_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
+defm V_CMPSX_LG_F64 : VOPCX_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
+defm V_CMPSX_GE_F64 : VOPCX_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
+defm V_CMPSX_O_F64 : VOPCX_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
+defm V_CMPSX_U_F64 : VOPCX_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
+defm V_CMPSX_NGE_F64 : VOPCX_F64 <vopc<0x79>, "v_cmpsx_nge_f64", "v_cmpsx_nle_f64">;
+defm V_CMPSX_NLG_F64 : VOPCX_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
+defm V_CMPSX_NGT_F64 : VOPCX_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64", "v_cmpsx_nlt_f64">;
+defm V_CMPSX_NLE_F64 : VOPCX_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
+defm V_CMPSX_NEQ_F64 : VOPCX_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
+defm V_CMPSX_NLT_F64 : VOPCX_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
+defm V_CMPSX_TRU_F64 : VOPCX_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
+
+} // End SubtargetPredicate = isSICI
+
+defm V_CMP_F_I32 : VOPC_I32 <vopc<0x80, 0xc0>, "v_cmp_f_i32">;
+defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">;
+defm V_CMP_EQ_I32 : VOPC_I32 <vopc<0x82, 0xc2>, "v_cmp_eq_i32", COND_EQ>;
+defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">;
+defm V_CMP_GT_I32 : VOPC_I32 <vopc<0x84, 0xc4>, "v_cmp_gt_i32", COND_SGT>;
+defm V_CMP_NE_I32 : VOPC_I32 <vopc<0x85, 0xc5>, "v_cmp_ne_i32", COND_NE>;
+defm V_CMP_GE_I32 : VOPC_I32 <vopc<0x86, 0xc6>, "v_cmp_ge_i32", COND_SGE>;
+defm V_CMP_T_I32 : VOPC_I32 <vopc<0x87, 0xc7>, "v_cmp_t_i32">;
+
+
+defm V_CMPX_F_I32 : VOPCX_I32 <vopc<0x90, 0xd0>, "v_cmpx_f_i32">;
+defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32", "v_cmpx_gt_i32">;
+defm V_CMPX_EQ_I32 : VOPCX_I32 <vopc<0x92, 0xd2>, "v_cmpx_eq_i32">;
+defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32", "v_cmpx_ge_i32">;
+defm V_CMPX_GT_I32 : VOPCX_I32 <vopc<0x94, 0xd4>, "v_cmpx_gt_i32">;
+defm V_CMPX_NE_I32 : VOPCX_I32 <vopc<0x95, 0xd5>, "v_cmpx_ne_i32">;
+defm V_CMPX_GE_I32 : VOPCX_I32 <vopc<0x96, 0xd6>, "v_cmpx_ge_i32">;
+defm V_CMPX_T_I32 : VOPCX_I32 <vopc<0x97, 0xd7>, "v_cmpx_t_i32">;
+
+
+defm V_CMP_F_I64 : VOPC_I64 <vopc<0xa0, 0xe0>, "v_cmp_f_i64">;
+defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">;
+defm V_CMP_EQ_I64 : VOPC_I64 <vopc<0xa2, 0xe2>, "v_cmp_eq_i64", COND_EQ>;
+defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">;
+defm V_CMP_GT_I64 : VOPC_I64 <vopc<0xa4, 0xe4>, "v_cmp_gt_i64", COND_SGT>;
+defm V_CMP_NE_I64 : VOPC_I64 <vopc<0xa5, 0xe5>, "v_cmp_ne_i64", COND_NE>;
+defm V_CMP_GE_I64 : VOPC_I64 <vopc<0xa6, 0xe6>, "v_cmp_ge_i64", COND_SGE>;
+defm V_CMP_T_I64 : VOPC_I64 <vopc<0xa7, 0xe7>, "v_cmp_t_i64">;
+
+
+defm V_CMPX_F_I64 : VOPCX_I64 <vopc<0xb0, 0xf0>, "v_cmpx_f_i64">;
+defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64", "v_cmpx_gt_i64">;
+defm V_CMPX_EQ_I64 : VOPCX_I64 <vopc<0xb2, 0xf2>, "v_cmpx_eq_i64">;
+defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64", "v_cmpx_ge_i64">;
+defm V_CMPX_GT_I64 : VOPCX_I64 <vopc<0xb4, 0xf4>, "v_cmpx_gt_i64">;
+defm V_CMPX_NE_I64 : VOPCX_I64 <vopc<0xb5, 0xf5>, "v_cmpx_ne_i64">;
+defm V_CMPX_GE_I64 : VOPCX_I64 <vopc<0xb6, 0xf6>, "v_cmpx_ge_i64">;
+defm V_CMPX_T_I64 : VOPCX_I64 <vopc<0xb7, 0xf7>, "v_cmpx_t_i64">;
+
+
+defm V_CMP_F_U32 : VOPC_I32 <vopc<0xc0, 0xc8>, "v_cmp_f_u32">;
+defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT, "v_cmp_gt_u32">;
+defm V_CMP_EQ_U32 : VOPC_I32 <vopc<0xc2, 0xca>, "v_cmp_eq_u32", COND_EQ>;
+defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE, "v_cmp_ge_u32">;
+defm V_CMP_GT_U32 : VOPC_I32 <vopc<0xc4, 0xcc>, "v_cmp_gt_u32", COND_UGT>;
+defm V_CMP_NE_U32 : VOPC_I32 <vopc<0xc5, 0xcd>, "v_cmp_ne_u32", COND_NE>;
+defm V_CMP_GE_U32 : VOPC_I32 <vopc<0xc6, 0xce>, "v_cmp_ge_u32", COND_UGE>;
+defm V_CMP_T_U32 : VOPC_I32 <vopc<0xc7, 0xcf>, "v_cmp_t_u32">;
+
+
+defm V_CMPX_F_U32 : VOPCX_I32 <vopc<0xd0, 0xd8>, "v_cmpx_f_u32">;
+defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32", "v_cmpx_gt_u32">;
+defm V_CMPX_EQ_U32 : VOPCX_I32 <vopc<0xd2, 0xda>, "v_cmpx_eq_u32">;
+defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32", "v_cmpx_ge_u32">;
+defm V_CMPX_GT_U32 : VOPCX_I32 <vopc<0xd4, 0xdc>, "v_cmpx_gt_u32">;
+defm V_CMPX_NE_U32 : VOPCX_I32 <vopc<0xd5, 0xdd>, "v_cmpx_ne_u32">;
+defm V_CMPX_GE_U32 : VOPCX_I32 <vopc<0xd6, 0xde>, "v_cmpx_ge_u32">;
+defm V_CMPX_T_U32 : VOPCX_I32 <vopc<0xd7, 0xdf>, "v_cmpx_t_u32">;
+
+
+defm V_CMP_F_U64 : VOPC_I64 <vopc<0xe0, 0xe8>, "v_cmp_f_u64">;
+defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT, "v_cmp_gt_u64">;
+defm V_CMP_EQ_U64 : VOPC_I64 <vopc<0xe2, 0xea>, "v_cmp_eq_u64", COND_EQ>;
+defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE, "v_cmp_ge_u64">;
+defm V_CMP_GT_U64 : VOPC_I64 <vopc<0xe4, 0xec>, "v_cmp_gt_u64", COND_UGT>;
+defm V_CMP_NE_U64 : VOPC_I64 <vopc<0xe5, 0xed>, "v_cmp_ne_u64", COND_NE>;
+defm V_CMP_GE_U64 : VOPC_I64 <vopc<0xe6, 0xee>, "v_cmp_ge_u64", COND_UGE>;
+defm V_CMP_T_U64 : VOPC_I64 <vopc<0xe7, 0xef>, "v_cmp_t_u64">;
+
+defm V_CMPX_F_U64 : VOPCX_I64 <vopc<0xf0, 0xf8>, "v_cmpx_f_u64">;
+defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64", "v_cmpx_gt_u64">;
+defm V_CMPX_EQ_U64 : VOPCX_I64 <vopc<0xf2, 0xfa>, "v_cmpx_eq_u64">;
+defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64", "v_cmpx_ge_u64">;
+defm V_CMPX_GT_U64 : VOPCX_I64 <vopc<0xf4, 0xfc>, "v_cmpx_gt_u64">;
+defm V_CMPX_NE_U64 : VOPCX_I64 <vopc<0xf5, 0xfd>, "v_cmpx_ne_u64">;
+defm V_CMPX_GE_U64 : VOPCX_I64 <vopc<0xf6, 0xfe>, "v_cmpx_ge_u64">;
+defm V_CMPX_T_U64 : VOPCX_I64 <vopc<0xf7, 0xff>, "v_cmpx_t_u64">;
+
+} // End isCompare = 1, isCommutable = 1
+
+defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <vopc<0x88, 0x10>, "v_cmp_class_f32">;
+defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <vopc<0x98, 0x11>, "v_cmpx_class_f32">;
+defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <vopc<0xa8, 0x12>, "v_cmp_class_f64">;
+defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <vopc<0xb8, 0x13>, "v_cmpx_class_f64">;
+
+//===----------------------------------------------------------------------===//
+// DS Instructions
+//===----------------------------------------------------------------------===//
+
+defm DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
+defm DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
+defm DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
+defm DS_INC_U32 : DS_1A1D_NORET <0x3, "ds_inc_u32", VGPR_32>;
+defm DS_DEC_U32 : DS_1A1D_NORET <0x4, "ds_dec_u32", VGPR_32>;
+defm DS_MIN_I32 : DS_1A1D_NORET <0x5, "ds_min_i32", VGPR_32>;
+defm DS_MAX_I32 : DS_1A1D_NORET <0x6, "ds_max_i32", VGPR_32>;
+defm DS_MIN_U32 : DS_1A1D_NORET <0x7, "ds_min_u32", VGPR_32>;
+defm DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
+defm DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>;
+defm DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>;
+defm DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>;
+defm DS_MSKOR_B32 : DS_1A2D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
+let mayLoad = 0 in {
+defm DS_WRITE_B32 : DS_1A1D_NORET <0xd, "ds_write_b32", VGPR_32>;
+defm DS_WRITE2_B32 : DS_1A1D_Off8_NORET <0xe, "ds_write2_b32", VGPR_32>;
+defm DS_WRITE2ST64_B32 : DS_1A1D_Off8_NORET <0xf, "ds_write2st64_b32", VGPR_32>;
+}
+defm DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>;
+defm DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>;
+defm DS_MIN_F32 : DS_1A2D_NORET <0x12, "ds_min_f32", VGPR_32>;
+defm DS_MAX_F32 : DS_1A2D_NORET <0x13, "ds_max_f32", VGPR_32>;
+
+defm DS_GWS_INIT : DS_1A_GDS <0x19, "ds_gws_init">;
+defm DS_GWS_SEMA_V : DS_1A_GDS <0x1a, "ds_gws_sema_v">;
+defm DS_GWS_SEMA_BR : DS_1A_GDS <0x1b, "ds_gws_sema_br">;
+defm DS_GWS_SEMA_P : DS_1A_GDS <0x1c, "ds_gws_sema_p">;
+defm DS_GWS_BARRIER : DS_1A_GDS <0x1d, "ds_gws_barrier">;
+let mayLoad = 0 in {
+defm DS_WRITE_B8 : DS_1A1D_NORET <0x1e, "ds_write_b8", VGPR_32>;
+defm DS_WRITE_B16 : DS_1A1D_NORET <0x1f, "ds_write_b16", VGPR_32>;
+}
+defm DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
+defm DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
+defm DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
+defm DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">;
+defm DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "ds_dec_rtn_u32", VGPR_32, "ds_dec_u32">;
+defm DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "ds_min_rtn_i32", VGPR_32, "ds_min_i32">;
+defm DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "ds_max_rtn_i32", VGPR_32, "ds_max_i32">;
+defm DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "ds_min_rtn_u32", VGPR_32, "ds_min_u32">;
+defm DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32">;
+defm DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
+defm DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
+defm DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
+defm DS_MSKOR_RTN_B32 : DS_1A2D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
+defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>;
+defm DS_WRXCHG2_RTN_B32 : DS_1A2D_RET <
+ 0x2e, "ds_wrxchg2_rtn_b32", VReg_64, "", VGPR_32
+>;
+defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_RET <
+ 0x2f, "ds_wrxchg2st64_rtn_b32", VReg_64, "", VGPR_32
+>;
+defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
+defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
+defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
+defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
+let SubtargetPredicate = isCI in {
+defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
+} // End isCI
+defm DS_SWIZZLE_B32 : DS_1A_RET <0x35, "ds_swizzle_b32", VGPR_32>;
+let mayStore = 0 in {
+defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>;
+defm DS_READ2_B32 : DS_1A_Off8_RET <0x37, "ds_read2_b32", VReg_64>;
+defm DS_READ2ST64_B32 : DS_1A_Off8_RET <0x38, "ds_read2st64_b32", VReg_64>;
+defm DS_READ_I8 : DS_1A_RET <0x39, "ds_read_i8", VGPR_32>;
+defm DS_READ_U8 : DS_1A_RET <0x3a, "ds_read_u8", VGPR_32>;
+defm DS_READ_I16 : DS_1A_RET <0x3b, "ds_read_i16", VGPR_32>;
+defm DS_READ_U16 : DS_1A_RET <0x3c, "ds_read_u16", VGPR_32>;
+}
+defm DS_CONSUME : DS_0A_RET <0x3d, "ds_consume">;
+defm DS_APPEND : DS_0A_RET <0x3e, "ds_append">;
+defm DS_ORDERED_COUNT : DS_1A_RET_GDS <0x3f, "ds_ordered_count">;
+defm DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
+defm DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
+defm DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
+defm DS_INC_U64 : DS_1A1D_NORET <0x43, "ds_inc_u64", VReg_64>;
+defm DS_DEC_U64 : DS_1A1D_NORET <0x44, "ds_dec_u64", VReg_64>;
+defm DS_MIN_I64 : DS_1A1D_NORET <0x45, "ds_min_i64", VReg_64>;
+defm DS_MAX_I64 : DS_1A1D_NORET <0x46, "ds_max_i64", VReg_64>;
+defm DS_MIN_U64 : DS_1A1D_NORET <0x47, "ds_min_u64", VReg_64>;
+defm DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
+defm DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
+defm DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
+defm DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
+defm DS_MSKOR_B64 : DS_1A2D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
+let mayLoad = 0 in {
+defm DS_WRITE_B64 : DS_1A1D_NORET <0x4d, "ds_write_b64", VReg_64>;
+defm DS_WRITE2_B64 : DS_1A1D_Off8_NORET <0x4E, "ds_write2_b64", VReg_64>;
+defm DS_WRITE2ST64_B64 : DS_1A1D_Off8_NORET <0x4f, "ds_write2st64_b64", VReg_64>;
+}
+defm DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
+defm DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
+defm DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
+defm DS_MAX_F64 : DS_1A1D_NORET <0x53, "ds_max_f64", VReg_64>;
+
+defm DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "ds_add_rtn_u64", VReg_64, "ds_add_u64">;
+defm DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "ds_sub_rtn_u64", VReg_64, "ds_sub_u64">;
+defm DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">;
+defm DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "ds_inc_rtn_u64", VReg_64, "ds_inc_u64">;
+defm DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "ds_dec_rtn_u64", VReg_64, "ds_dec_u64">;
+defm DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "ds_min_rtn_i64", VReg_64, "ds_min_i64">;
+defm DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "ds_max_rtn_i64", VReg_64, "ds_max_i64">;
+defm DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "ds_min_rtn_u64", VReg_64, "ds_min_u64">;
+defm DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64">;
+defm DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
+defm DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
+defm DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
+defm DS_MSKOR_RTN_B64 : DS_1A2D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
+defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
+defm DS_WRXCHG2_RTN_B64 : DS_1A2D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_128, "ds_wrxchg2_b64", VReg_64>;
+defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET <0x6f, "ds_wrxchg2st64_rtn_b64", VReg_128, "ds_wrxchg2st64_b64", VReg_64>;
+defm DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
+defm DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
+defm DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_rtn_f64", VReg_64, "ds_min_f64">;
+defm DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_rtn_f64", VReg_64, "ds_max_f64">;
+
+let mayStore = 0 in {
+defm DS_READ_B64 : DS_1A_RET <0x76, "ds_read_b64", VReg_64>;
+defm DS_READ2_B64 : DS_1A_Off8_RET <0x77, "ds_read2_b64", VReg_128>;
+defm DS_READ2ST64_B64 : DS_1A_Off8_RET <0x78, "ds_read2st64_b64", VReg_128>;
+}
+
+defm DS_ADD_SRC2_U32 : DS_1A <0x80, "ds_add_src2_u32">;
+defm DS_SUB_SRC2_U32 : DS_1A <0x81, "ds_sub_src2_u32">;
+defm DS_RSUB_SRC2_U32 : DS_1A <0x82, "ds_rsub_src2_u32">;
+defm DS_INC_SRC2_U32 : DS_1A <0x83, "ds_inc_src2_u32">;
+defm DS_DEC_SRC2_U32 : DS_1A <0x84, "ds_dec_src2_u32">;
+defm DS_MIN_SRC2_I32 : DS_1A <0x85, "ds_min_src2_i32">;
+defm DS_MAX_SRC2_I32 : DS_1A <0x86, "ds_max_src2_i32">;
+defm DS_MIN_SRC2_U32 : DS_1A <0x87, "ds_min_src2_u32">;
+defm DS_MAX_SRC2_U32 : DS_1A <0x88, "ds_max_src2_u32">;
+defm DS_AND_SRC2_B32 : DS_1A <0x89, "ds_and_src2_b32">;
+defm DS_OR_SRC2_B32 : DS_1A <0x8a, "ds_or_src2_b32">;
+defm DS_XOR_SRC2_B32 : DS_1A <0x8b, "ds_xor_src2_b32">;
+defm DS_WRITE_SRC2_B32 : DS_1A <0x8c, "ds_write_src2_b32">;
+
+defm DS_MIN_SRC2_F32 : DS_1A <0x92, "ds_min_src2_f32">;
+defm DS_MAX_SRC2_F32 : DS_1A <0x93, "ds_max_src2_f32">;
+
+defm DS_ADD_SRC2_U64 : DS_1A <0xc0, "ds_add_src2_u64">;
+defm DS_SUB_SRC2_U64 : DS_1A <0xc1, "ds_sub_src2_u64">;
+defm DS_RSUB_SRC2_U64 : DS_1A <0xc2, "ds_rsub_src2_u64">;
+defm DS_INC_SRC2_U64 : DS_1A <0xc3, "ds_inc_src2_u64">;
+defm DS_DEC_SRC2_U64 : DS_1A <0xc4, "ds_dec_src2_u64">;
+defm DS_MIN_SRC2_I64 : DS_1A <0xc5, "ds_min_src2_i64">;
+defm DS_MAX_SRC2_I64 : DS_1A <0xc6, "ds_max_src2_i64">;
+defm DS_MIN_SRC2_U64 : DS_1A <0xc7, "ds_min_src2_u64">;
+defm DS_MAX_SRC2_U64 : DS_1A <0xc8, "ds_max_src2_u64">;
+defm DS_AND_SRC2_B64 : DS_1A <0xc9, "ds_and_src2_b64">;
+defm DS_OR_SRC2_B64 : DS_1A <0xca, "ds_or_src2_b64">;
+defm DS_XOR_SRC2_B64 : DS_1A <0xcb, "ds_xor_src2_b64">;
+defm DS_WRITE_SRC2_B64 : DS_1A <0xcc, "ds_write_src2_b64">;
+
+defm DS_MIN_SRC2_F64 : DS_1A <0xd2, "ds_min_src2_f64">;
+defm DS_MAX_SRC2_F64 : DS_1A <0xd3, "ds_max_src2_f64">;
+
+//let SubtargetPredicate = isCI in {
+// DS_CONDXCHG32_RTN_B64
+// DS_CONDXCHG32_RTN_B128
+//} // End isCI
+
+//===----------------------------------------------------------------------===//
+// MUBUF Instructions
+//===----------------------------------------------------------------------===//
+
+defm BUFFER_LOAD_FORMAT_X : MUBUF_Load_Helper <
+ mubuf<0x00>, "buffer_load_format_x", VGPR_32
+>;
+defm BUFFER_LOAD_FORMAT_XY : MUBUF_Load_Helper <
+ mubuf<0x01>, "buffer_load_format_xy", VReg_64
+>;
+defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Load_Helper <
+ mubuf<0x02>, "buffer_load_format_xyz", VReg_96
+>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <
+ mubuf<0x03>, "buffer_load_format_xyzw", VReg_128
+>;
+defm BUFFER_STORE_FORMAT_X : MUBUF_Store_Helper <
+ mubuf<0x04>, "buffer_store_format_x", VGPR_32
+>;
+defm BUFFER_STORE_FORMAT_XY : MUBUF_Store_Helper <
+ mubuf<0x05>, "buffer_store_format_xy", VReg_64
+>;
+defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Store_Helper <
+ mubuf<0x06>, "buffer_store_format_xyz", VReg_96
+>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Store_Helper <
+ mubuf<0x07>, "buffer_store_format_xyzw", VReg_128
+>;
+defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
+ mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
+>;
+defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <
+ mubuf<0x09, 0x11>, "buffer_load_sbyte", VGPR_32, i32, sextloadi8_global
+>;
+defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <
+ mubuf<0x0a, 0x12>, "buffer_load_ushort", VGPR_32, i32, az_extloadi16_global
+>;
+defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <
+ mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global
+>;
+defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <
+ mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, global_load
+>;
+defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <
+ mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, global_load
+>;
+defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <
+ mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, global_load
+>;
+
+defm BUFFER_STORE_BYTE : MUBUF_Store_Helper <
+ mubuf<0x18>, "buffer_store_byte", VGPR_32, i32, truncstorei8_global
+>;
+
+defm BUFFER_STORE_SHORT : MUBUF_Store_Helper <
+ mubuf<0x1a>, "buffer_store_short", VGPR_32, i32, truncstorei16_global
+>;
+
+defm BUFFER_STORE_DWORD : MUBUF_Store_Helper <
+ mubuf<0x1c>, "buffer_store_dword", VGPR_32, i32, global_store
+>;
+
+defm BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
+ mubuf<0x1d>, "buffer_store_dwordx2", VReg_64, v2i32, global_store
+>;
+
+defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
+ mubuf<0x1e, 0x1f>, "buffer_store_dwordx4", VReg_128, v4i32, global_store
+>;
+
+defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic <
+ mubuf<0x30, 0x40>, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
+>;
+//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <mubuf<0x31, 0x41>, "buffer_atomic_cmpswap", []>;
+defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
+ mubuf<0x32, 0x42>, "buffer_atomic_add", VGPR_32, i32, atomic_add_global
+>;
+defm BUFFER_ATOMIC_SUB : MUBUF_Atomic <
+ mubuf<0x33, 0x43>, "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global
+>;
+//def BUFFER_ATOMIC_RSUB : MUBUF_ <mubuf<0x34>, "buffer_atomic_rsub", []>; // isn't on CI & VI
+defm BUFFER_ATOMIC_SMIN : MUBUF_Atomic <
+ mubuf<0x35, 0x44>, "buffer_atomic_smin", VGPR_32, i32, atomic_min_global
+>;
+defm BUFFER_ATOMIC_UMIN : MUBUF_Atomic <
+ mubuf<0x36, 0x45>, "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global
+>;
+defm BUFFER_ATOMIC_SMAX : MUBUF_Atomic <
+ mubuf<0x37, 0x46>, "buffer_atomic_smax", VGPR_32, i32, atomic_max_global
+>;
+defm BUFFER_ATOMIC_UMAX : MUBUF_Atomic <
+ mubuf<0x38, 0x47>, "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global
+>;
+defm BUFFER_ATOMIC_AND : MUBUF_Atomic <
+ mubuf<0x39, 0x48>, "buffer_atomic_and", VGPR_32, i32, atomic_and_global
+>;
+defm BUFFER_ATOMIC_OR : MUBUF_Atomic <
+ mubuf<0x3a, 0x49>, "buffer_atomic_or", VGPR_32, i32, atomic_or_global
+>;
+defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
+ mubuf<0x3b, 0x4a>, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
+>;
+//def BUFFER_ATOMIC_INC : MUBUF_ <mubuf<0x3c, 0x4b>, "buffer_atomic_inc", []>;
+//def BUFFER_ATOMIC_DEC : MUBUF_ <mubuf<0x3d, 0x4c>, "buffer_atomic_dec", []>;
+//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <mubuf<0x3e>, "buffer_atomic_fcmpswap", []>; // isn't on VI
+//def BUFFER_ATOMIC_FMIN : MUBUF_ <mubuf<0x3f>, "buffer_atomic_fmin", []>; // isn't on VI
+//def BUFFER_ATOMIC_FMAX : MUBUF_ <mubuf<0x40>, "buffer_atomic_fmax", []>; // isn't on VI
+//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <mubuf<0x50, 0x60>, "buffer_atomic_swap_x2", []>;
+//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <mubuf<0x51, 0x61>, "buffer_atomic_cmpswap_x2", []>;
+//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <mubuf<0x52, 0x62>, "buffer_atomic_add_x2", []>;
+//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <mubuf<0x53, 0x63>, "buffer_atomic_sub_x2", []>;
+//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <mubuf<0x54>, "buffer_atomic_rsub_x2", []>; // isn't on CI & VI
+//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <mubuf<0x55, 0x64>, "buffer_atomic_smin_x2", []>;
+//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <mubuf<0x56, 0x65>, "buffer_atomic_umin_x2", []>;
+//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <mubuf<0x57, 0x66>, "buffer_atomic_smax_x2", []>;
+//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <mubuf<0x58, 0x67>, "buffer_atomic_umax_x2", []>;
+//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <mubuf<0x59, 0x68>, "buffer_atomic_and_x2", []>;
+//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <mubuf<0x5a, 0x69>, "buffer_atomic_or_x2", []>;
+//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <mubuf<0x5b, 0x6a>, "buffer_atomic_xor_x2", []>;
+//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <mubuf<0x5c, 0x6b>, "buffer_atomic_inc_x2", []>;
+//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <mubuf<0x5d, 0x6c>, "buffer_atomic_dec_x2", []>;
+//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <mubuf<0x5e>, "buffer_atomic_fcmpswap_x2", []>; // isn't on VI
+//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <mubuf<0x5f>, "buffer_atomic_fmin_x2", []>; // isn't on VI
+//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <mubuf<0x60>, "buffer_atomic_fmax_x2", []>; // isn't on VI
+//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <mubuf<0x70>, "buffer_wbinvl1_sc", []>; // isn't on CI & VI
+//def BUFFER_WBINVL1_VOL : MUBUF_WBINVL1 <mubuf<0x70, 0x3f>, "buffer_wbinvl1_vol", []>; // isn't on SI
+//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <mubuf<0x71, 0x3e>, "buffer_wbinvl1", []>;
+
+//===----------------------------------------------------------------------===//
+// MTBUF Instructions
+//===----------------------------------------------------------------------===//
+
+//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "tbuffer_load_format_x", []>;
+//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "tbuffer_load_format_xy", []>;
+//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "tbuffer_load_format_xyz", []>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "tbuffer_load_format_xyzw", VReg_128>;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "tbuffer_store_format_x", VGPR_32>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "tbuffer_store_format_xy", VReg_64>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "tbuffer_store_format_xyz", VReg_128>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "tbuffer_store_format_xyzw", VReg_128>;
+
+//===----------------------------------------------------------------------===//
+// MIMG Instructions
+//===----------------------------------------------------------------------===//
+
+defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load">;
+defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip">;
+//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"image_load_pck", 0x00000002>;
+//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"image_load_pck_sgn", 0x00000003>;
+//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"image_load_mip_pck", 0x00000004>;
+//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"image_load_mip_pck_sgn", 0x00000005>;
+//def IMAGE_STORE : MIMG_NoPattern_ <"image_store", 0x00000008>;
+//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"image_store_mip", 0x00000009>;
+//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"image_store_pck", 0x0000000a>;
+//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"image_store_mip_pck", 0x0000000b>;
+defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
+//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"image_atomic_swap", 0x0000000f>;
+//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"image_atomic_cmpswap", 0x00000010>;
+//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"image_atomic_add", 0x00000011>;
+//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"image_atomic_sub", 0x00000012>;
+//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"image_atomic_rsub", 0x00000013>;
+//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"image_atomic_smin", 0x00000014>;
+//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"image_atomic_umin", 0x00000015>;
+//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"image_atomic_smax", 0x00000016>;
+//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"image_atomic_umax", 0x00000017>;
+//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"image_atomic_and", 0x00000018>;
+//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"image_atomic_or", 0x00000019>;
+//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"image_atomic_xor", 0x0000001a>;
+//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"image_atomic_inc", 0x0000001b>;
+//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"image_atomic_dec", 0x0000001c>;
+//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d>;
+//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>;
+//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>;
+defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, "image_sample">;
+defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, "image_sample_cl">;
+defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "image_sample_d">;
+defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, "image_sample_d_cl">;
+defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "image_sample_l">;
+defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <0x00000025, "image_sample_b">;
+defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <0x00000026, "image_sample_b_cl">;
+defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, "image_sample_lz">;
+defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <0x00000028, "image_sample_c">;
+defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <0x00000029, "image_sample_c_cl">;
+defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "image_sample_c_d">;
+defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, "image_sample_c_d_cl">;
+defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "image_sample_c_l">;
+defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <0x0000002d, "image_sample_c_b">;
+defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <0x0000002e, "image_sample_c_b_cl">;
+defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, "image_sample_c_lz">;
+defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <0x00000030, "image_sample_o">;
+defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <0x00000031, "image_sample_cl_o">;
+defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, "image_sample_d_o">;
+defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, "image_sample_d_cl_o">;
+defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, "image_sample_l_o">;
+defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <0x00000035, "image_sample_b_o">;
+defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <0x00000036, "image_sample_b_cl_o">;
+defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, "image_sample_lz_o">;
+defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <0x00000038, "image_sample_c_o">;
+defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <0x00000039, "image_sample_c_cl_o">;
+defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, "image_sample_c_d_o">;
+defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, "image_sample_c_d_cl_o">;
+defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, "image_sample_c_l_o">;
+defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <0x0000003d, "image_sample_c_b_o">;
+defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <0x0000003e, "image_sample_c_b_cl_o">;
+defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, "image_sample_c_lz_o">;
+defm IMAGE_GATHER4 : MIMG_Gather_WQM <0x00000040, "image_gather4">;
+defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <0x00000041, "image_gather4_cl">;
+defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "image_gather4_l">;
+defm IMAGE_GATHER4_B : MIMG_Gather_WQM <0x00000045, "image_gather4_b">;
+defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <0x00000046, "image_gather4_b_cl">;
+defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "image_gather4_lz">;
+defm IMAGE_GATHER4_C : MIMG_Gather_WQM <0x00000048, "image_gather4_c">;
+defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <0x00000049, "image_gather4_c_cl">;
+defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "image_gather4_c_l">;
+defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <0x0000004d, "image_gather4_c_b">;
+defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <0x0000004e, "image_gather4_c_b_cl">;
+defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "image_gather4_c_lz">;
+defm IMAGE_GATHER4_O : MIMG_Gather_WQM <0x00000050, "image_gather4_o">;
+defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <0x00000051, "image_gather4_cl_o">;
+defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "image_gather4_l_o">;
+defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <0x00000055, "image_gather4_b_o">;
+defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "image_gather4_b_cl_o">;
+defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "image_gather4_lz_o">;
+defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <0x00000058, "image_gather4_c_o">;
+defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <0x00000059, "image_gather4_c_cl_o">;
+defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "image_gather4_c_l_o">;
+defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <0x0000005d, "image_gather4_c_b_o">;
+defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, "image_gather4_c_b_cl_o">;
+defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
+defm IMAGE_GET_LOD : MIMG_Sampler_WQM <0x00000060, "image_get_lod">;
+defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">;
+defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "image_sample_cd_cl">;
+defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "image_sample_c_cd">;
+defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <0x0000006b, "image_sample_c_cd_cl">;
+defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <0x0000006c, "image_sample_cd_o">;
+defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <0x0000006d, "image_sample_cd_cl_o">;
+defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <0x0000006e, "image_sample_c_cd_o">;
+defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o">;
+//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>;
+//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
+
+//===----------------------------------------------------------------------===//
+// VOP1 Instructions
+//===----------------------------------------------------------------------===//
+
+let vdst = 0, src0 = 0 in {
+defm V_NOP : VOP1_m <vop1<0x0>, (outs), (ins), "v_nop", [], "v_nop">;
+}
+
+let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>;
+} // End isMoveImm = 1
+
+let Uses = [EXEC] in {
+
+// FIXME: Specify SchedRW for READFIRSTLANE_B32
+
+def V_READFIRSTLANE_B32 : VOP1 <
+ 0x00000002,
+ (outs SReg_32:$vdst),
+ (ins VGPR_32:$src0),
+ "v_readfirstlane_b32 $vdst, $src0",
+ []
+>;
+
+}
+
+let SchedRW = [WriteQuarterRate32] in {
+
+defm V_CVT_I32_F64 : VOP1Inst <vop1<0x3>, "v_cvt_i32_f64",
+ VOP_I32_F64, fp_to_sint
+>;
+defm V_CVT_F64_I32 : VOP1Inst <vop1<0x4>, "v_cvt_f64_i32",
+ VOP_F64_I32, sint_to_fp
+>;
+defm V_CVT_F32_I32 : VOP1Inst <vop1<0x5>, "v_cvt_f32_i32",
+ VOP_F32_I32, sint_to_fp
+>;
+defm V_CVT_F32_U32 : VOP1Inst <vop1<0x6>, "v_cvt_f32_u32",
+ VOP_F32_I32, uint_to_fp
+>;
+defm V_CVT_U32_F32 : VOP1Inst <vop1<0x7>, "v_cvt_u32_f32",
+ VOP_I32_F32, fp_to_uint
+>;
+defm V_CVT_I32_F32 : VOP1Inst <vop1<0x8>, "v_cvt_i32_f32",
+ VOP_I32_F32, fp_to_sint
+>;
+defm V_CVT_F16_F32 : VOP1Inst <vop1<0xa>, "v_cvt_f16_f32",
+ VOP_I32_F32, fp_to_f16
+>;
+defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "v_cvt_f32_f16",
+ VOP_F32_I32, f16_to_fp
+>;
+defm V_CVT_RPI_I32_F32 : VOP1Inst <vop1<0xc>, "v_cvt_rpi_i32_f32",
+ VOP_I32_F32, cvt_rpi_i32_f32>;
+defm V_CVT_FLR_I32_F32 : VOP1Inst <vop1<0xd>, "v_cvt_flr_i32_f32",
+ VOP_I32_F32, cvt_flr_i32_f32>;
+defm V_CVT_OFF_F32_I4 : VOP1Inst <vop1<0x0e>, "v_cvt_off_f32_i4", VOP_F32_I32>;
+defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "v_cvt_f32_f64",
+ VOP_F32_F64, fround
+>;
+defm V_CVT_F64_F32 : VOP1Inst <vop1<0x10>, "v_cvt_f64_f32",
+ VOP_F64_F32, fextend
+>;
+defm V_CVT_F32_UBYTE0 : VOP1Inst <vop1<0x11>, "v_cvt_f32_ubyte0",
+ VOP_F32_I32, AMDGPUcvt_f32_ubyte0
+>;
+defm V_CVT_F32_UBYTE1 : VOP1Inst <vop1<0x12>, "v_cvt_f32_ubyte1",
+ VOP_F32_I32, AMDGPUcvt_f32_ubyte1
+>;
+defm V_CVT_F32_UBYTE2 : VOP1Inst <vop1<0x13>, "v_cvt_f32_ubyte2",
+ VOP_F32_I32, AMDGPUcvt_f32_ubyte2
+>;
+defm V_CVT_F32_UBYTE3 : VOP1Inst <vop1<0x14>, "v_cvt_f32_ubyte3",
+ VOP_F32_I32, AMDGPUcvt_f32_ubyte3
+>;
+defm V_CVT_U32_F64 : VOP1Inst <vop1<0x15>, "v_cvt_u32_f64",
+ VOP_I32_F64, fp_to_uint
+>;
+defm V_CVT_F64_U32 : VOP1Inst <vop1<0x16>, "v_cvt_f64_u32",
+ VOP_F64_I32, uint_to_fp
+>;
+
+} // let SchedRW = [WriteQuarterRate32]
+
+defm V_FRACT_F32 : VOP1Inst <vop1<0x20, 0x1b>, "v_fract_f32",
+ VOP_F32_F32, AMDGPUfract
+>;
+defm V_TRUNC_F32 : VOP1Inst <vop1<0x21, 0x1c>, "v_trunc_f32",
+ VOP_F32_F32, ftrunc
+>;
+defm V_CEIL_F32 : VOP1Inst <vop1<0x22, 0x1d>, "v_ceil_f32",
+ VOP_F32_F32, fceil
+>;
+defm V_RNDNE_F32 : VOP1Inst <vop1<0x23, 0x1e>, "v_rndne_f32",
+ VOP_F32_F32, frint
+>;
+defm V_FLOOR_F32 : VOP1Inst <vop1<0x24, 0x1f>, "v_floor_f32",
+ VOP_F32_F32, ffloor
+>;
+defm V_EXP_F32 : VOP1Inst <vop1<0x25, 0x20>, "v_exp_f32",
+ VOP_F32_F32, fexp2
+>;
+
+let SchedRW = [WriteQuarterRate32] in {
+
+defm V_LOG_F32 : VOP1Inst <vop1<0x27, 0x21>, "v_log_f32",
+ VOP_F32_F32, flog2
+>;
+defm V_RCP_F32 : VOP1Inst <vop1<0x2a, 0x22>, "v_rcp_f32",
+ VOP_F32_F32, AMDGPUrcp
+>;
+defm V_RCP_IFLAG_F32 : VOP1Inst <vop1<0x2b, 0x23>, "v_rcp_iflag_f32",
+ VOP_F32_F32
+>;
+defm V_RSQ_F32 : VOP1Inst <vop1<0x2e, 0x24>, "v_rsq_f32",
+ VOP_F32_F32, AMDGPUrsq
+>;
+
+} // let SchedRW = [WriteQuarterRate32]
+
+let SchedRW = [WriteDouble] in {
+
+defm V_RCP_F64 : VOP1Inst <vop1<0x2f, 0x25>, "v_rcp_f64",
+ VOP_F64_F64, AMDGPUrcp
+>;
+defm V_RSQ_F64 : VOP1Inst <vop1<0x31, 0x26>, "v_rsq_f64",
+ VOP_F64_F64, AMDGPUrsq
+>;
+
+} // let SchedRW = [WriteDouble]
+
+defm V_SQRT_F32 : VOP1Inst <vop1<0x33, 0x27>, "v_sqrt_f32",
+ VOP_F32_F32, fsqrt
+>;
+
+let SchedRW = [WriteDouble] in {
+
+defm V_SQRT_F64 : VOP1Inst <vop1<0x34, 0x28>, "v_sqrt_f64",
+ VOP_F64_F64, fsqrt
+>;
+
+} // let SchedRW = [WriteDouble]
+
+defm V_SIN_F32 : VOP1Inst <vop1<0x35, 0x29>, "v_sin_f32",
+ VOP_F32_F32, AMDGPUsin
+>;
+defm V_COS_F32 : VOP1Inst <vop1<0x36, 0x2a>, "v_cos_f32",
+ VOP_F32_F32, AMDGPUcos
+>;
+defm V_NOT_B32 : VOP1Inst <vop1<0x37, 0x2b>, "v_not_b32", VOP_I32_I32>;
+defm V_BFREV_B32 : VOP1Inst <vop1<0x38, 0x2c>, "v_bfrev_b32", VOP_I32_I32>;
+defm V_FFBH_U32 : VOP1Inst <vop1<0x39, 0x2d>, "v_ffbh_u32", VOP_I32_I32>;
+defm V_FFBL_B32 : VOP1Inst <vop1<0x3a, 0x2e>, "v_ffbl_b32", VOP_I32_I32>;
+defm V_FFBH_I32 : VOP1Inst <vop1<0x3b, 0x2f>, "v_ffbh_i32", VOP_I32_I32>;
+defm V_FREXP_EXP_I32_F64 : VOP1Inst <vop1<0x3c,0x30>, "v_frexp_exp_i32_f64",
+ VOP_I32_F64
+>;
+defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d, 0x31>, "v_frexp_mant_f64",
+ VOP_F64_F64
+>;
+defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64", VOP_F64_F64>;
+defm V_FREXP_EXP_I32_F32 : VOP1Inst <vop1<0x3f, 0x33>, "v_frexp_exp_i32_f32",
+ VOP_I32_F32
+>;
+defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
+ VOP_F32_F32
+>;
+let vdst = 0, src0 = 0 in {
+defm V_CLREXCP : VOP1_m <vop1<0x41,0x35>, (outs), (ins), "v_clrexcp", [],
+ "v_clrexcp"
+>;
+}
+defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_I32_I32>;
+defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_I32_I32>;
+defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_I32_I32>;
+
+// These instructions only exist on SI and CI
+let SubtargetPredicate = isSICI in {
+
+let SchedRW = [WriteQuarterRate32] in {
+
+defm V_MOV_FED_B32 : VOP1InstSI <vop1<0x9>, "v_mov_fed_b32", VOP_I32_I32>;
+defm V_LOG_CLAMP_F32 : VOP1InstSI <vop1<0x26>, "v_log_clamp_f32", VOP_F32_F32>;
+defm V_RCP_CLAMP_F32 : VOP1InstSI <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>;
+defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32", VOP_F32_F32>;
+defm V_RSQ_CLAMP_F32 : VOP1InstSI <vop1<0x2c>, "v_rsq_clamp_f32",
+ VOP_F32_F32, AMDGPUrsq_clamped
+>;
+defm V_RSQ_LEGACY_F32 : VOP1InstSI <vop1<0x2d>, "v_rsq_legacy_f32",
+ VOP_F32_F32, AMDGPUrsq_legacy
+>;
+
+} // End let SchedRW = [WriteQuarterRate32]
+
+let SchedRW = [WriteDouble] in {
+
+defm V_RCP_CLAMP_F64 : VOP1InstSI <vop1<0x30>, "v_rcp_clamp_f64", VOP_F64_F64>;
+defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64",
+ VOP_F64_F64, AMDGPUrsq_clamped
+>;
+
+} // End SchedRW = [WriteDouble]
+
+} // End SubtargetPredicate = isSICI
+
+//===----------------------------------------------------------------------===//
+// VINTRP Instructions
+//===----------------------------------------------------------------------===//
+
+let Uses = [M0] in {
+
+// FIXME: Specify SchedRW for VINTRP instructions.
+
+multiclass V_INTERP_P1_F32_m : VINTRP_m <
+ 0x00000000,
+ (outs VGPR_32:$dst),
+ (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr),
+ "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [m0]",
+ [(set f32:$dst, (AMDGPUinterp_p1 i32:$i, (i32 imm:$attr_chan),
+ (i32 imm:$attr)))]
+>;
+
+let OtherPredicates = [has32BankLDS] in {
+
+defm V_INTERP_P1_F32 : V_INTERP_P1_F32_m;
+
+} // End OtherPredicates = [has32BankLDS]
+
+let OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst" in {
+
+defm V_INTERP_P1_F32_16bank : V_INTERP_P1_F32_m;
+
+} // End OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst"
+
+let DisableEncoding = "$src0", Constraints = "$src0 = $dst" in {
+
+defm V_INTERP_P2_F32 : VINTRP_m <
+ 0x00000001,
+ (outs VGPR_32:$dst),
+ (ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr),
+ "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [m0]",
+ [(set f32:$dst, (AMDGPUinterp_p2 f32:$src0, i32:$j, (i32 imm:$attr_chan),
+ (i32 imm:$attr)))]>;
+
+} // End DisableEncoding = "$src0", Constraints = "$src0 = $dst"
+
+defm V_INTERP_MOV_F32 : VINTRP_m <
+ 0x00000002,
+ (outs VGPR_32:$dst),
+ (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr),
+ "v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [m0]",
+ [(set f32:$dst, (AMDGPUinterp_mov (i32 imm:$src0), (i32 imm:$attr_chan),
+ (i32 imm:$attr)))]>;
+
+} // End Uses = [M0]
+
+//===----------------------------------------------------------------------===//
+// VOP2 Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass V_CNDMASK <vop2 op, string name> {
+ defm _e32 : VOP2_m <
+ op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins32, VOP_CNDMASK.Asm32, [],
+ name, name>;
+
+ defm _e64 : VOP3_m <
+ op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64,
+ name#!cast<string>(VOP_CNDMASK.Asm64), [], name, 3>;
+}
+
+defm V_CNDMASK_B32 : V_CNDMASK<vop2<0x0>, "v_cndmask_b32">;
+
+let isCommutable = 1 in {
+defm V_ADD_F32 : VOP2Inst <vop2<0x3, 0x1>, "v_add_f32",
+ VOP_F32_F32_F32, fadd
+>;
+
+defm V_SUB_F32 : VOP2Inst <vop2<0x4, 0x2>, "v_sub_f32", VOP_F32_F32_F32, fsub>;
+defm V_SUBREV_F32 : VOP2Inst <vop2<0x5, 0x3>, "v_subrev_f32",
+ VOP_F32_F32_F32, null_frag, "v_sub_f32"
+>;
+} // End isCommutable = 1
+
+let isCommutable = 1 in {
+
+defm V_MUL_LEGACY_F32 : VOP2Inst <vop2<0x7, 0x4>, "v_mul_legacy_f32",
+ VOP_F32_F32_F32, int_AMDGPU_mul
+>;
+
+defm V_MUL_F32 : VOP2Inst <vop2<0x8, 0x5>, "v_mul_f32",
+ VOP_F32_F32_F32, fmul
+>;
+
+defm V_MUL_I32_I24 : VOP2Inst <vop2<0x9, 0x6>, "v_mul_i32_i24",
+ VOP_I32_I32_I32, AMDGPUmul_i24
+>;
+
+defm V_MUL_HI_I32_I24 : VOP2Inst <vop2<0xa,0x7>, "v_mul_hi_i32_i24",
+ VOP_I32_I32_I32
+>;
+
+defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb, 0x8>, "v_mul_u32_u24",
+ VOP_I32_I32_I32, AMDGPUmul_u24
+>;
+
+defm V_MUL_HI_U32_U24 : VOP2Inst <vop2<0xc,0x9>, "v_mul_hi_u32_u24",
+ VOP_I32_I32_I32
+>;
+
+defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32,
+ fminnum>;
+defm V_MAX_F32 : VOP2Inst <vop2<0x10, 0xb>, "v_max_f32", VOP_F32_F32_F32,
+ fmaxnum>;
+defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32>;
+defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32>;
+defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32>;
+defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32>;
+
+defm V_LSHRREV_B32 : VOP2Inst <
+ vop2<0x16, 0x10>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag,
+ "v_lshr_b32"
+>;
+
+defm V_ASHRREV_I32 : VOP2Inst <
+ vop2<0x18, 0x11>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag,
+ "v_ashr_i32"
+>;
+
+defm V_LSHLREV_B32 : VOP2Inst <
+ vop2<0x1a, 0x12>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag,
+ "v_lshl_b32"
+>;
+
+defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>;
+defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>;
+defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>;
+
+defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_F32_F32_F32>;
+} // End isCommutable = 1
+
+defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32">;
+
+let isCommutable = 1 in {
+defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32">;
+} // End isCommutable = 1
+
+let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
+// No patterns so that the scalar instructions are always selected.
+// The scalar versions will be replaced with the vector ones when needed later.
+
+// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
+// but the VI instructions behave the same as the SI versions.
+defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32",
+ VOP_I32_I32_I32, add
+>;
+defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP_I32_I32_I32>;
+
+defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
+ VOP_I32_I32_I32, null_frag, "v_sub_i32"
+>;
+
+let Uses = [VCC] in { // Carry-in comes from VCC
+defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
+ VOP_I32_I32_I32_VCC
+>;
+defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
+ VOP_I32_I32_I32_VCC
+>;
+defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
+ VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32"
+>;
+
+} // End Uses = [VCC]
+} // End isCommutable = 1, Defs = [VCC]
+
+defm V_READLANE_B32 : VOP2SI_3VI_m <
+ vop3 <0x001, 0x289>,
+ "v_readlane_b32",
+ (outs SReg_32:$vdst),
+ (ins VGPR_32:$src0, SCSrc_32:$src1),
+ "v_readlane_b32 $vdst, $src0, $src1"
+>;
+
+defm V_WRITELANE_B32 : VOP2SI_3VI_m <
+ vop3 <0x002, 0x28a>,
+ "v_writelane_b32",
+ (outs VGPR_32:$vdst),
+ (ins SReg_32:$src0, SCSrc_32:$src1),
+ "v_writelane_b32 $vdst, $src0, $src1"
+>;
+
+// These instructions only exist on SI and CI
+let SubtargetPredicate = isSICI in {
+
+defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",
+ VOP_F32_F32_F32, AMDGPUfmin_legacy
+>;
+defm V_MAX_LEGACY_F32 : VOP2InstSI <vop2<0xe>, "v_max_legacy_f32",
+ VOP_F32_F32_F32, AMDGPUfmax_legacy
+>;
+
+let isCommutable = 1 in {
+defm V_LSHR_B32 : VOP2InstSI <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32>;
+defm V_ASHR_I32 : VOP2InstSI <vop2<0x17>, "v_ashr_i32", VOP_I32_I32_I32>;
+defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>;
+} // End isCommutable = 1
+} // End SubtargetPredicate = isSICI
+
+let isCommutable = 1 in {
+defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
+ VOP_F32_F32_F32
+>;
+} // End isCommutable = 1
+
+defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32",
+ VOP_I32_I32_I32
+>;
+defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
+ VOP_I32_I32_I32
+>;
+defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst <vop23<0x23, 0x28c>, "v_mbcnt_lo_u32_b32",
+ VOP_I32_I32_I32
+>;
+defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst <vop23<0x24, 0x28d>, "v_mbcnt_hi_u32_b32",
+ VOP_I32_I32_I32
+>;
+defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
+ VOP_F32_F32_I32, AMDGPUldexp
+>;
+
+defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
+ VOP_I32_F32_I32>; // TODO: set "Uses = dst"
+
+defm V_CVT_PKNORM_I16_F32 : VOP2_VI3_Inst <vop23<0x2d, 0x294>, "v_cvt_pknorm_i16_f32",
+ VOP_I32_F32_F32
+>;
+defm V_CVT_PKNORM_U16_F32 : VOP2_VI3_Inst <vop23<0x2e, 0x295>, "v_cvt_pknorm_u16_f32",
+ VOP_I32_F32_F32
+>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",
+ VOP_I32_F32_F32, int_SI_packf16
+>;
+defm V_CVT_PK_U16_U32 : VOP2_VI3_Inst <vop23<0x30, 0x297>, "v_cvt_pk_u16_u32",
+ VOP_I32_I32_I32
+>;
+defm V_CVT_PK_I16_I32 : VOP2_VI3_Inst <vop23<0x31, 0x298>, "v_cvt_pk_i16_i32",
+ VOP_I32_I32_I32
+>;
+
+//===----------------------------------------------------------------------===//
+// VOP3 Instructions
+//===----------------------------------------------------------------------===//
+
+let isCommutable = 1 in {
+defm V_MAD_LEGACY_F32 : VOP3Inst <vop3<0x140, 0x1c0>, "v_mad_legacy_f32",
+ VOP_F32_F32_F32_F32
+>;
+
+defm V_MAD_F32 : VOP3Inst <vop3<0x141, 0x1c1>, "v_mad_f32",
+ VOP_F32_F32_F32_F32, fmad
+>;
+
+defm V_MAD_I32_I24 : VOP3Inst <vop3<0x142, 0x1c2>, "v_mad_i32_i24",
+ VOP_I32_I32_I32_I32, AMDGPUmad_i24
+>;
+defm V_MAD_U32_U24 : VOP3Inst <vop3<0x143, 0x1c3>, "v_mad_u32_u24",
+ VOP_I32_I32_I32_I32, AMDGPUmad_u24
+>;
+} // End isCommutable = 1
+
+defm V_CUBEID_F32 : VOP3Inst <vop3<0x144, 0x1c4>, "v_cubeid_f32",
+ VOP_F32_F32_F32_F32
+>;
+defm V_CUBESC_F32 : VOP3Inst <vop3<0x145, 0x1c5>, "v_cubesc_f32",
+ VOP_F32_F32_F32_F32
+>;
+defm V_CUBETC_F32 : VOP3Inst <vop3<0x146, 0x1c6>, "v_cubetc_f32",
+ VOP_F32_F32_F32_F32
+>;
+defm V_CUBEMA_F32 : VOP3Inst <vop3<0x147, 0x1c7>, "v_cubema_f32",
+ VOP_F32_F32_F32_F32
+>;
+
+defm V_BFE_U32 : VOP3Inst <vop3<0x148, 0x1c8>, "v_bfe_u32",
+ VOP_I32_I32_I32_I32, AMDGPUbfe_u32
+>;
+defm V_BFE_I32 : VOP3Inst <vop3<0x149, 0x1c9>, "v_bfe_i32",
+ VOP_I32_I32_I32_I32, AMDGPUbfe_i32
+>;
+
+defm V_BFI_B32 : VOP3Inst <vop3<0x14a, 0x1ca>, "v_bfi_b32",
+ VOP_I32_I32_I32_I32, AMDGPUbfi
+>;
+
+let isCommutable = 1 in {
+defm V_FMA_F32 : VOP3Inst <vop3<0x14b, 0x1cb>, "v_fma_f32",
+ VOP_F32_F32_F32_F32, fma
+>;
+defm V_FMA_F64 : VOP3Inst <vop3<0x14c, 0x1cc>, "v_fma_f64",
+ VOP_F64_F64_F64_F64, fma
+>;
+} // End isCommutable = 1
+
+//def V_LERP_U8 : VOP3_U8 <0x0000014d, "v_lerp_u8", []>;
+defm V_ALIGNBIT_B32 : VOP3Inst <vop3<0x14e, 0x1ce>, "v_alignbit_b32",
+ VOP_I32_I32_I32_I32
+>;
+defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f, 0x1cf>, "v_alignbyte_b32",
+ VOP_I32_I32_I32_I32
+>;
+
+defm V_MIN3_F32 : VOP3Inst <vop3<0x151, 0x1d0>, "v_min3_f32",
+ VOP_F32_F32_F32_F32, AMDGPUfmin3>;
+
+defm V_MIN3_I32 : VOP3Inst <vop3<0x152, 0x1d1>, "v_min3_i32",
+ VOP_I32_I32_I32_I32, AMDGPUsmin3
+>;
+defm V_MIN3_U32 : VOP3Inst <vop3<0x153, 0x1d2>, "v_min3_u32",
+ VOP_I32_I32_I32_I32, AMDGPUumin3
+>;
+defm V_MAX3_F32 : VOP3Inst <vop3<0x154, 0x1d3>, "v_max3_f32",
+ VOP_F32_F32_F32_F32, AMDGPUfmax3
+>;
+defm V_MAX3_I32 : VOP3Inst <vop3<0x155, 0x1d4>, "v_max3_i32",
+ VOP_I32_I32_I32_I32, AMDGPUsmax3
+>;
+defm V_MAX3_U32 : VOP3Inst <vop3<0x156, 0x1d5>, "v_max3_u32",
+ VOP_I32_I32_I32_I32, AMDGPUumax3
+>;
+defm V_MED3_F32 : VOP3Inst <vop3<0x157, 0x1d6>, "v_med3_f32",
+ VOP_F32_F32_F32_F32
+>;
+defm V_MED3_I32 : VOP3Inst <vop3<0x158, 0x1d7>, "v_med3_i32",
+ VOP_I32_I32_I32_I32
+>;
+defm V_MED3_U32 : VOP3Inst <vop3<0x159, 0x1d8>, "v_med3_u32",
+ VOP_I32_I32_I32_I32
+>;
+
+//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>;
+//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>;
+//def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>;
+defm V_SAD_U32 : VOP3Inst <vop3<0x15d, 0x1dc>, "v_sad_u32",
+ VOP_I32_I32_I32_I32
+>;
+////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>;
+defm V_DIV_FIXUP_F32 : VOP3Inst <
+ vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup
+>;
+
+let SchedRW = [WriteDouble] in {
+
+defm V_DIV_FIXUP_F64 : VOP3Inst <
+ vop3<0x160, 0x1df>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup
+>;
+
+} // let SchedRW = [WriteDouble]
+
+let SchedRW = [WriteDouble] in {
+let isCommutable = 1 in {
+
+defm V_ADD_F64 : VOP3Inst <vop3<0x164, 0x280>, "v_add_f64",
+ VOP_F64_F64_F64, fadd
+>;
+defm V_MUL_F64 : VOP3Inst <vop3<0x165, 0x281>, "v_mul_f64",
+ VOP_F64_F64_F64, fmul
+>;
+
+defm V_MIN_F64 : VOP3Inst <vop3<0x166, 0x282>, "v_min_f64",
+ VOP_F64_F64_F64, fminnum
+>;
+defm V_MAX_F64 : VOP3Inst <vop3<0x167, 0x283>, "v_max_f64",
+ VOP_F64_F64_F64, fmaxnum
+>;
+
+} // isCommutable = 1
+
+defm V_LDEXP_F64 : VOP3Inst <vop3<0x168, 0x284>, "v_ldexp_f64",
+ VOP_F64_F64_I32, AMDGPUldexp
+>;
+
+} // let SchedRW = [WriteDouble]
+
+let isCommutable = 1, SchedRW = [WriteQuarterRate32] in {
+
+defm V_MUL_LO_U32 : VOP3Inst <vop3<0x169, 0x285>, "v_mul_lo_u32",
+ VOP_I32_I32_I32
+>;
+defm V_MUL_HI_U32 : VOP3Inst <vop3<0x16a, 0x286>, "v_mul_hi_u32",
+ VOP_I32_I32_I32
+>;
+
+defm V_MUL_LO_I32 : VOP3Inst <vop3<0x16b, 0x285>, "v_mul_lo_i32",
+ VOP_I32_I32_I32
+>;
+defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32",
+ VOP_I32_I32_I32
+>;
+
+} // isCommutable = 1, SchedRW = [WriteQuarterRate32]
+
+let SchedRW = [WriteFloatFMA, WriteSALU] in {
+defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d, 0x1e0>, "v_div_scale_f32", []>;
+}
+
+let SchedRW = [WriteDouble, WriteSALU] in {
+// Double precision division pre-scale.
+defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e, 0x1e1>, "v_div_scale_f64", []>;
+} // End SchedRW = [WriteDouble, WriteSALU]
+
+let isCommutable = 1, Uses = [VCC] in {
+
+// v_div_fmas_f32:
+// result = src0 * src1 + src2
+// if (vcc)
+// result *= 2^32
+//
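+// For example (illustrative values, not taken from the ISA manual): with
+// src0 = 1.5, src1 = 2.0, src2 = 1.0 the fma part gives 4.0, and if VCC is
+// set the final result is 4.0 * 2^32.
+//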
+defm V_DIV_FMAS_F32 : VOP3_VCC_Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32",
+ VOP_F32_F32_F32_F32, AMDGPUdiv_fmas
+>;
+
+let SchedRW = [WriteDouble] in {
+// v_div_fmas_f64:
+// result = src0 * src1 + src2
+// if (vcc)
+// result *= 2^64
+//
+defm V_DIV_FMAS_F64 : VOP3_VCC_Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64",
+ VOP_F64_F64_F64_F64, AMDGPUdiv_fmas
+>;
+
+} // End SchedRW = [WriteDouble]
+} // End isCommutable = 1, Uses = [VCC]
+
+//def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>;
+//def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>;
+//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "v_mqsad_u8", []>;
+
+let SchedRW = [WriteDouble] in {
+defm V_TRIG_PREOP_F64 : VOP3Inst <
+ vop3<0x174, 0x292>, "v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop
+>;
+
+} // let SchedRW = [WriteDouble]
+
+// These instructions only exist on SI and CI
+let SubtargetPredicate = isSICI in {
+
+defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", VOP_I64_I64_I32>;
+defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64", VOP_I64_I64_I32>;
+defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", VOP_I64_I64_I32>;
+
+defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
+ VOP_F32_F32_F32_F32>;
+
+} // End SubtargetPredicate = isSICI
+
+let SubtargetPredicate = isVI in {
+
+defm V_LSHLREV_B64 : VOP3Inst <vop3<0, 0x28f>, "v_lshlrev_b64",
+ VOP_I64_I32_I64
+>;
+defm V_LSHRREV_B64 : VOP3Inst <vop3<0, 0x290>, "v_lshrrev_b64",
+ VOP_I64_I32_I64
+>;
+defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64",
+ VOP_I64_I32_I64
+>;
+
+} // End SubtargetPredicate = isVI
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
+// For use in patterns
+def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$dst),
+ (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []
+>;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
+// pass to enable folding of inline immediates.
+def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>;
+} // end let hasSideEffects = 0, mayLoad = 0, mayStore = 0
+
+let hasSideEffects = 1 in {
+def SGPR_USE : InstSI <(outs),(ins), "", []>;
+}
+
+// SI pseudo instructions. These are used by the CFG structurizer pass
+// and should be lowered to ISA instructions prior to codegen.
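+//
+// Rough shape of the expected lowering (an illustrative sketch, not the
+// exact output of the lowering pass): SI_IF saves the exec mask and masks
+// off the inactive lanes, e.g.
+//   s_and_saveexec_b64 s[0:1], vcc
+//   s_xor_b64 s[0:1], exec, s[0:1]
+// and SI_END_CF re-enables them at the join point:
+//   s_or_b64 exec, exec, s[0:1]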
+
+let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
+let Uses = [EXEC], Defs = [EXEC] in {
+
+let isBranch = 1, isTerminator = 1 in {
+
+def SI_IF: InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$vcc, brtarget:$target),
+ "",
+ [(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))]
+>;
+
+def SI_ELSE : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src, brtarget:$target),
+ "",
+ [(set i64:$dst, (int_SI_else i64:$src, bb:$target))]
+> {
+ let Constraints = "$src = $dst";
+}
+
+def SI_LOOP : InstSI <
+ (outs),
+ (ins SReg_64:$saved, brtarget:$target),
+ "si_loop $saved, $target",
+ [(int_SI_loop i64:$saved, bb:$target)]
+>;
+
+} // end isBranch = 1, isTerminator = 1
+
+def SI_BREAK : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src),
+ "si_else $dst, $src",
+ [(set i64:$dst, (int_SI_break i64:$src))]
+>;
+
+def SI_IF_BREAK : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$vcc, SReg_64:$src),
+ "si_if_break $dst, $vcc, $src",
+ [(set i64:$dst, (int_SI_if_break i1:$vcc, i64:$src))]
+>;
+
+def SI_ELSE_BREAK : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src0, SReg_64:$src1),
+ "si_else_break $dst, $src0, $src1",
+ [(set i64:$dst, (int_SI_else_break i64:$src0, i64:$src1))]
+>;
+
+def SI_END_CF : InstSI <
+ (outs),
+ (ins SReg_64:$saved),
+ "si_end_cf $saved",
+ [(int_SI_end_cf i64:$saved)]
+>;
+
+} // End Uses = [EXEC], Defs = [EXEC]
+
+let Uses = [EXEC], Defs = [EXEC,VCC] in {
+def SI_KILL : InstSI <
+ (outs),
+ (ins VSrc_32:$src),
+ "si_kill $src",
+ [(int_AMDGPU_kill f32:$src)]
+>;
+} // End Uses = [EXEC], Defs = [EXEC,VCC]
+
+} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
+
+let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
+
+//defm SI_ : RegisterLoadStore <VGPR_32, FRAMEri, ADDRIndirect>;
+
+let UseNamedOperandTable = 1 in {
+
+def SI_RegisterLoad : InstSI <
+ (outs VGPR_32:$dst, SReg_64:$temp),
+ (ins FRAMEri32:$addr, i32imm:$chan),
+ "", []
+> {
+ let isRegisterLoad = 1;
+ let mayLoad = 1;
+}
+
+class SIRegStore<dag outs> : InstSI <
+ outs,
+ (ins VGPR_32:$val, FRAMEri32:$addr, i32imm:$chan),
+ "", []
+> {
+ let isRegisterStore = 1;
+ let mayStore = 1;
+}
+
+let usesCustomInserter = 1 in {
+def SI_RegisterStorePseudo : SIRegStore<(outs)>;
+} // End usesCustomInserter = 1
+def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>;
+
+
+} // End UseNamedOperandTable = 1
+
+def SI_INDIRECT_SRC : InstSI <
+ (outs VGPR_32:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off),
+ "si_indirect_src $dst, $temp, $src, $idx, $off",
+ []
+>;
+
+class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
+ (outs rc:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VGPR_32:$val),
+ "si_indirect_dst $dst, $temp, $src, $idx, $off, $val",
+ []
+> {
+ let Constraints = "$src = $dst";
+}
+
+def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VGPR_32>;
+def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
+def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
+def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
+def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
+
+} // End Uses = [EXEC], Defs = [EXEC,VCC,M0]
+
+multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
+
+ let UseNamedOperandTable = 1 in {
+ def _SAVE : InstSI <
+ (outs),
+ (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
+ SReg_32:$scratch_offset),
+ "", []
+ >;
+
+ def _RESTORE : InstSI <
+ (outs sgpr_class:$dst),
+ (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
+ "", []
+ >;
+ } // End UseNamedOperandTable = 1
+}
+
+// It's unclear whether you can use M0 as the output of v_readlane_b32
+// instructions, so use SGPR_32 register class for spills to prevent
+// this from happening.
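+//
+// (For context, a sketch of the mechanism these pseudos stand in for, not
+// the literal expansion: an SGPR spill writes the value into a lane of a
+// scratch VGPR with v_writelane_b32 and restores it with v_readlane_b32,
+// so the restore's destination register class matters here.)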
+defm SI_SPILL_S32 : SI_SPILL_SGPR <SGPR_32>;
+defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
+defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
+defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
+defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
+
+multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
+ let UseNamedOperandTable = 1, VGPRSpill = 1 in {
+ def _SAVE : InstSI <
+ (outs),
+ (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
+ SReg_32:$scratch_offset),
+ "", []
+ >;
+
+ def _RESTORE : InstSI <
+ (outs vgpr_class:$dst),
+ (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
+ "", []
+ >;
+ } // End UseNamedOperandTable = 1, VGPRSpill = 1
+}
+
+defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;
+defm SI_SPILL_V64 : SI_SPILL_VGPR <VReg_64>;
+defm SI_SPILL_V96 : SI_SPILL_VGPR <VReg_96>;
+defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
+defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
+defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
+
+let Defs = [SCC] in {
+
+def SI_CONSTDATA_PTR : InstSI <
+ (outs SReg_64:$dst),
+ (ins),
+ "", [(set SReg_64:$dst, (i64 SIconstdata_ptr))]
+>;
+
+} // End Defs = [SCC]
+
+} // End isCodeGenOnly = 1, isPseudo = 1
+
+} // end SubtargetPredicate = isGCN
+
+let Predicates = [isGCN] in {
+
+def : Pat<
+ (int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2),
+ (V_CNDMASK_B32_e64 $src2, $src1,
+ (V_CMP_GT_F32_e64 SRCMODS.NONE, 0, SRCMODS.NONE, $src0,
+ DSTCLAMP.NONE, DSTOMOD.NONE))
+>;
+
+def : Pat <
+ (int_AMDGPU_kilp),
+ (SI_KILL 0xbf800000)
+>;
+
+/* int_SI_vs_load_input */
+def : Pat<
+ (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
+ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0)
+>;
+
+/* int_SI_export */
+def : Pat <
+ (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
+ f32:$src0, f32:$src1, f32:$src2, f32:$src3),
+ (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
+ $src0, $src1, $src2, $src3)
+>;
+
+//===----------------------------------------------------------------------===//
+// SMRD Patterns
+//===----------------------------------------------------------------------===//
+
+multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
+
+ // 1. SI-CI: Offset as 8-bit DWORD immediate
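+ // (On SI/CI the SMRD immediate is counted in dwords, which is why the byte
+ // offset is converted with as_dword_i32imm below; a byte offset of 16, for
+ // example, would be encoded as 4. On VI the immediate is in bytes, see
+ // SMRD_Pattern_vi. Editorial note based on the conversions used here.)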
+ def : Pat <
+ (constant_load (add i64:$sbase, (i64 IMM8bitDWORD:$offset))),
+ (vt (Instr_IMM $sbase, (as_dword_i32imm $offset)))
+ >;
+
+ // 2. Offset loaded in a 32-bit SGPR
+ def : Pat <
+ (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))),
+ (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset)))))
+ >;
+
+ // 3. No offset at all
+ def : Pat <
+ (constant_load i64:$sbase),
+ (vt (Instr_IMM $sbase, 0))
+ >;
+}
+
+multiclass SMRD_Pattern_vi <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
+
+ // 1. VI: Offset as 20-bit immediate in bytes
+ def : Pat <
+ (constant_load (add i64:$sbase, (i64 IMM20bit:$offset))),
+ (vt (Instr_IMM $sbase, (as_i32imm $offset)))
+ >;
+
+ // 2. Offset loaded in a 32-bit SGPR
+ def : Pat <
+ (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))),
+ (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset)))))
+ >;
+
+ // 3. No offset at all
+ def : Pat <
+ (constant_load i64:$sbase),
+ (vt (Instr_IMM $sbase, 0))
+ >;
+}
+
+let Predicates = [isSICI] in {
+defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
+defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
+defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
+} // End Predicates = [isSICI]
+
+let Predicates = [isVI] in {
+defm : SMRD_Pattern_vi <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
+defm : SMRD_Pattern_vi <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
+defm : SMRD_Pattern_vi <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
+defm : SMRD_Pattern_vi <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
+defm : SMRD_Pattern_vi <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
+defm : SMRD_Pattern_vi <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
+defm : SMRD_Pattern_vi <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
+} // End Predicates = [isVI]
+
+let Predicates = [isSICI] in {
+
+// 1. Offset as 8-bit DWORD immediate
+def : Pat <
+ (SIload_constant v4i32:$sbase, IMM8bitDWORD:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset))
+>;
+
+} // End Predicates = [isSICI]
+
+// 2. Offset loaded in a 32-bit SGPR
+def : Pat <
+ (SIload_constant v4i32:$sbase, imm:$offset),
+ (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
+>;
+
+//===----------------------------------------------------------------------===//
+// SOP1 Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat <
+ (i64 (ctpop i64:$src)),
+ (i64 (REG_SEQUENCE SReg_64,
+ (S_BCNT1_I32_B64 $src), sub0,
+ (S_MOV_B32 0), sub1))
+>;
+
+//===----------------------------------------------------------------------===//
+// SOP2 Patterns
+//===----------------------------------------------------------------------===//
+
+// V_ADD_I32_e32/S_ADD_U32 produces carry in VCC/SCC. For the vector
+// case, the sgpr-copies pass will fix this to use the vector version.
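+// For example (illustrative register numbers), the selected scalar form is
+//   s_add_u32 s2, s0, s1            ; carry-out in SCC
+// and the rewritten vector form is
+//   v_add_i32_e32 v2, vcc, v0, v1   ; carry-out in VCC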
+def : Pat <
+ (i32 (addc i32:$src0, i32:$src1)),
+ (S_ADD_U32 $src0, $src1)
+>;
+
+//===----------------------------------------------------------------------===//
+// SOPP Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat <
+ (int_AMDGPU_barrier_global),
+ (S_BARRIER)
+>;
+
+//===----------------------------------------------------------------------===//
+// VOP1 Patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [UnsafeFPMath] in {
+
+//def : RcpPat<V_RCP_F64_e32, f64>;
+//defm : RsqPat<V_RSQ_F64_e32, f64>;
+//defm : RsqPat<V_RSQ_F32_e32, f32>;
+
+def : RsqPat<V_RSQ_F32_e32, f32>;
+def : RsqPat<V_RSQ_F64_e32, f64>;
+}
+
+//===----------------------------------------------------------------------===//
+// VOP2 Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat <
+ (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
+ (V_BCNT_U32_B32_e64 $popcnt, $val)
+>;
+
+def : Pat <
+ (i32 (select i1:$src0, i32:$src1, i32:$src2)),
+ (V_CNDMASK_B32_e64 $src2, $src1, $src0)
+>;
+
+/********** ======================= **********/
+/********** Image sampling patterns **********/
+/********** ======================= **********/
+
+// Image + sampler
+class SampleRawPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm,
+ i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
+ (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da),
+ (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc),
+ $addr, $rsrc, $sampler)
+>;
+
+multiclass SampleRawPatterns<SDPatternOperator name, string opcode> {
+ def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
+ def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
+ def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
+ def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V8), v8i32>;
+ def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V16), v16i32>;
+}
+
+// Image only
+class ImagePattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v8i32:$rsrc, i32:$dmask, i32:$unorm,
+ i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
+ (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da),
+ (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc),
+ $addr, $rsrc)
+>;
+
+multiclass ImagePatterns<SDPatternOperator name, string opcode> {
+ def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
+ def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
+ def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
+}
+
+// Basic sample
+defm : SampleRawPatterns<int_SI_image_sample, "IMAGE_SAMPLE">;
+defm : SampleRawPatterns<int_SI_image_sample_cl, "IMAGE_SAMPLE_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_d, "IMAGE_SAMPLE_D">;
+defm : SampleRawPatterns<int_SI_image_sample_d_cl, "IMAGE_SAMPLE_D_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_l, "IMAGE_SAMPLE_L">;
+defm : SampleRawPatterns<int_SI_image_sample_b, "IMAGE_SAMPLE_B">;
+defm : SampleRawPatterns<int_SI_image_sample_b_cl, "IMAGE_SAMPLE_B_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_lz, "IMAGE_SAMPLE_LZ">;
+defm : SampleRawPatterns<int_SI_image_sample_cd, "IMAGE_SAMPLE_CD">;
+defm : SampleRawPatterns<int_SI_image_sample_cd_cl, "IMAGE_SAMPLE_CD_CL">;
+
+// Sample with comparison
+defm : SampleRawPatterns<int_SI_image_sample_c, "IMAGE_SAMPLE_C">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cl, "IMAGE_SAMPLE_C_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_c_d, "IMAGE_SAMPLE_C_D">;
+defm : SampleRawPatterns<int_SI_image_sample_c_d_cl, "IMAGE_SAMPLE_C_D_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_c_l, "IMAGE_SAMPLE_C_L">;
+defm : SampleRawPatterns<int_SI_image_sample_c_b, "IMAGE_SAMPLE_C_B">;
+defm : SampleRawPatterns<int_SI_image_sample_c_b_cl, "IMAGE_SAMPLE_C_B_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_c_lz, "IMAGE_SAMPLE_C_LZ">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cd, "IMAGE_SAMPLE_C_CD">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl, "IMAGE_SAMPLE_C_CD_CL">;
+
+// Sample with offsets
+defm : SampleRawPatterns<int_SI_image_sample_o, "IMAGE_SAMPLE_O">;
+defm : SampleRawPatterns<int_SI_image_sample_cl_o, "IMAGE_SAMPLE_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_d_o, "IMAGE_SAMPLE_D_O">;
+defm : SampleRawPatterns<int_SI_image_sample_d_cl_o, "IMAGE_SAMPLE_D_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_l_o, "IMAGE_SAMPLE_L_O">;
+defm : SampleRawPatterns<int_SI_image_sample_b_o, "IMAGE_SAMPLE_B_O">;
+defm : SampleRawPatterns<int_SI_image_sample_b_cl_o, "IMAGE_SAMPLE_B_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_lz_o, "IMAGE_SAMPLE_LZ_O">;
+defm : SampleRawPatterns<int_SI_image_sample_cd_o, "IMAGE_SAMPLE_CD_O">;
+defm : SampleRawPatterns<int_SI_image_sample_cd_cl_o, "IMAGE_SAMPLE_CD_CL_O">;
+
+// Sample with comparison and offsets
+defm : SampleRawPatterns<int_SI_image_sample_c_o, "IMAGE_SAMPLE_C_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cl_o, "IMAGE_SAMPLE_C_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_d_o, "IMAGE_SAMPLE_C_D_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_d_cl_o, "IMAGE_SAMPLE_C_D_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_l_o, "IMAGE_SAMPLE_C_L_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_b_o, "IMAGE_SAMPLE_C_B_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_b_cl_o, "IMAGE_SAMPLE_C_B_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_lz_o, "IMAGE_SAMPLE_C_LZ_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cd_o, "IMAGE_SAMPLE_C_CD_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">;
+
+// Gather opcodes
+// Only the variants which make sense are defined.
+def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V2, v2i32>;
+def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_cl, IMAGE_GATHER4_CL_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_l, IMAGE_GATHER4_L_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b, IMAGE_GATHER4_B_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V2, v2i32>;
+def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V4, v4i32>;
+
+def : SampleRawPattern<int_SI_gather4_c, IMAGE_GATHER4_C_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b_cl, IMAGE_GATHER4_C_B_CL_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_lz, IMAGE_GATHER4_C_LZ_V4_V4, v4i32>;
+
+def : SampleRawPattern<int_SI_gather4_o, IMAGE_GATHER4_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_b_cl_o, IMAGE_GATHER4_B_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_lz_o, IMAGE_GATHER4_LZ_O_V4_V4, v4i32>;
+
+def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_cl_o, IMAGE_GATHER4_C_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_l_o, IMAGE_GATHER4_C_L_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b_o, IMAGE_GATHER4_C_B_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b_cl_o, IMAGE_GATHER4_C_B_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V8, v8i32>;
+
+def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V1, i32>;
+def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V2, v2i32>;
+def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V4, v4i32>;
+
+def : ImagePattern<int_SI_getresinfo, IMAGE_GET_RESINFO_V4_V1, i32>;
+defm : ImagePatterns<int_SI_image_load, "IMAGE_LOAD">;
+defm : ImagePatterns<int_SI_image_load_mip, "IMAGE_LOAD_MIP">;
+
+/* SIsample for simple 1D texture lookup */
+def : Pat <
+ (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
+ (IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
+>;
+
+class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
+ (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
+>;
+
+class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_RECT),
+ (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
+>;
+
+class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_ARRAY),
+ (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
+>;
+
+class SampleShadowPattern<SDNode name, MIMG opcode,
+ ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW),
+ (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
+>;
+
+class SampleShadowArrayPattern<SDNode name, MIMG opcode,
+ ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
+ (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
+>;
+
+/* SIsample* for texture lookups consuming more address parameters */
+multiclass SamplePatterns<MIMG sample, MIMG sample_c, MIMG sample_l,
+ MIMG sample_c_l, MIMG sample_b, MIMG sample_c_b,
+MIMG sample_d, MIMG sample_c_d, ValueType addr_type> {
+ def : SamplePattern <SIsample, sample, addr_type>;
+ def : SampleRectPattern <SIsample, sample, addr_type>;
+ def : SampleArrayPattern <SIsample, sample, addr_type>;
+ def : SampleShadowPattern <SIsample, sample_c, addr_type>;
+ def : SampleShadowArrayPattern <SIsample, sample_c, addr_type>;
+
+ def : SamplePattern <SIsamplel, sample_l, addr_type>;
+ def : SampleArrayPattern <SIsamplel, sample_l, addr_type>;
+ def : SampleShadowPattern <SIsamplel, sample_c_l, addr_type>;
+ def : SampleShadowArrayPattern <SIsamplel, sample_c_l, addr_type>;
+
+ def : SamplePattern <SIsampleb, sample_b, addr_type>;
+ def : SampleArrayPattern <SIsampleb, sample_b, addr_type>;
+ def : SampleShadowPattern <SIsampleb, sample_c_b, addr_type>;
+ def : SampleShadowArrayPattern <SIsampleb, sample_c_b, addr_type>;
+
+ def : SamplePattern <SIsampled, sample_d, addr_type>;
+ def : SampleArrayPattern <SIsampled, sample_d, addr_type>;
+ def : SampleShadowPattern <SIsampled, sample_c_d, addr_type>;
+ def : SampleShadowArrayPattern <SIsampled, sample_c_d, addr_type>;
+}
+
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V2, IMAGE_SAMPLE_C_V4_V2,
+ IMAGE_SAMPLE_L_V4_V2, IMAGE_SAMPLE_C_L_V4_V2,
+ IMAGE_SAMPLE_B_V4_V2, IMAGE_SAMPLE_C_B_V4_V2,
+ IMAGE_SAMPLE_D_V4_V2, IMAGE_SAMPLE_C_D_V4_V2,
+ v2i32>;
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V4, IMAGE_SAMPLE_C_V4_V4,
+ IMAGE_SAMPLE_L_V4_V4, IMAGE_SAMPLE_C_L_V4_V4,
+ IMAGE_SAMPLE_B_V4_V4, IMAGE_SAMPLE_C_B_V4_V4,
+ IMAGE_SAMPLE_D_V4_V4, IMAGE_SAMPLE_C_D_V4_V4,
+ v4i32>;
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V8, IMAGE_SAMPLE_C_V4_V8,
+ IMAGE_SAMPLE_L_V4_V8, IMAGE_SAMPLE_C_L_V4_V8,
+ IMAGE_SAMPLE_B_V4_V8, IMAGE_SAMPLE_C_B_V4_V8,
+ IMAGE_SAMPLE_D_V4_V8, IMAGE_SAMPLE_C_D_V4_V8,
+ v8i32>;
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V16, IMAGE_SAMPLE_C_V4_V16,
+ IMAGE_SAMPLE_L_V4_V16, IMAGE_SAMPLE_C_L_V4_V16,
+ IMAGE_SAMPLE_B_V4_V16, IMAGE_SAMPLE_C_B_V4_V16,
+ IMAGE_SAMPLE_D_V4_V16, IMAGE_SAMPLE_C_D_V4_V16,
+ v16i32>;
+
+/* int_SI_imageload for texture fetches consuming varying address parameters */
+class ImageLoadPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
+ (name addr_type:$addr, v32i8:$rsrc, imm),
+ (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
+>;
+
+class ImageLoadArrayPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
+ (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY),
+ (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
+>;
+
+class ImageLoadMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
+ (name addr_type:$addr, v32i8:$rsrc, TEX_MSAA),
+ (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
+>;
+
+class ImageLoadArrayMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
+ (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY_MSAA),
+ (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
+>;
+
+multiclass ImageLoadPatterns<MIMG opcode, ValueType addr_type> {
+ def : ImageLoadPattern <int_SI_imageload, opcode, addr_type>;
+ def : ImageLoadArrayPattern <int_SI_imageload, opcode, addr_type>;
+}
+
+multiclass ImageLoadMSAAPatterns<MIMG opcode, ValueType addr_type> {
+ def : ImageLoadMSAAPattern <int_SI_imageload, opcode, addr_type>;
+ def : ImageLoadArrayMSAAPattern <int_SI_imageload, opcode, addr_type>;
+}
+
+defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V2, v2i32>;
+defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V4, v4i32>;
+
+defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V2, v2i32>;
+defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V4, v4i32>;
+
+/* Image resource information */
+def : Pat <
+ (int_SI_resinfo i32:$mipid, v32i8:$rsrc, imm),
+ (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
+>;
+
+def : Pat <
+ (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY),
+ (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
+>;
+
+def : Pat <
+ (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY_MSAA),
+ (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
+>;
+
+/********** ============================================ **********/
+/********** Extraction, Insertion, Building and Casting **********/
+/********** ============================================ **********/
+
+foreach Index = 0-2 in {
+ def Extract_Element_v2i32_#Index : Extract_Element <
+ i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v2i32_#Index : Insert_Element <
+ i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v2f32_#Index : Extract_Element <
+ f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v2f32_#Index : Insert_Element <
+ f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-3 in {
+ def Extract_Element_v4i32_#Index : Extract_Element <
+ i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4i32_#Index : Insert_Element <
+ i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v4f32_#Index : Extract_Element <
+ f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4f32_#Index : Insert_Element <
+ f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-7 in {
+ def Extract_Element_v8i32_#Index : Extract_Element <
+ i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8i32_#Index : Insert_Element <
+ i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v8f32_#Index : Extract_Element <
+ f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8f32_#Index : Insert_Element <
+ f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-15 in {
+ def Extract_Element_v16i32_#Index : Extract_Element <
+ i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16i32_#Index : Insert_Element <
+ i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v16f32_#Index : Extract_Element <
+ f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16f32_#Index : Insert_Element <
+ f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+def : BitConvert <i32, f32, SReg_32>;
+def : BitConvert <i32, f32, VGPR_32>;
+
+def : BitConvert <f32, i32, SReg_32>;
+def : BitConvert <f32, i32, VGPR_32>;
+
+def : BitConvert <i64, f64, VReg_64>;
+
+def : BitConvert <f64, i64, VReg_64>;
+
+def : BitConvert <v2f32, v2i32, VReg_64>;
+def : BitConvert <v2i32, v2f32, VReg_64>;
+def : BitConvert <v2i32, i64, VReg_64>;
+def : BitConvert <i64, v2i32, VReg_64>;
+def : BitConvert <v2f32, i64, VReg_64>;
+def : BitConvert <i64, v2f32, VReg_64>;
+def : BitConvert <v2i32, f64, VReg_64>;
+def : BitConvert <f64, v2i32, VReg_64>;
+def : BitConvert <v4f32, v4i32, VReg_128>;
+def : BitConvert <v4i32, v4f32, VReg_128>;
+
+def : BitConvert <v8f32, v8i32, SReg_256>;
+def : BitConvert <v8i32, v8f32, SReg_256>;
+def : BitConvert <v8i32, v32i8, SReg_256>;
+def : BitConvert <v32i8, v8i32, SReg_256>;
+def : BitConvert <v8i32, v32i8, VReg_256>;
+def : BitConvert <v8i32, v8f32, VReg_256>;
+def : BitConvert <v8f32, v8i32, VReg_256>;
+def : BitConvert <v32i8, v8i32, VReg_256>;
+
+def : BitConvert <v16i32, v16f32, VReg_512>;
+def : BitConvert <v16f32, v16i32, VReg_512>;
+
+/********** =================== **********/
+/********** Src & Dst modifiers **********/
+/********** =================== **********/
+
+def : Pat <
+ (AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod),
+ (f32 FP_ZERO), (f32 FP_ONE)),
+ (V_ADD_F32_e64 $src0_modifiers, $src0, 0, 0, 1, $omod)
+>;
+
+/********** ================================ **********/
+/********** Floating point absolute/negative **********/
+/********** ================================ **********/
+
+// Prevent expanding both fneg and fabs.
+
+// FIXME: Should use S_OR_B32
+def : Pat <
+ (fneg (fabs f32:$src)),
+ (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
+>;
+
+// FIXME: Should use S_OR_B32
+def : Pat <
+ (fneg (fabs f64:$src)),
+ (REG_SEQUENCE VReg_64,
+ (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+ sub0,
+ (V_OR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
+ (V_MOV_B32_e32 0x80000000)), // Set sign bit.
+ sub1)
+>;
+
+def : Pat <
+ (fabs f32:$src),
+ (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff))
+>;
+
+def : Pat <
+ (fneg f32:$src),
+ (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000))
+>;
+
+def : Pat <
+ (fabs f64:$src),
+ (REG_SEQUENCE VReg_64,
+ (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+ sub0,
+ (V_AND_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
+ (V_MOV_B32_e32 0x7fffffff)), // Clear sign bit.
+ sub1)
+>;
+
+def : Pat <
+ (fneg f64:$src),
+ (REG_SEQUENCE VReg_64,
+ (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+ sub0,
+ (V_XOR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
+ (V_MOV_B32_e32 0x80000000)),
+ sub1)
+>;
+
+/********** ================== **********/
+/********** Immediate Patterns **********/
+/********** ================== **********/
+
+def : Pat <
+ (SGPRImm<(i32 imm)>:$imm),
+ (S_MOV_B32 imm:$imm)
+>;
+
+def : Pat <
+ (SGPRImm<(f32 fpimm)>:$imm),
+ (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
+>;
+
+def : Pat <
+ (i32 imm:$imm),
+ (V_MOV_B32_e32 imm:$imm)
+>;
+
+def : Pat <
+ (f32 fpimm:$imm),
+ (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
+>;
+
+def : Pat <
+ (i64 InlineImm<i64>:$imm),
+ (S_MOV_B64 InlineImm<i64>:$imm)
+>;
+
+// XXX - Should this use an s_cmp to set SCC?
+
+// Set to sign-extended 64-bit value (true = -1, false = 0)
+def : Pat <
+ (i1 imm:$imm),
+ (S_MOV_B64 (i64 (as_i64imm $imm)))
+>;
+
+def : Pat <
+ (f64 InlineFPImm<f64>:$imm),
+ (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineFPImm<f64>:$imm)))
+>;
+
+/********** ================== **********/
+/********** Intrinsic Patterns **********/
+/********** ================== **********/
+
+/* llvm.AMDGPU.pow */
+def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
+
+def : Pat <
+ (int_AMDGPU_div f32:$src0, f32:$src1),
+ (V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1))
+>;
+
+def : Pat <
+ (int_AMDGPU_cube v4f32:$src),
+ (REG_SEQUENCE VReg_128,
+ (V_CUBETC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
+ 0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
+ 0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
+ 0 /* clamp */, 0 /* omod */), sub0,
+ (V_CUBESC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
+ 0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub1),
+ 0 /* src2_modifiers */,(EXTRACT_SUBREG $src, sub2),
+ 0 /* clamp */, 0 /* omod */), sub1,
+ (V_CUBEMA_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
+ 0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
+ 0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
+ 0 /* clamp */, 0 /* omod */), sub2,
+ (V_CUBEID_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
+ 0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
+ 0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
+ 0 /* clamp */, 0 /* omod */), sub3)
+>;
+
+def : Pat <
+ (i32 (sext i1:$src0)),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
+>;
+
+class Ext32Pat <SDNode ext> : Pat <
+ (i32 (ext i1:$src0)),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
+>;
+
+def : Ext32Pat <zext>;
+def : Ext32Pat <anyext>;
+
+// Offset in a 32-bit VGPR
+def : Pat <
+ (SIload_constant v4i32:$sbase, i32:$voff),
+ (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0)
+>;
+
+// The multiplication scales from [0,1] to the unsigned integer range
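+// (roughly urecip(x) = (uint32_t)(2^32 * (1.0f / (float)x)); for x = 4 this
+// gives about 0x40000000. An approximate sketch of what the sequence below
+// computes, not a bit-exact description.)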
+def : Pat <
+ (AMDGPUurecip i32:$src0),
+ (V_CVT_U32_F32_e32
+ (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
+ (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
+>;
+
+def : Pat <
+ (int_SI_tid),
+ (V_MBCNT_HI_U32_B32_e64 0xffffffff,
+ (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0))
+>;
+
+//===----------------------------------------------------------------------===//
+// VOP3 Patterns
+//===----------------------------------------------------------------------===//
+
+def : IMad24Pat<V_MAD_I32_I24>;
+def : UMad24Pat<V_MAD_U32_U24>;
+
+def : Pat <
+ (mulhu i32:$src0, i32:$src1),
+ (V_MUL_HI_U32 $src0, $src1)
+>;
+
+def : Pat <
+ (mulhs i32:$src0, i32:$src1),
+ (V_MUL_HI_I32 $src0, $src1)
+>;
+
+defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
+def : ROTRPattern <V_ALIGNBIT_B32>;
+
+/********** ======================= **********/
+/********** Load/Store Patterns **********/
+/********** ======================= **********/
+
+class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
+ (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
+ (inst $ptr, (as_i16imm $offset), (i1 0))
+>;
+
+def : DSReadPat <DS_READ_I8, i32, si_sextload_local_i8>;
+def : DSReadPat <DS_READ_U8, i32, si_az_extload_local_i8>;
+def : DSReadPat <DS_READ_I16, i32, si_sextload_local_i16>;
+def : DSReadPat <DS_READ_U16, i32, si_az_extload_local_i16>;
+def : DSReadPat <DS_READ_B32, i32, si_load_local>;
+
+let AddedComplexity = 100 in {
+
+def : DSReadPat <DS_READ_B64, v2i32, si_load_local_align8>;
+
+} // End AddedComplexity = 100
+
+def : Pat <
+ (v2i32 (si_load_local (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+ i8:$offset1))),
+ (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0))
+>;
+
+class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
+ (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0))
+>;
+
+def : DSWritePat <DS_WRITE_B8, i32, si_truncstore_local_i8>;
+def : DSWritePat <DS_WRITE_B16, i32, si_truncstore_local_i16>;
+def : DSWritePat <DS_WRITE_B32, i32, si_store_local>;
+
+let AddedComplexity = 100 in {
+
+def : DSWritePat <DS_WRITE_B64, v2i32, si_store_local_align8>;
+} // End AddedComplexity = 100
+
+def : Pat <
+ (si_store_local v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+ i8:$offset1)),
+ (DS_WRITE2_B32 $ptr, (EXTRACT_SUBREG $value, sub0),
+ (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
+ (i1 0))
+>;
+
+class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0))
+>;
+
+// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
+//
+// We need to use something for the data0, so we set a register to
+// -1. For the non-rtn variants, the manual says it does
+// DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1, and setting D0 to uint_max
+// will always do the increment so I'm assuming it's the same.
+//
+// We also load this -1 with s_mov_b32 / s_mov_b64 even though this
+// needs to be a VGPR. The SGPR copy pass will fix this, and it's
+// easier since there is no v_mov_b64.
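+//
+// A rough C sketch of the behavior being relied on, with D0 = uint_max
+// (illustrative, not an exact hardware model):
+//   uint32_t old = DS[A];
+//   DS[A] = (old >= 0xffffffffu) ? 0 : old + 1;  // always old + 1, wrapping at uint_max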
+class DSAtomicIncRetPat<DS inst, ValueType vt,
+ Instruction LoadImm, PatFrag frag> : Pat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
+ (inst $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (i1 0))
+>;
+
+
+class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
+ (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0))
+>;
+
+
+// 32-bit atomics.
+def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
+ S_MOV_B32, si_atomic_load_add_local>;
+def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
+ S_MOV_B32, si_atomic_load_sub_local>;
+
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, si_atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, si_atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, si_atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B32, i32, si_atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B32, i32, si_atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, si_atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, si_atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, si_atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, si_atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, si_atomic_load_umax_local>;
+
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, si_atomic_cmp_swap_32_local>;
+
+// 64-bit atomics.
+def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
+ S_MOV_B64, si_atomic_load_add_local>;
+def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
+ S_MOV_B64, si_atomic_load_sub_local>;
+
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, si_atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, si_atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, si_atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B64, i64, si_atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B64, i64, si_atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, si_atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, si_atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, si_atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, si_atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, si_atomic_load_umax_local>;
+
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, si_atomic_cmp_swap_64_local>;
+
+
+//===----------------------------------------------------------------------===//
+// MUBUF Patterns
+//===----------------------------------------------------------------------===//
+
+multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
+ PatFrag constant_ld> {
+ def : Pat <
+ (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+ >;
+}
+
+let Predicates = [isSICI] in {
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32, constant_load>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32, constant_load>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, constant_load>;
+} // End Predicates = [isSICI]
+
+class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
+ (vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
+ i32:$soffset, u16imm:$offset))),
+ (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+>;
+
+def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i32, extloadi8_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, i32, sextloadi16_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, i32, extloadi16_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, i32, load_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, v2i32, load_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, v4i32, load_private>;
+
+// BUFFER_LOAD_DWORD*, addr64=0
+multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
+ MUBUF bothen> {
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
+ imm:$offset, 0, 0, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
+ (as_i1imm $slc), (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
+ imm:$offset, 1, 0, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
+ (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
+ imm:$offset, 0, 1, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
+ (as_i1imm $slc), (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
+ imm:$offset, 1, 1, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
+ (as_i1imm $tfe))
+ >;
+}
+
+defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
+ BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
+defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
+ BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
+defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
+ BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
+
+class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
+ (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
+ u16imm:$offset)),
+ (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+>;
+
+def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
+
+/*
+class MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
+ (st vt:$value, (MUBUFScratch v4i32:$srsrc, i64:$vaddr, u16imm:$offset)),
+ (Instr $value, $srsrc, $vaddr, $offset)
+>;
+
+let Predicates = [isSICI] in {
+def : MUBUFStore_Pattern <BUFFER_STORE_BYTE_ADDR64, i32, truncstorei8_private>;
+def : MUBUFStore_Pattern <BUFFER_STORE_SHORT_ADDR64, i32, truncstorei16_private>;
+def : MUBUFStore_Pattern <BUFFER_STORE_DWORD_ADDR64, i32, store_private>;
+def : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2_ADDR64, v2i32, store_private>;
+def : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4_ADDR64, v4i32, store_private>;
+} // End Predicates = [isSICI]
+
+*/
+
+//===----------------------------------------------------------------------===//
+// MTBUF Patterns
+//===----------------------------------------------------------------------===//
+
+// TBUFFER_STORE_FORMAT_*, addr64=0
+class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat<
+ (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
+ i32:$soffset, imm:$inst_offset, imm:$dfmt,
+ imm:$nfmt, imm:$offen, imm:$idxen,
+ imm:$glc, imm:$slc, imm:$tfe),
+ (opcode
+ $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
+ (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
+ (as_i1imm $slc), (as_i1imm $tfe), $soffset)
+>;
+
+def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>;
+def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
+def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
+def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
+
+let SubtargetPredicate = isCI in {
+
+defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8",
+ VOP_I32_I32_I32
+>;
+defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8",
+ VOP_I32_I32_I32
+>;
+defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8",
+ VOP_I32_I32_I32
+>;
+
+let isCommutable = 1 in {
+defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",
+ VOP_I64_I32_I32_I64
+>;
+
+// XXX - Does this set VCC?
+defm V_MAD_I64_I32 : VOP3Inst <vop3<0x177>, "v_mad_i64_i32",
+ VOP_I64_I32_I32_I64
+>;
+} // End isCommutable = 1
+
+// Remaining instructions:
+// FLAT_*
+// S_CBRANCH_CDBGUSER
+// S_CBRANCH_CDBGSYS
+// S_CBRANCH_CDBGSYS_OR_USER
+// S_CBRANCH_CDBGSYS_AND_USER
+// S_DCACHE_INV_VOL
+// DS_NOP
+// DS_GWS_SEMA_RELEASE_ALL
+// DS_WRAP_RTN_B32
+// DS_CNDXCHG32_RTN_B64
+// DS_WRITE_B96
+// DS_WRITE_B128
+// DS_CONDXCHG32_RTN_B128
+// DS_READ_B96
+// DS_READ_B128
+// BUFFER_LOAD_DWORDX3
+// BUFFER_STORE_DWORDX3
+
+} // End isCI
+
+/********** ====================== **********/
+/**********   Indirect addressing   **********/
+/********** ====================== **********/
+
+multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST IndDst> {
+
+ // 1. Extract with offset
+ def : Pat<
+ (eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))),
+ (SI_INDIRECT_SRC $vec, $idx, imm:$off)
+ >;
+
+ // 2. Extract without offset
+ def : Pat<
+ (eltvt (vector_extract vt:$vec, i32:$idx)),
+ (SI_INDIRECT_SRC $vec, $idx, 0)
+ >;
+
+ // 3. Insert with offset
+ def : Pat<
+ (vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
+ (IndDst $vec, $idx, imm:$off, $val)
+ >;
+
+ // 4. Insert without offset
+ def : Pat<
+ (vector_insert vt:$vec, eltvt:$val, i32:$idx),
+ (IndDst $vec, $idx, 0, $val)
+ >;
+}
+
+defm : SI_INDIRECT_Pattern <v2f32, f32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <v4f32, f32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <v8f32, f32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <v16f32, f32, SI_INDIRECT_DST_V16>;
+
+defm : SI_INDIRECT_Pattern <v2i32, i32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <v4i32, i32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <v8i32, i32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>;
+
+//===----------------------------------------------------------------------===//
+// Conversion Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat<(i32 (sext_inreg i32:$src, i1)),
+ (S_BFE_I32 i32:$src, 65536)>; // 0 | 1 << 16
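+// The immediate operand of S_BFE packs the bitfield offset in its low bits and
+// the field width in bits [22:16], so 65536 (0x10000) selects offset 0, width 1;
+// the i64 patterns below use the same encoding for wider fields.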
+
+// Handle sext_inreg in i64
+def : Pat <
+ (i64 (sext_inreg i64:$src, i1)),
+ (S_BFE_I64 i64:$src, 0x10000) // 0 | 1 << 16
+>;
+
+def : Pat <
+ (i64 (sext_inreg i64:$src, i8)),
+ (S_BFE_I64 i64:$src, 0x80000) // 0 | 8 << 16
+>;
+
+def : Pat <
+ (i64 (sext_inreg i64:$src, i16)),
+ (S_BFE_I64 i64:$src, 0x100000) // 0 | 16 << 16
+>;
+
+def : Pat <
+ (i64 (sext_inreg i64:$src, i32)),
+ (S_BFE_I64 i64:$src, 0x200000) // 0 | 32 << 16
+>;
+
+class ZExt_i64_i32_Pat <SDNode ext> : Pat <
+ (i64 (ext i32:$src)),
+ (REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 0), sub1)
+>;
+
+class ZExt_i64_i1_Pat <SDNode ext> : Pat <
+ (i64 (ext i1:$src)),
+ (REG_SEQUENCE VReg_64,
+ (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
+ (S_MOV_B32 0), sub1)
+>;
+
+
+def : ZExt_i64_i32_Pat<zext>;
+def : ZExt_i64_i32_Pat<anyext>;
+def : ZExt_i64_i1_Pat<zext>;
+def : ZExt_i64_i1_Pat<anyext>;
+
+def : Pat <
+ (i64 (sext i32:$src)),
+ (REG_SEQUENCE SReg_64, $src, sub0,
+ (S_ASHR_I32 $src, 31), sub1)
+>;
+
+def : Pat <
+ (i64 (sext i1:$src)),
+ (REG_SEQUENCE VReg_64,
+ (V_CNDMASK_B32_e64 0, -1, $src), sub0,
+ (V_CNDMASK_B32_e64 0, -1, $src), sub1)
+>;
+
+// If we need to perform a logical operation on i1 values, we need to
+// use vector comparisons since there is only one SCC register. Vector
+// comparisons still write to a pair of SGPRs, so treat these as
+// 64-bit comparisons. When legalizing SGPR copies, the instructions
+// that produce copies from SCC into these operations will be
+// moved to the VALU.
+def : Pat <
+ (i1 (and i1:$src0, i1:$src1)),
+ (S_AND_B64 $src0, $src1)
+>;
+
+def : Pat <
+ (i1 (or i1:$src0, i1:$src1)),
+ (S_OR_B64 $src0, $src1)
+>;
+
+def : Pat <
+ (i1 (xor i1:$src0, i1:$src1)),
+ (S_XOR_B64 $src0, $src1)
+>;
+
+def : Pat <
+ (f32 (sint_to_fp i1:$src)),
+ (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_NEG_ONE, $src)
+>;
+
+def : Pat <
+ (f32 (uint_to_fp i1:$src)),
+ (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_ONE, $src)
+>;
+
+def : Pat <
+ (f64 (sint_to_fp i1:$src)),
+ (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
+>;
+
+def : Pat <
+ (f64 (uint_to_fp i1:$src)),
+ (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src))
+>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat <
+ (i32 (trunc i64:$a)),
+ (EXTRACT_SUBREG $a, sub0)
+>;
+
+def : Pat <
+ (i1 (trunc i32:$a)),
+ (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1)
+>;
+
+def : Pat <
+ (i1 (trunc i64:$a)),
+ (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1),
+ (EXTRACT_SUBREG $a, sub0)), 1)
+>;
+
+def : Pat <
+ (i32 (bswap i32:$a)),
+ (V_BFI_B32 (S_MOV_B32 0x00ff00ff),
+ (V_ALIGNBIT_B32 $a, $a, 24),
+ (V_ALIGNBIT_B32 $a, $a, 8))
+>;
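+// The pattern above implements bswap with two rotates and a bitfield insert:
+// V_ALIGNBIT_B32 with both sources equal is a rotate right, so for input bytes
+// AABBCCDD the rotations give BBCCDDAA (by 24) and DDAABBCC (by 8), and
+// V_BFI_B32 with mask 0x00ff00ff keeps bytes 2 and 0 of the first and bytes 3
+// and 1 of the second, producing DDCCBBAA.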
+
+def : Pat <
+ (f32 (select i1:$src2, f32:$src1, f32:$src0)),
+ (V_CNDMASK_B32_e64 $src0, $src1, $src2)
+>;
+
+multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
+ def : Pat <
+ (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)),
+ (BFM $a, $b)
+ >;
+
+ def : Pat <
+ (vt (add (vt (shl 1, vt:$a)), -1)),
+ (BFM $a, (MOV 0))
+ >;
+}
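+// S_BFM_B32 computes ((1 << $a) - 1) << $b, i.e. a bitfield mask of width $a
+// at offset $b, which is exactly the two forms matched above; the second form
+// has a zero offset, materialized with the scalar MOV.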
+
+defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
+// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
+
+def : BFEPattern <V_BFE_U32, S_MOV_B32>;
+
+//===----------------------------------------------------------------------===//
+// Fract Patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isSI] in {
+
+// V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is
+// used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient
+// way to implement it is using V_FRACT_F64.
+// The workaround for the V_FRACT bug is:
+// fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)
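+// 0x3fefffffffffffff is the largest f64 strictly less than 1.0, used below to
+// clamp the (possibly buggy) V_FRACT result back into [0, 1).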
+
+// Convert (x + (-floor(x))) to fract(x)
+def : Pat <
+ (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
+ (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
+ (V_CNDMASK_B64_PSEUDO
+ $x,
+ (V_MIN_F64
+ SRCMODS.NONE,
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
+ SRCMODS.NONE,
+ (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
+ DSTCLAMP.NONE, DSTOMOD.NONE),
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/))
+>;
+
+// Convert floor(x) to (x - fract(x))
+def : Pat <
+ (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),
+ (V_ADD_F64
+ $mods,
+ $x,
+ SRCMODS.NEG,
+ (V_CNDMASK_B64_PSEUDO
+ $x,
+ (V_MIN_F64
+ SRCMODS.NONE,
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
+ SRCMODS.NONE,
+ (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
+ DSTCLAMP.NONE, DSTOMOD.NONE),
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)),
+ DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+} // End Predicates = [isSI]
+
+let Predicates = [isCI] in {
+
+// Convert (x - floor(x)) to fract(x)
+def : Pat <
+ (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
+ (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
+ (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+// Convert (x + (-floor(x))) to fract(x)
+def : Pat <
+ (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
+ (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+} // End Predicates = [isCI]
+
+//============================================================================//
+// Miscellaneous Optimization Patterns
+//============================================================================//
+
+def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
+
+//============================================================================//
+// Assembler aliases
+//============================================================================//
+
+def : MnemonicAlias<"v_add_u32", "v_add_i32">;
+def : MnemonicAlias<"v_sub_u32", "v_sub_i32">;
+def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">;
+
+} // End isGCN predicate
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td b/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td
new file mode 100644
index 0000000..027a0a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td
@@ -0,0 +1,199 @@
+//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Intrinsic Definitions
+//
+//===----------------------------------------------------------------------===//
+
+
+let TargetPrefix = "SI", isTarget = 1 in {
+
+ def int_SI_tid : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
+ def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
+ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
+
+ // Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed
+ def int_SI_tbuffer_store : Intrinsic <
+ [],
+ [llvm_anyint_ty, // rsrc(SGPR)
+ llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32
+ llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, 2=XY, 3=XYZ, 4=XYZW
+ llvm_i32_ty, // vaddr(VGPR)
+ llvm_i32_ty, // soffset(SGPR)
+ llvm_i32_ty, // inst_offset(imm)
+ llvm_i32_ty, // dfmt(imm)
+ llvm_i32_ty, // nfmt(imm)
+ llvm_i32_ty, // offen(imm)
+ llvm_i32_ty, // idxen(imm)
+ llvm_i32_ty, // glc(imm)
+ llvm_i32_ty, // slc(imm)
+ llvm_i32_ty], // tfe(imm)
+ []>;
+
+ // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed
+ def int_SI_buffer_load_dword : Intrinsic <
+ [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32
+ [llvm_anyint_ty, // rsrc(SGPR)
+ llvm_anyint_ty, // vaddr(VGPR)
+ llvm_i32_ty, // soffset(SGPR)
+ llvm_i32_ty, // inst_offset(imm)
+ llvm_i32_ty, // offen(imm)
+ llvm_i32_ty, // idxen(imm)
+ llvm_i32_ty, // glc(imm)
+ llvm_i32_ty, // slc(imm)
+ llvm_i32_ty], // tfe(imm)
+ [IntrReadArgMem]>;
+
+ def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ // Fully-flexible SAMPLE instruction.
+ class SampleRaw : Intrinsic <
+ [llvm_v4f32_ty], // vdata(VGPR)
+ [llvm_anyint_ty, // vaddr(VGPR)
+ llvm_v8i32_ty, // rsrc(SGPR)
+ llvm_v4i32_ty, // sampler(SGPR)
+ llvm_i32_ty, // dmask(imm)
+ llvm_i32_ty, // unorm(imm)
+ llvm_i32_ty, // r128(imm)
+ llvm_i32_ty, // da(imm)
+ llvm_i32_ty, // glc(imm)
+ llvm_i32_ty, // slc(imm)
+ llvm_i32_ty, // tfe(imm)
+ llvm_i32_ty], // lwe(imm)
+ [IntrNoMem]>;
+
+ // Image instruction without a sampler.
+ class Image : Intrinsic <
+ [llvm_v4f32_ty], // vdata(VGPR)
+ [llvm_anyint_ty, // vaddr(VGPR)
+ llvm_v8i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // dmask(imm)
+ llvm_i32_ty, // unorm(imm)
+ llvm_i32_ty, // r128(imm)
+ llvm_i32_ty, // da(imm)
+ llvm_i32_ty, // glc(imm)
+ llvm_i32_ty, // slc(imm)
+ llvm_i32_ty, // tfe(imm)
+ llvm_i32_ty], // lwe(imm)
+ [IntrNoMem]>;
+
+ // Basic sample
+ def int_SI_image_sample : SampleRaw;
+ def int_SI_image_sample_cl : SampleRaw;
+ def int_SI_image_sample_d : SampleRaw;
+ def int_SI_image_sample_d_cl : SampleRaw;
+ def int_SI_image_sample_l : SampleRaw;
+ def int_SI_image_sample_b : SampleRaw;
+ def int_SI_image_sample_b_cl : SampleRaw;
+ def int_SI_image_sample_lz : SampleRaw;
+ def int_SI_image_sample_cd : SampleRaw;
+ def int_SI_image_sample_cd_cl : SampleRaw;
+
+ // Sample with comparison
+ def int_SI_image_sample_c : SampleRaw;
+ def int_SI_image_sample_c_cl : SampleRaw;
+ def int_SI_image_sample_c_d : SampleRaw;
+ def int_SI_image_sample_c_d_cl : SampleRaw;
+ def int_SI_image_sample_c_l : SampleRaw;
+ def int_SI_image_sample_c_b : SampleRaw;
+ def int_SI_image_sample_c_b_cl : SampleRaw;
+ def int_SI_image_sample_c_lz : SampleRaw;
+ def int_SI_image_sample_c_cd : SampleRaw;
+ def int_SI_image_sample_c_cd_cl : SampleRaw;
+
+ // Sample with offsets
+ def int_SI_image_sample_o : SampleRaw;
+ def int_SI_image_sample_cl_o : SampleRaw;
+ def int_SI_image_sample_d_o : SampleRaw;
+ def int_SI_image_sample_d_cl_o : SampleRaw;
+ def int_SI_image_sample_l_o : SampleRaw;
+ def int_SI_image_sample_b_o : SampleRaw;
+ def int_SI_image_sample_b_cl_o : SampleRaw;
+ def int_SI_image_sample_lz_o : SampleRaw;
+ def int_SI_image_sample_cd_o : SampleRaw;
+ def int_SI_image_sample_cd_cl_o : SampleRaw;
+
+ // Sample with comparison and offsets
+ def int_SI_image_sample_c_o : SampleRaw;
+ def int_SI_image_sample_c_cl_o : SampleRaw;
+ def int_SI_image_sample_c_d_o : SampleRaw;
+ def int_SI_image_sample_c_d_cl_o : SampleRaw;
+ def int_SI_image_sample_c_l_o : SampleRaw;
+ def int_SI_image_sample_c_b_o : SampleRaw;
+ def int_SI_image_sample_c_b_cl_o : SampleRaw;
+ def int_SI_image_sample_c_lz_o : SampleRaw;
+ def int_SI_image_sample_c_cd_o : SampleRaw;
+ def int_SI_image_sample_c_cd_cl_o : SampleRaw;
+
+ // Basic gather4
+ def int_SI_gather4 : SampleRaw;
+ def int_SI_gather4_cl : SampleRaw;
+ def int_SI_gather4_l : SampleRaw;
+ def int_SI_gather4_b : SampleRaw;
+ def int_SI_gather4_b_cl : SampleRaw;
+ def int_SI_gather4_lz : SampleRaw;
+
+ // Gather4 with comparison
+ def int_SI_gather4_c : SampleRaw;
+ def int_SI_gather4_c_cl : SampleRaw;
+ def int_SI_gather4_c_l : SampleRaw;
+ def int_SI_gather4_c_b : SampleRaw;
+ def int_SI_gather4_c_b_cl : SampleRaw;
+ def int_SI_gather4_c_lz : SampleRaw;
+
+ // Gather4 with offsets
+ def int_SI_gather4_o : SampleRaw;
+ def int_SI_gather4_cl_o : SampleRaw;
+ def int_SI_gather4_l_o : SampleRaw;
+ def int_SI_gather4_b_o : SampleRaw;
+ def int_SI_gather4_b_cl_o : SampleRaw;
+ def int_SI_gather4_lz_o : SampleRaw;
+
+ // Gather4 with comparison and offsets
+ def int_SI_gather4_c_o : SampleRaw;
+ def int_SI_gather4_c_cl_o : SampleRaw;
+ def int_SI_gather4_c_l_o : SampleRaw;
+ def int_SI_gather4_c_b_o : SampleRaw;
+ def int_SI_gather4_c_b_cl_o : SampleRaw;
+ def int_SI_gather4_c_lz_o : SampleRaw;
+
+ def int_SI_getlod : SampleRaw;
+
+  // Image intrinsics.
+ def int_SI_image_load : Image;
+ def int_SI_image_load_mip : Image;
+ def int_SI_getresinfo : Image;
+
+ // Deprecated image and sample intrinsics.
+ class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_SI_sample : Sample;
+ def int_SI_sampleb : Sample;
+ def int_SI_sampled : Sample;
+ def int_SI_samplel : Sample;
+ def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ /* Interpolation Intrinsics */
+
+ def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrNoMem]>;
+
+ /* Control flow Intrinsics */
+
+ def int_SI_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], []>;
+ def int_SI_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], []>;
+ def int_SI_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>;
+ def int_SI_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], []>;
+ def int_SI_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>;
+ def int_SI_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], []>;
+ def int_SI_end_cf : Intrinsic<[], [llvm_i64_ty], []>;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
new file mode 100644
index 0000000..9b1d256
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -0,0 +1,421 @@
+//===-- SILoadStoreOptimizer.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to fuse DS instructions with close by immediate offsets.
+// This will fuse operations such as
+// ds_read_b32 v0, v2 offset:16
+// ds_read_b32 v1, v2 offset:32
+// ==>
+// ds_read2_b32 v[0:1], v2, offset0:4 offset1:8
+//
+//
+// Future improvements:
+//
+// - This currently relies on the scheduler to place loads and stores next to
+// each other, and then only merges adjacent pairs of instructions. It would
+// be good to be more flexible with interleaved instructions, and possibly run
+// before scheduling. It currently misses stores of constants because the load
+// of the constant into the data register is placed between the stores,
+// although this is arguably a scheduling problem.
+//
+// - Live interval recomputation seems inefficient. This currently only matches
+// one pair, recomputes live intervals, and moves on to the next pair. It
+// would be better to compute a list of all merges that need to occur.
+//
+// - With a list of instructions to process, we can also merge more. If a
+// cluster of loads has offsets that are too large to fit in the 8-bit offset
+// fields but are close enough together, we can add to the base pointer and
+// use the new, reduced offsets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-load-store-opt"
+
+namespace {
+
+class SILoadStoreOptimizer : public MachineFunctionPass {
+private:
+ const SIInstrInfo *TII;
+ const SIRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
+
+
+ static bool offsetsCanBeCombined(unsigned Offset0,
+ unsigned Offset1,
+ unsigned EltSize);
+
+ MachineBasicBlock::iterator findMatchingDSInst(MachineBasicBlock::iterator I,
+ unsigned EltSize);
+
+ void updateRegDefsUses(unsigned SrcReg,
+ unsigned DstReg,
+ unsigned SubIdx);
+
+ MachineBasicBlock::iterator mergeRead2Pair(
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ unsigned EltSize);
+
+ MachineBasicBlock::iterator mergeWrite2Pair(
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ unsigned EltSize);
+
+public:
+ static char ID;
+
+ SILoadStoreOptimizer()
+ : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), MRI(nullptr),
+ LIS(nullptr) {}
+
+ SILoadStoreOptimizer(const TargetMachine &TM_) : MachineFunctionPass(ID) {
+ initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool optimizeBlock(MachineBasicBlock &MBB);
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI Load / Store Optimizer";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<LiveIntervals>();
+
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
+ "SI Load / Store Optimizer", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(SILoadStoreOptimizer, DEBUG_TYPE,
+ "SI Load / Store Optimizer", false, false)
+
+char SILoadStoreOptimizer::ID = 0;
+
+char &llvm::SILoadStoreOptimizerID = SILoadStoreOptimizer::ID;
+
+FunctionPass *llvm::createSILoadStoreOptimizerPass(TargetMachine &TM) {
+ return new SILoadStoreOptimizer(TM);
+}
+
+bool SILoadStoreOptimizer::offsetsCanBeCombined(unsigned Offset0,
+ unsigned Offset1,
+ unsigned Size) {
+ // XXX - Would the same offset be OK? Is there any reason this would happen or
+ // be useful?
+ if (Offset0 == Offset1)
+ return false;
+
+ // This won't be valid if the offset isn't aligned.
+ if ((Offset0 % Size != 0) || (Offset1 % Size != 0))
+ return false;
+
+ unsigned EltOffset0 = Offset0 / Size;
+ unsigned EltOffset1 = Offset1 / Size;
+
+ // Check if the new offsets fit in the reduced 8-bit range.
+ if (isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1))
+ return true;
+
+ // If the offset in elements doesn't fit in 8-bits, we might be able to use
+ // the stride 64 versions.
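+  // For example, with EltSize == 4, byte offsets 1024 and 2048 become element
+  // offsets 256 and 512, which do not fit in 8 bits, but 256/64 == 4 and
+  // 512/64 == 8 do, so the _st64 forms can still encode them.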
+ if ((EltOffset0 % 64 != 0) || (EltOffset1 % 64) != 0)
+ return false;
+
+ return isUInt<8>(EltOffset0 / 64) && isUInt<8>(EltOffset1 / 64);
+}
+
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
+ unsigned EltSize){
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator MBBI = I;
+ ++MBBI;
+
+ if (MBBI->getOpcode() != I->getOpcode())
+ return E;
+
+ // Don't merge volatiles.
+ if (MBBI->hasOrderedMemoryRef())
+ return E;
+
+ int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::addr);
+ const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
+ const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);
+
+ // Check same base pointer. Be careful of subregisters, which can occur with
+ // vectors of pointers.
+ if (AddrReg0.getReg() == AddrReg1.getReg() &&
+ AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
+ int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
+ AMDGPU::OpName::offset);
+ unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
+ unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;
+
+ // Check both offsets fit in the reduced range.
+ if (offsetsCanBeCombined(Offset0, Offset1, EltSize))
+ return MBBI;
+ }
+
+ return E;
+}
+
+void SILoadStoreOptimizer::updateRegDefsUses(unsigned SrcReg,
+ unsigned DstReg,
+ unsigned SubIdx) {
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg),
+ E = MRI->reg_end(); I != E; ) {
+ MachineOperand &O = *I;
+ ++I;
+ O.substVirtReg(DstReg, SubIdx, *TRI);
+ }
+}
+
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ unsigned EltSize) {
+ MachineBasicBlock *MBB = I->getParent();
+
+ // Be careful, since the addresses could be subregisters themselves in weird
+ // cases, like vectors of pointers.
+ const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
+
+ unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg();
+ unsigned DestReg1
+ = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst)->getReg();
+
+ unsigned Offset0
+ = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
+ unsigned Offset1
+ = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;
+
+ unsigned NewOffset0 = Offset0 / EltSize;
+ unsigned NewOffset1 = Offset1 / EltSize;
+ unsigned Opc = (EltSize == 4) ? AMDGPU::DS_READ2_B32 : AMDGPU::DS_READ2_B64;
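+  // For example, two ds_read_b32 at byte offsets 16 and 32 (EltSize == 4)
+  // become a single ds_read2_b32 with offset0 == 4 and offset1 == 8, as in the
+  // example in the file header.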
+
+ // Prefer the st64 form if we can use it, even if we can fit the offset in the
+ // non st64 version. I'm not sure if there's any real reason to do this.
+ bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
+ if (UseST64) {
+ NewOffset0 /= 64;
+ NewOffset1 /= 64;
+ Opc = (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
+ }
+
+ assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
+ (NewOffset0 != NewOffset1) &&
+ "Computed offset doesn't fit");
+
+ const MCInstrDesc &Read2Desc = TII->get(Opc);
+
+ const TargetRegisterClass *SuperRC
+ = (EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
+ unsigned DestReg = MRI->createVirtualRegister(SuperRC);
+
+ DebugLoc DL = I->getDebugLoc();
+ MachineInstrBuilder Read2
+ = BuildMI(*MBB, I, DL, Read2Desc, DestReg)
+ .addOperand(*AddrReg) // addr
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addImm(0) // gds
+ .addMemOperand(*I->memoperands_begin())
+ .addMemOperand(*Paired->memoperands_begin());
+
+ unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
+ unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
+ updateRegDefsUses(DestReg0, DestReg, SubRegIdx0);
+ updateRegDefsUses(DestReg1, DestReg, SubRegIdx1);
+
+ LIS->RemoveMachineInstrFromMaps(I);
+ // Replacing Paired in the maps with Read2 allows us to avoid updating the
+ // live range for the m0 register.
+ LIS->ReplaceMachineInstrInMaps(Paired, Read2);
+ I->eraseFromParent();
+ Paired->eraseFromParent();
+
+ LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
+ LIS->shrinkToUses(&AddrRegLI);
+
+ LIS->getInterval(DestReg); // Create new LI
+
+ DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
+ return Read2.getInstr();
+}
+
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ unsigned EltSize) {
+ MachineBasicBlock *MBB = I->getParent();
+
+ // Be sure to use .addOperand(), and not .addReg() with these. We want to be
+ // sure we preserve the subregister index and any register flags set on them.
+ const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
+ const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
+ const MachineOperand *Data1
+ = TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);
+
+
+ unsigned Offset0
+ = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
+ unsigned Offset1
+ = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;
+
+ unsigned NewOffset0 = Offset0 / EltSize;
+ unsigned NewOffset1 = Offset1 / EltSize;
+ unsigned Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64;
+
+ // Prefer the st64 form if we can use it, even if we can fit the offset in the
+ // non st64 version. I'm not sure if there's any real reason to do this.
+ bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
+ if (UseST64) {
+ NewOffset0 /= 64;
+ NewOffset1 /= 64;
+ Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
+ }
+
+ assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
+ (NewOffset0 != NewOffset1) &&
+ "Computed offset doesn't fit");
+
+ const MCInstrDesc &Write2Desc = TII->get(Opc);
+ DebugLoc DL = I->getDebugLoc();
+
+  // repairIntervalsInRange() doesn't handle physical registers, so we have
+ // to update the M0 range manually.
+ SlotIndex PairedIndex = LIS->getInstructionIndex(Paired);
+ LiveRange &M0Range = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::M0, TRI));
+ LiveRange::Segment *M0Segment = M0Range.getSegmentContaining(PairedIndex);
+ bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot();
+
+ MachineInstrBuilder Write2
+ = BuildMI(*MBB, I, DL, Write2Desc)
+ .addOperand(*Addr) // addr
+ .addOperand(*Data0) // data0
+ .addOperand(*Data1) // data1
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addImm(0) // gds
+ .addMemOperand(*I->memoperands_begin())
+ .addMemOperand(*Paired->memoperands_begin());
+
+ // XXX - How do we express subregisters here?
+ unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg() };
+
+ LIS->RemoveMachineInstrFromMaps(I);
+ LIS->RemoveMachineInstrFromMaps(Paired);
+ I->eraseFromParent();
+ Paired->eraseFromParent();
+
+ // This doesn't handle physical registers like M0
+ LIS->repairIntervalsInRange(MBB, Write2, Write2, OrigRegs);
+
+ if (UpdateM0Range) {
+ SlotIndex Write2Index = LIS->getInstructionIndex(Write2);
+ M0Segment->end = Write2Index.getRegSlot();
+ }
+
+ DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
+ return Write2.getInstr();
+}
+
+// Scan through looking for adjacent LDS operations with constant offsets from
+// the same base register. We rely on the scheduler to do the hard work of
+// clustering nearby loads, and assume these are all adjacent.
+bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
+ MachineInstr &MI = *I;
+
+ // Don't combine if volatile.
+ if (MI.hasOrderedMemoryRef()) {
+ ++I;
+ continue;
+ }
+
+ unsigned Opc = MI.getOpcode();
+ if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64) {
+ unsigned Size = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4;
+ MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size);
+ if (Match != E) {
+ Modified = true;
+ I = mergeRead2Pair(I, Match, Size);
+ } else {
+ ++I;
+ }
+
+ continue;
+ } else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64) {
+ unsigned Size = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4;
+ MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size);
+ if (Match != E) {
+ Modified = true;
+ I = mergeWrite2Pair(I, Match, Size);
+ } else {
+ ++I;
+ }
+
+ continue;
+ }
+
+ ++I;
+ }
+
+ return Modified;
+}
+
+bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ const TargetSubtargetInfo &STM = MF.getSubtarget();
+ TRI = static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
+ TII = static_cast<const SIInstrInfo *>(STM.getInstrInfo());
+ MRI = &MF.getRegInfo();
+
+ LIS = &getAnalysis<LiveIntervals>();
+
+ DEBUG(dbgs() << "Running SILoadStoreOptimizer\n");
+
+ assert(!MRI->isSSA());
+
+ bool Modified = false;
+
+ for (MachineBasicBlock &MBB : MF)
+ Modified |= optimizeBlock(MBB);
+
+ return Modified;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
new file mode 100644
index 0000000..c319b32
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -0,0 +1,605 @@
+//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This pass lowers the pseudo control flow instructions to real
+/// machine instructions.
+///
+/// All control flow is handled using predicated instructions and
+/// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
+/// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
+/// by writing to the 64-bit EXEC register (each bit corresponds to a
+/// single vector ALU). Typically, for predicates, a vector ALU will write
+/// to its bit of the VCC register (like EXEC, VCC is 64 bits, one bit per
+/// vector ALU) and then the scalar ALU will AND the VCC register with
+/// EXEC to update the predicates.
+///
+/// For example:
+/// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
+/// %SGPR0 = SI_IF %VCC
+/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
+/// %SGPR0 = SI_ELSE %SGPR0
+/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
+/// SI_END_CF %SGPR0
+///
+/// becomes:
+///
+/// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC // Save and update the exec mask
+/// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
+/// S_CBRANCH_EXECZ label0 // This instruction is an optional
+/// // optimization which allows us to
+/// // branch if all the bits of
+/// // EXEC are zero.
+/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
+///
+/// label0:
+/// %SGPR0 = S_OR_SAVEEXEC_B64 %EXEC // Restore the exec mask for the Then block
+/// %EXEC = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
+/// S_CBRANCH_EXECZ label1 // Use our branch optimization
+/// // instruction again.
+/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 // Do the ELSE block
+/// label1:
+/// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+
+using namespace llvm;
+
+namespace {
+
+class SILowerControlFlowPass : public MachineFunctionPass {
+
+private:
+ static const unsigned SkipThreshold = 12;
+
+ static char ID;
+ const SIRegisterInfo *TRI;
+ const SIInstrInfo *TII;
+
+ bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
+
+ void Skip(MachineInstr &From, MachineOperand &To);
+ void SkipIfDead(MachineInstr &MI);
+
+ void If(MachineInstr &MI);
+ void Else(MachineInstr &MI);
+ void Break(MachineInstr &MI);
+ void IfBreak(MachineInstr &MI);
+ void ElseBreak(MachineInstr &MI);
+ void Loop(MachineInstr &MI);
+ void EndCf(MachineInstr &MI);
+
+ void Kill(MachineInstr &MI);
+ void Branch(MachineInstr &MI);
+
+ void LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0);
+ void computeIndirectRegAndOffset(unsigned VecReg, unsigned &Reg, int &Offset);
+ void IndirectSrc(MachineInstr &MI);
+ void IndirectDst(MachineInstr &MI);
+
+public:
+ SILowerControlFlowPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TRI(nullptr), TII(nullptr) { }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI Lower control flow instructions";
+ }
+
+};
+
+} // End anonymous namespace
+
+char SILowerControlFlowPass::ID = 0;
+
+FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
+ return new SILowerControlFlowPass(tm);
+}
+
+bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
+ MachineBasicBlock *To) {
+
+ unsigned NumInstr = 0;
+
+ for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
+ MBB = *MBB->succ_begin()) {
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ NumInstr < SkipThreshold && I != E; ++I) {
+
+ if (I->isBundle() || !I->isBundled())
+ if (++NumInstr >= SkipThreshold)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
+
+ if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
+ return;
+
+ DebugLoc DL = From.getDebugLoc();
+ BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
+ .addOperand(To)
+ .addReg(AMDGPU::EXEC);
+}
+
+void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->getShaderType() !=
+ ShaderType::PIXEL ||
+ !shouldSkip(&MBB, &MBB.getParent()->back()))
+ return;
+
+ MachineBasicBlock::iterator Insert = &MI;
+ ++Insert;
+
+ // If the exec mask is non-zero, skip the next two instructions
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(3)
+ .addReg(AMDGPU::EXEC);
+
+ // Exec mask is zero: Export to NULL target...
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
+ .addImm(0)
+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+ .addImm(0)
+ .addImm(1)
+ .addImm(1)
+ .addReg(AMDGPU::VGPR0)
+ .addReg(AMDGPU::VGPR0)
+ .addReg(AMDGPU::VGPR0)
+ .addReg(AMDGPU::VGPR0);
+
+ // ... and terminate wavefront
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
+}
+
+void SILowerControlFlowPass::If(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Reg = MI.getOperand(0).getReg();
+ unsigned Vcc = MI.getOperand(1).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), Reg)
+ .addReg(Vcc);
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), Reg)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Reg);
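+  // Reg now holds the lanes that were live on entry but fail the condition;
+  // SI_ELSE / SI_END_CF use it to re-enable them later.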
+
+ Skip(MI, MI.getOperand(2));
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Else(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+
+ BuildMI(MBB, MBB.getFirstNonPHI(), DL,
+ TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
+ .addReg(Src); // Saved EXEC
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Dst);
+
+ Skip(MI, MI.getOperand(2));
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Break(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Src);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::IfBreak(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Vcc = MI.getOperand(1).getReg();
+ unsigned Src = MI.getOperand(2).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(Vcc)
+ .addReg(Src);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::ElseBreak(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Saved = MI.getOperand(1).getReg();
+ unsigned Src = MI.getOperand(2).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(Saved)
+ .addReg(Src);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Loop(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Src = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Src);
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addOperand(MI.getOperand(1))
+ .addReg(AMDGPU::EXEC);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::EndCf(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Reg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, MBB.getFirstNonPHI(), DL,
+ TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Reg);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Branch(MachineInstr &MI) {
+ if (MI.getOperand(0).getMBB() == MI.getParent()->getNextNode())
+ MI.eraseFromParent();
+
+ // If these aren't equal, this is probably an infinite loop.
+}
+
+void SILowerControlFlowPass::Kill(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ const MachineOperand &Op = MI.getOperand(0);
+
+#ifndef NDEBUG
+ const SIMachineFunctionInfo *MFI
+ = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+ // Kill is only allowed in pixel / geometry shaders.
+ assert(MFI->getShaderType() == ShaderType::PIXEL ||
+ MFI->getShaderType() == ShaderType::GEOMETRY);
+#endif
+
+ // Clear this thread from the exec mask if the operand is negative
+ if ((Op.isImm())) {
+ // Constant operand: Set exec mask to 0 or do nothing
+ if (Op.getImm() & 0x80000000) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addImm(0);
+ }
+ } else {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
+ .addImm(0)
+ .addOperand(Op);
+ }
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator I = MI;
+
+ unsigned Save = MI.getOperand(1).getReg();
+ unsigned Idx = MI.getOperand(3).getReg();
+
+ if (AMDGPU::SReg_32RegClass.contains(Idx)) {
+ if (Offset) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
+ .addReg(Idx)
+ .addImm(Offset);
+ } else {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(Idx);
+ }
+ MBB.insert(I, MovRel);
+ } else {
+
+ assert(AMDGPU::SReg_64RegClass.contains(Save));
+ assert(AMDGPU::VGPR_32RegClass.contains(Idx));
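+    // The index lives in a VGPR and may differ per lane, so emit a waterfall
+    // loop: each iteration reads one lane's index with V_READFIRSTLANE, runs
+    // the MOVREL for every lane sharing that index, and clears those lanes
+    // from EXEC until all lanes have been handled.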
+
+ // Save the EXEC mask
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
+ .addReg(AMDGPU::EXEC);
+
+ // Read the next variant into VCC (lower 32 bits) <- also loop target
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ AMDGPU::VCC_LO)
+ .addReg(Idx);
+
+ // Move index from VCC into M0
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(AMDGPU::VCC_LO);
+
+ // Compare the just read M0 value to all possible Idx values
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
+ .addReg(AMDGPU::M0)
+ .addReg(Idx);
+
+ // Update EXEC, save the original EXEC value to VCC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
+ .addReg(AMDGPU::VCC);
+
+ if (Offset) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
+ .addReg(AMDGPU::M0)
+ .addImm(Offset);
+ }
+ // Do the actual move
+ MBB.insert(I, MovRel);
+
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(AMDGPU::VCC);
+
+ // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(-7)
+ .addReg(AMDGPU::EXEC);
+
+ // Restore EXEC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(Save);
+
+ }
+ MI.eraseFromParent();
+}
+
+/// \param VecReg The register which holds element zero of the vector
+///               being addressed into.
+/// \param[out] Reg The base register to use in the indirect addressing
+///                 instruction.
+/// \param[in,out] Offset As an input, this is the constant offset part of the
+///                indirect index, e.g. v0 = v[VecReg + Offset]. As an output,
+///                this is a constant value that needs to be added to the
+///                value stored in M0.
+void SILowerControlFlowPass::computeIndirectRegAndOffset(unsigned VecReg,
+ unsigned &Reg,
+ int &Offset) {
+ unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0);
+ if (!SubReg)
+ SubReg = VecReg;
+
+ const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg);
+ int RegIdx = TRI->getHWRegIndex(SubReg) + Offset;
+
+ if (RegIdx < 0) {
+ Offset = RegIdx;
+ RegIdx = 0;
+ } else {
+ Offset = 0;
+ }
+
+ Reg = RC->getRegister(RegIdx);
+}
+
+void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Vec = MI.getOperand(2).getReg();
+ int Off = MI.getOperand(4).getImm();
+ unsigned Reg;
+
+ computeIndirectRegAndOffset(Vec, Reg, Off);
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
+ .addReg(Reg)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Vec, RegState::Implicit);
+
+ LoadM0(MI, MovRel, Off);
+}
+
+void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ int Off = MI.getOperand(4).getImm();
+ unsigned Val = MI.getOperand(5).getReg();
+ unsigned Reg;
+
+ computeIndirectRegAndOffset(Dst, Reg, Off);
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
+ .addReg(Reg, RegState::Define)
+ .addReg(Val)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Dst, RegState::Implicit);
+
+ LoadM0(MI, MovRel, Off);
+}
+
+bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TRI =
+ static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ bool HaveKill = false;
+ bool NeedWQM = false;
+ bool NeedFlat = false;
+ unsigned Depth = 0;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+
+ MachineInstr &MI = *I;
+ if (TII->isWQM(MI.getOpcode()) || TII->isDS(MI.getOpcode()))
+ NeedWQM = true;
+
+ // Flat uses m0 in case it needs to access LDS.
+ if (TII->isFLAT(MI.getOpcode()))
+ NeedFlat = true;
+
+ switch (MI.getOpcode()) {
+ default: break;
+ case AMDGPU::SI_IF:
+ ++Depth;
+ If(MI);
+ break;
+
+ case AMDGPU::SI_ELSE:
+ Else(MI);
+ break;
+
+ case AMDGPU::SI_BREAK:
+ Break(MI);
+ break;
+
+ case AMDGPU::SI_IF_BREAK:
+ IfBreak(MI);
+ break;
+
+ case AMDGPU::SI_ELSE_BREAK:
+ ElseBreak(MI);
+ break;
+
+ case AMDGPU::SI_LOOP:
+ ++Depth;
+ Loop(MI);
+ break;
+
+ case AMDGPU::SI_END_CF:
+ if (--Depth == 0 && HaveKill) {
+ SkipIfDead(MI);
+ HaveKill = false;
+ }
+ EndCf(MI);
+ break;
+
+ case AMDGPU::SI_KILL:
+ if (Depth == 0)
+ SkipIfDead(MI);
+ else
+ HaveKill = true;
+ Kill(MI);
+ break;
+
+ case AMDGPU::S_BRANCH:
+ Branch(MI);
+ break;
+
+ case AMDGPU::SI_INDIRECT_SRC:
+ IndirectSrc(MI);
+ break;
+
+ case AMDGPU::SI_INDIRECT_DST_V1:
+ case AMDGPU::SI_INDIRECT_DST_V2:
+ case AMDGPU::SI_INDIRECT_DST_V4:
+ case AMDGPU::SI_INDIRECT_DST_V8:
+ case AMDGPU::SI_INDIRECT_DST_V16:
+ IndirectDst(MI);
+ break;
+ }
+ }
+ }
+
+ if (NeedWQM && MFI->getShaderType() == ShaderType::PIXEL) {
+ MachineBasicBlock &MBB = MF.front();
+ BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
+ AMDGPU::EXEC).addReg(AMDGPU::EXEC);
+ }
+
+ // FIXME: This seems inappropriate to do here.
+ if (NeedFlat && MFI->IsKernel) {
+ // Insert the prologue initializing the SGPRs pointing to the scratch space
+ // for flat accesses.
+ const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+
+ // TODO: What to use with function calls?
+
+ // FIXME: This is reporting stack size that is used in a scratch buffer
+ // rather than registers as well.
+ uint64_t StackSizeBytes = FrameInfo->getStackSize();
+
+ int IndirectBegin
+ = static_cast<const AMDGPUInstrInfo*>(TII)->getIndirectIndexBegin(MF);
+ // Convert register index to 256-byte unit.
+ uint64_t StackOffset = IndirectBegin < 0 ? 0 : (4 * IndirectBegin / 256);
+
+ assert((StackSizeBytes < 0xffff) && StackOffset < 0xffff &&
+ "Stack limits should be smaller than 16-bits");
+
+ // Initialize the flat scratch register pair.
+ // TODO: Can we use one s_mov_b64 here?
+
+ // Offset is in units of 256-bytes.
+ MachineBasicBlock &MBB = MF.front();
+ DebugLoc NoDL;
+ MachineBasicBlock::iterator Start = MBB.getFirstNonPHI();
+ const MCInstrDesc &SMovK = TII->get(AMDGPU::S_MOVK_I32);
+
+ assert(isInt<16>(StackOffset) && isInt<16>(StackSizeBytes));
+
+ BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_LO)
+ .addImm(StackOffset);
+
+ // Documentation says size is "per-thread scratch size in bytes"
+ BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_HI)
+ .addImm(StackSizeBytes);
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
new file mode 100644
index 0000000..67421e2
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -0,0 +1,151 @@
+//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// i1 values are usually inserted by the CFG Structurize pass and they are
+/// unique in that they can be copied from VALU to SALU registers.
+/// This is not possible for any other value type. Since there are no
+/// MOV instructions for i1, we need to use V_CMP_* and V_CNDMASK to move the i1.
+///
+//===----------------------------------------------------------------------===//
+//
+
+#define DEBUG_TYPE "si-i1-copies"
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+class SILowerI1Copies : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SILowerI1Copies() : MachineFunctionPass(ID) {
+ initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI Lower i1 Copies";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE,
+ "SI Lower i1 Copies", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE,
+ "SI Lower i1 Copies", false, false)
+
+char SILowerI1Copies::ID = 0;
+
+char &llvm::SILowerI1CopiesID = SILowerI1Copies::ID;
+
+FunctionPass *llvm::createSILowerI1CopiesPass() {
+ return new SILowerI1Copies();
+}
+
+bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ std::vector<unsigned> I1Defs;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MI = *I;
+
+ if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF) {
+ unsigned Reg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ if (RC == &AMDGPU::VReg_1RegClass)
+ MRI.setRegClass(Reg, &AMDGPU::SReg_64RegClass);
+ continue;
+ }
+
+ if (MI.getOpcode() != AMDGPU::COPY)
+ continue;
+
+ const MachineOperand &Dst = MI.getOperand(0);
+ const MachineOperand &Src = MI.getOperand(1);
+
+ if (!TargetRegisterInfo::isVirtualRegister(Src.getReg()) ||
+ !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
+ continue;
+
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst.getReg());
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src.getReg());
+
+ if (DstRC == &AMDGPU::VReg_1RegClass &&
+ TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
+ I1Defs.push_back(Dst.getReg());
+ DebugLoc DL = MI.getDebugLoc();
+
+ MachineInstr *DefInst = MRI.getUniqueVRegDef(Src.getReg());
+ if (DefInst->getOpcode() == AMDGPU::S_MOV_B64) {
+ if (DefInst->getOperand(1).isImm()) {
+ I1Defs.push_back(Dst.getReg());
+
+ int64_t Val = DefInst->getOperand(1).getImm();
+ assert(Val == 0 || Val == -1);
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_MOV_B32_e32))
+ .addOperand(Dst)
+ .addImm(Val);
+ MI.eraseFromParent();
+ continue;
+ }
+ }
+
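+      // General case: the source is an SGPR pair holding a condition mask, so
+      // materialize it as a per-lane 0/-1 value in a VGPR with V_CNDMASK.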
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64))
+ .addOperand(Dst)
+ .addImm(0)
+ .addImm(-1)
+ .addOperand(Src);
+ MI.eraseFromParent();
+ } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
+ SrcRC == &AMDGPU::VReg_1RegClass) {
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
+ .addOperand(Dst)
+ .addOperand(Src)
+ .addImm(0);
+ MI.eraseFromParent();
+ }
+ }
+ }
+
+ for (unsigned Reg : I1Defs)
+ MRI.setRegClass(Reg, &AMDGPU::VGPR_32RegClass);
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
new file mode 100644
index 0000000..587ea63
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -0,0 +1,77 @@
+//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+
+#include "SIMachineFunctionInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+
+#define MAX_LANES 64
+
+using namespace llvm;
+
+
+// Pin the vtable to this file.
+void SIMachineFunctionInfo::anchor() {}
+
+SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
+ : AMDGPUMachineFunction(MF),
+ TIDReg(AMDGPU::NoRegister),
+ HasSpilledVGPRs(false),
+ PSInputAddr(0),
+ NumUserSGPRs(0),
+ LDSWaveSpillSize(0) { }
+
+SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
+ MachineFunction *MF,
+ unsigned FrameIndex,
+ unsigned SubIdx) {
+ const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+ const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
+ MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
+ Offset += SubIdx * 4;
+
+ unsigned LaneVGPRIdx = Offset / (64 * 4);
+ unsigned Lane = (Offset / 4) % 64;
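+  // Each lane VGPR holds 64 spilled dwords, one per lane, so byte offsets
+  // 0..255 map to lane VGPR 0 (lanes 0..63), 256..511 to lane VGPR 1, etc.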
+
+ struct SpilledReg Spill;
+
+ if (!LaneVGPRs.count(LaneVGPRIdx)) {
+ unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
+ LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
+ MRI.setPhysRegUsed(LaneVGPR);
+
+    // Add this register as live-in to all blocks to avoid machine verifier
+ // complaining about use of an undefined physical register.
+ for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
+ BI != BE; ++BI) {
+ BI->addLiveIn(LaneVGPR);
+ }
+ }
+
+ Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
+ Spill.Lane = Lane;
+ return Spill;
+}
+
+unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
+ const MachineFunction &MF) const {
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ // FIXME: We should get this information from kernel attributes if it
+ // is available.
+ return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
new file mode 100644
index 0000000..667da4c
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -0,0 +1,66 @@
+//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_LIB_TARGET_R600_SIMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_R600_SIMACHINEFUNCTIONINFO_H
+
+#include "AMDGPUMachineFunction.h"
+#include "SIRegisterInfo.h"
+#include <map>
+
+namespace llvm {
+
+class MachineRegisterInfo;
+
+/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
+/// tells the hardware which interpolation parameters to load.
+class SIMachineFunctionInfo : public AMDGPUMachineFunction {
+ void anchor() override;
+
+ unsigned TIDReg;
+ bool HasSpilledVGPRs;
+
+public:
+
+ struct SpilledReg {
+ unsigned VGPR;
+ int Lane;
+ SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
+ SpilledReg() : VGPR(0), Lane(-1) { }
+ bool hasLane() { return Lane != -1;}
+ };
+
+ // SIMachineFunctionInfo definition
+
+ SIMachineFunctionInfo(const MachineFunction &MF);
+ SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex,
+ unsigned SubIdx);
+ unsigned PSInputAddr;
+ unsigned NumUserSGPRs;
+ std::map<unsigned, unsigned> LaneVGPRs;
+ unsigned LDSWaveSpillSize;
+ unsigned ScratchOffsetReg;
+ bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
+ unsigned getTIDReg() const { return TIDReg; };
+ void setTIDReg(unsigned Reg) { TIDReg = Reg; }
+ bool hasSpilledVGPRs() const { return HasSpilledVGPRs; }
+ void setHasSpilledVGPRs(bool Spill = true) { HasSpilledVGPRs = Spill; }
+
+ unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
+};
+
+} // End namespace llvm
+
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
new file mode 100644
index 0000000..0a7f684
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
@@ -0,0 +1,194 @@
+//===-- SIPrepareScratchRegs.cpp - Prepare scratch regs for spilling ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// This pass loads the scratch pointer and scratch offset into a register or
+/// a frame index that can be used anywhere in the program. These values are
+/// later used for spilling VGPRs.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+
+using namespace llvm;
+
+namespace {
+
+class SIPrepareScratchRegs : public MachineFunctionPass {
+
+private:
+ static char ID;
+
+public:
+ SIPrepareScratchRegs() : MachineFunctionPass(ID) { }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI prepare scratch registers";
+ }
+
+};
+
+} // End anonymous namespace
+
+char SIPrepareScratchRegs::ID = 0;
+
+FunctionPass *llvm::createSIPrepareScratchRegs() {
+ return new SIPrepareScratchRegs();
+}
+
+bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SIRegisterInfo *TRI = &TII->getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ MachineBasicBlock *Entry = MF.begin();
+ MachineBasicBlock::iterator I = Entry->begin();
+ DebugLoc DL = I->getDebugLoc();
+
+ // FIXME: If we don't have enough VGPRs for SGPR spilling we will need to
+ // run this pass.
+ if (!MFI->hasSpilledVGPRs())
+ return false;
+
+ unsigned ScratchPtrPreloadReg =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
+ unsigned ScratchOffsetPreloadReg =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
+
+ if (!Entry->isLiveIn(ScratchPtrPreloadReg))
+ Entry->addLiveIn(ScratchPtrPreloadReg);
+
+ if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
+ Entry->addLiveIn(ScratchOffsetPreloadReg);
+
+ // Load the scratch offset.
+ unsigned ScratchOffsetReg =
+ TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
+ int ScratchOffsetFI = -1;
+
+ if (ScratchOffsetReg != AMDGPU::NoRegister) {
+ // Found an SGPR to use
+ MRI.setPhysRegUsed(ScratchOffsetReg);
+ BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg)
+ .addReg(ScratchOffsetPreloadReg);
+ } else {
+ // No SGPR is available, we must spill.
+ ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4,4);
+ BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE))
+ .addReg(ScratchOffsetPreloadReg)
+ .addFrameIndex(ScratchOffsetFI)
+ .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
+ .addReg(AMDGPU::SGPR0, RegState::Undef);
+ }
+
+
+ // Now that we have the scratch pointer and offset values, we need to
+ // add them to all the SI_SPILL_V* instructions.
+
+ RegScavenger RS;
+ unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4);
+ RS.addScavengingFrameIndex(ScratchRsrcFI);
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ // Add the scratch offset reg as a live-in so that the register scavenger
+ // doesn't re-use it.
+ if (!MBB.isLiveIn(ScratchOffsetReg) &&
+ ScratchOffsetReg != AMDGPU::NoRegister)
+ MBB.addLiveIn(ScratchOffsetReg);
+ RS.enterBasicBlock(&MBB);
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ RS.forward(I);
+ DebugLoc DL = MI.getDebugLoc();
+ if (!TII->isVGPRSpill(MI.getOpcode()))
+ continue;
+
+ // Scratch resource
+ unsigned ScratchRsrcReg =
+ RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
+
+ uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
+ 0xffffffff; // Size
+
+ unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+ unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+ unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
+ unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
+ .addImm(Rsrc & 0xffffffff)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
+ .addImm(Rsrc >> 32)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ // Scratch Offset
+ if (ScratchOffsetReg == AMDGPU::NoRegister) {
+ ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
+ ScratchOffsetReg)
+ .addFrameIndex(ScratchOffsetFI)
+ .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
+ .addReg(AMDGPU::SGPR0, RegState::Undef);
+ } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
+ MBB.addLiveIn(ScratchOffsetReg);
+ }
+
+ if (ScratchRsrcReg == AMDGPU::NoRegister ||
+ ScratchOffsetReg == AMDGPU::NoRegister) {
+ LLVMContext &Ctx = MF.getFunction()->getContext();
+ Ctx.emitError("ran out of SGPRs for spilling VGPRs");
+ ScratchRsrcReg = AMDGPU::SGPR0;
+ ScratchOffsetReg = AMDGPU::SGPR0;
+ }
+ MI.getOperand(2).setReg(ScratchRsrcReg);
+ MI.getOperand(2).setIsKill(true);
+ MI.getOperand(2).setIsUndef(false);
+ MI.getOperand(3).setReg(ScratchOffsetReg);
+ MI.getOperand(3).setIsUndef(false);
+ MI.getOperand(3).setIsKill(false);
+ MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
+ }
+ }
+ return true;
+}
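The loop above materializes the 128-bit scratch resource descriptor word by word: the low two dwords come from the SCRATCH_RSRC_DWORD0/1 relocations and the high two from the Rsrc constant built in the loop. A small sketch of how that constant is split; the RSRC_* bit patterns below are placeholders, the real values are defined in SIDefines.h:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Placeholder bit patterns; AMDGPU::RSRC_DATA_FORMAT and
      // AMDGPU::RSRC_TID_ENABLE have different values in SIDefines.h.
      const uint64_t RSRC_DATA_FORMAT = 0xf00000000000ULL;
      const uint64_t RSRC_TID_ENABLE = 1ULL << 55;

      uint64_t Rsrc = RSRC_DATA_FORMAT | RSRC_TID_ENABLE | 0xffffffff; // size field
      uint32_t Rsrc2 = Rsrc & 0xffffffff; // immediate for the sub2 S_MOV_B32
      uint32_t Rsrc3 = Rsrc >> 32;        // immediate for the sub3 S_MOV_B32
      std::printf("sub2 = 0x%08x, sub3 = 0x%08x\n", Rsrc2, Rsrc3);
      return 0;
    }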
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
new file mode 100644
index 0000000..db2ff0b
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -0,0 +1,543 @@
+//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIRegisterInfo.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+
+using namespace llvm;
+
+SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {}
+
+BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(AMDGPU::EXEC);
+
+  // EXEC_LO and EXEC_HI could be allocated and used as regular registers,
+ // but this seems likely to result in bugs, so I'm marking them as reserved.
+ Reserved.set(AMDGPU::EXEC_LO);
+ Reserved.set(AMDGPU::EXEC_HI);
+
+ Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
+ Reserved.set(AMDGPU::FLAT_SCR);
+ Reserved.set(AMDGPU::FLAT_SCR_LO);
+ Reserved.set(AMDGPU::FLAT_SCR_HI);
+
+ // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
+ Reserved.set(AMDGPU::VGPR255);
+ Reserved.set(AMDGPU::VGPR254);
+
+ // Tonga and Iceland can only allocate a fixed number of SGPRs due
+ // to a hw bug.
+ if (MF.getSubtarget<AMDGPUSubtarget>().hasSGPRInitBug()) {
+ unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
+ // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
+ // Assume XNACK_MASK is unused.
+ unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
+
+ for (unsigned i = Limit; i < NumSGPRs; ++i) {
+ unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
+ MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
+
+ for (; R.isValid(); ++R)
+ Reserved.set(*R);
+ }
+ }
+
+ return Reserved;
+}
+
+unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const {
+
+ const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
+ // FIXME: We should adjust the max number of waves based on LDS size.
+ unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
+ STI.getMaxWavesPerCU());
+ unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());
+
+ for (regclass_iterator I = regclass_begin(), E = regclass_end();
+ I != E; ++I) {
+
+ unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
+ unsigned Limit;
+
+ if (isSGPRClass(*I)) {
+ Limit = SGPRLimit / NumSubRegs;
+ } else {
+ Limit = VGPRLimit / NumSubRegs;
+ }
+
+ const int *Sets = getRegClassPressureSets(*I);
+ assert(Sets);
+ for (unsigned i = 0; Sets[i] != -1; ++i) {
+ if (Sets[i] == (int)Idx)
+ return Limit;
+ }
+ }
+ return 256;
+}
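The per-class limit above is simply the per-wave register budget divided by the class width in dwords. For example, using the SI numbers from getNumSGPRsAllowed later in this file, 10 waves allow 48 SGPRs, so a 128-bit (four-dword) SGPR class gets a pressure limit of 48 / 4 = 12. The same arithmetic as a sketch:

    #include <cstdio>

    int main() {
      unsigned SGPRLimit = 48;        // getNumSGPRsAllowed(SI, 10 waves)
      unsigned ClassSizeInBytes = 16; // e.g. SReg_128
      unsigned NumSubRegs = ClassSizeInBytes / 4;
      std::printf("pressure limit = %u\n", SGPRLimit / NumSubRegs); // prints 12
      return 0;
    }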
+
+bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
+ return Fn.getFrameInfo()->hasStackObjects();
+}
+
+static unsigned getNumSubRegsForSpillOp(unsigned Op) {
+
+ switch (Op) {
+ case AMDGPU::SI_SPILL_S512_SAVE:
+ case AMDGPU::SI_SPILL_S512_RESTORE:
+ case AMDGPU::SI_SPILL_V512_SAVE:
+ case AMDGPU::SI_SPILL_V512_RESTORE:
+ return 16;
+ case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_V256_SAVE:
+ case AMDGPU::SI_SPILL_V256_RESTORE:
+ return 8;
+ case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S128_RESTORE:
+ case AMDGPU::SI_SPILL_V128_SAVE:
+ case AMDGPU::SI_SPILL_V128_RESTORE:
+ return 4;
+ case AMDGPU::SI_SPILL_V96_SAVE:
+ case AMDGPU::SI_SPILL_V96_RESTORE:
+ return 3;
+ case AMDGPU::SI_SPILL_S64_SAVE:
+ case AMDGPU::SI_SPILL_S64_RESTORE:
+ case AMDGPU::SI_SPILL_V64_SAVE:
+ case AMDGPU::SI_SPILL_V64_RESTORE:
+ return 2;
+ case AMDGPU::SI_SPILL_S32_SAVE:
+ case AMDGPU::SI_SPILL_S32_RESTORE:
+ case AMDGPU::SI_SPILL_V32_SAVE:
+ case AMDGPU::SI_SPILL_V32_RESTORE:
+ return 1;
+ default: llvm_unreachable("Invalid spill opcode");
+ }
+}
+
+void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
+ unsigned LoadStoreOp,
+ unsigned Value,
+ unsigned ScratchRsrcReg,
+ unsigned ScratchOffset,
+ int64_t Offset,
+ RegScavenger *RS) const {
+
+ MachineBasicBlock *MBB = MI->getParent();
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ DebugLoc DL = MI->getDebugLoc();
+ bool IsLoad = TII->get(LoadStoreOp).mayLoad();
+
+ bool RanOutOfSGPRs = false;
+ unsigned SOffset = ScratchOffset;
+
+ unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+ unsigned Size = NumSubRegs * 4;
+
+ if (!isUInt<12>(Offset + Size)) {
+ SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
+ if (SOffset == AMDGPU::NoRegister) {
+ RanOutOfSGPRs = true;
+ SOffset = AMDGPU::SGPR0;
+ }
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
+ .addReg(ScratchOffset)
+ .addImm(Offset);
+ Offset = 0;
+ }
+
+ if (RanOutOfSGPRs)
+    Ctx.emitError("Ran out of SGPRs for spilling VGPRs");
+
+ for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
+ unsigned SubReg = NumSubRegs > 1 ?
+ getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
+ Value;
+ bool IsKill = (i == e - 1);
+
+ BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
+ .addReg(SubReg, getDefRegState(IsLoad))
+ .addReg(ScratchRsrcReg, getKillRegState(IsKill))
+ .addReg(SOffset)
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .addReg(Value, RegState::Implicit | getDefRegState(IsLoad));
+ }
+}
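The isUInt<12> check above exists because the MUBUF offset field is a 12-bit unsigned immediate; offsets that do not fit (after accounting for the size of the spill) are added into an SGPR with S_ADD_U32 and the immediate is reset to zero. A hedged sketch of just that decision:

    #include <cstdint>
    #include <cstdio>

    // Returns true if the whole spill can be addressed through the 12-bit
    // MUBUF immediate, mirroring the isUInt<12>(Offset + Size) test above.
    static bool FitsInMUBUFImmediate(int64_t Offset, unsigned NumSubRegs) {
      int64_t Size = NumSubRegs * 4;
      return Offset >= 0 && Offset + Size < (1 << 12);
    }

    int main() {
      std::printf("%d\n", FitsInMUBUFImmediate(4000, 4)); // 4016 < 4096 -> 1
      std::printf("%d\n", FitsInMUBUFImmediate(4092, 4)); // 4108 >= 4096 -> 0, needs S_ADD_U32
      return 0;
    }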
+
+void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineBasicBlock *MBB = MI->getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ DebugLoc DL = MI->getDebugLoc();
+
+ MachineOperand &FIOp = MI->getOperand(FIOperandNum);
+ int Index = MI->getOperand(FIOperandNum).getIndex();
+
+ switch (MI->getOpcode()) {
+ // SGPR register spill
+ case AMDGPU::SI_SPILL_S512_SAVE:
+ case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S64_SAVE:
+ case AMDGPU::SI_SPILL_S32_SAVE: {
+ unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+
+ for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+ unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
+ &AMDGPU::SGPR_32RegClass, i);
+ struct SIMachineFunctionInfo::SpilledReg Spill =
+ MFI->getSpilledReg(MF, Index, i);
+
+ if (Spill.VGPR == AMDGPU::NoRegister) {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("Ran out of VGPRs for spilling SGPR");
+ }
+
+ BuildMI(*MBB, MI, DL,
+ TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
+ Spill.VGPR)
+ .addReg(SubReg)
+ .addImm(Spill.Lane);
+
+ }
+ MI->eraseFromParent();
+ break;
+ }
+
+ // SGPR register restore
+ case AMDGPU::SI_SPILL_S512_RESTORE:
+ case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_S128_RESTORE:
+ case AMDGPU::SI_SPILL_S64_RESTORE:
+ case AMDGPU::SI_SPILL_S32_RESTORE: {
+ unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+
+ for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+ unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
+ &AMDGPU::SGPR_32RegClass, i);
+ struct SIMachineFunctionInfo::SpilledReg Spill =
+ MFI->getSpilledReg(MF, Index, i);
+
+ if (Spill.VGPR == AMDGPU::NoRegister) {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("Ran out of VGPRs for spilling SGPR");
+ }
+
+ BuildMI(*MBB, MI, DL,
+ TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+ SubReg)
+ .addReg(Spill.VGPR)
+ .addImm(Spill.Lane)
+ .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
+ }
+
+ // TODO: only do this when it is needed
+ switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
+ case AMDGPUSubtarget::SOUTHERN_ISLANDS:
+ // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
+ TII->insertNOPs(MI, 3);
+ break;
+ case AMDGPUSubtarget::SEA_ISLANDS:
+ break;
+ default: // VOLCANIC_ISLANDS and later
+ // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
+ // and later. This also applies to VALUs which write VCC, but we're
+ // unlikely to see VMEM use VCC.
+ TII->insertNOPs(MI, 4);
+ }
+
+ MI->eraseFromParent();
+ break;
+ }
+
+ // VGPR register spill
+ case AMDGPU::SI_SPILL_V512_SAVE:
+ case AMDGPU::SI_SPILL_V256_SAVE:
+ case AMDGPU::SI_SPILL_V128_SAVE:
+ case AMDGPU::SI_SPILL_V96_SAVE:
+ case AMDGPU::SI_SPILL_V64_SAVE:
+ case AMDGPU::SI_SPILL_V32_SAVE:
+ buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
+ TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+ FrameInfo->getObjectOffset(Index), RS);
+ MI->eraseFromParent();
+ break;
+ case AMDGPU::SI_SPILL_V32_RESTORE:
+ case AMDGPU::SI_SPILL_V64_RESTORE:
+ case AMDGPU::SI_SPILL_V96_RESTORE:
+ case AMDGPU::SI_SPILL_V128_RESTORE:
+ case AMDGPU::SI_SPILL_V256_RESTORE:
+ case AMDGPU::SI_SPILL_V512_RESTORE: {
+ buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+ TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+ FrameInfo->getObjectOffset(Index), RS);
+ MI->eraseFromParent();
+ break;
+ }
+
+ default: {
+ int64_t Offset = FrameInfo->getObjectOffset(Index);
+ FIOp.ChangeToImmediate(Offset);
+ if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
+ unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, SPAdj);
+ BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+ .addImm(Offset);
+ FIOp.ChangeToRegister(TmpReg, false, false, true);
+ }
+ }
+ }
+}
+
+const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
+ MVT VT) const {
+ switch(VT.SimpleTy) {
+ default:
+ case MVT::i32: return &AMDGPU::VGPR_32RegClass;
+ }
+}
+
+unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
+ return getEncodingValue(Reg) & 0xff;
+}
+
+const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
+ assert(!TargetRegisterInfo::isVirtualRegister(Reg));
+
+ static const TargetRegisterClass *BaseClasses[] = {
+ &AMDGPU::VGPR_32RegClass,
+ &AMDGPU::SReg_32RegClass,
+ &AMDGPU::VReg_64RegClass,
+ &AMDGPU::SReg_64RegClass,
+ &AMDGPU::VReg_96RegClass,
+ &AMDGPU::VReg_128RegClass,
+ &AMDGPU::SReg_128RegClass,
+ &AMDGPU::VReg_256RegClass,
+ &AMDGPU::SReg_256RegClass,
+ &AMDGPU::VReg_512RegClass
+ };
+
+ for (const TargetRegisterClass *BaseClass : BaseClasses) {
+ if (BaseClass->contains(Reg)) {
+ return BaseClass;
+ }
+ }
+ return nullptr;
+}
+
+bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
+ return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_512RegClass, RC);
+}
+
+const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
+ const TargetRegisterClass *SRC) const {
+ if (hasVGPRs(SRC)) {
+ return SRC;
+ } else if (SRC == &AMDGPU::SCCRegRegClass) {
+ return &AMDGPU::VCCRegRegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) {
+ return &AMDGPU::VGPR_32RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) {
+ return &AMDGPU::VReg_64RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass)) {
+ return &AMDGPU::VReg_128RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SReg_256RegClass)) {
+ return &AMDGPU::VReg_256RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass)) {
+ return &AMDGPU::VReg_512RegClass;
+ }
+ return nullptr;
+}
+
+const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
+ const TargetRegisterClass *RC, unsigned SubIdx) const {
+ if (SubIdx == AMDGPU::NoSubRegister)
+ return RC;
+
+ // If this register has a sub-register, we can safely assume it is a 32-bit
+ // register, because all of SI's sub-registers are 32-bit.
+ if (isSGPRClass(RC)) {
+ return &AMDGPU::SGPR_32RegClass;
+ } else {
+ return &AMDGPU::VGPR_32RegClass;
+ }
+}
+
+unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
+ const TargetRegisterClass *SubRC,
+ unsigned Channel) const {
+
+ switch (Reg) {
+ case AMDGPU::VCC:
+ switch(Channel) {
+ case 0: return AMDGPU::VCC_LO;
+ case 1: return AMDGPU::VCC_HI;
+ default: llvm_unreachable("Invalid SubIdx for VCC");
+ }
+
+ case AMDGPU::FLAT_SCR:
+ switch (Channel) {
+ case 0:
+ return AMDGPU::FLAT_SCR_LO;
+ case 1:
+ return AMDGPU::FLAT_SCR_HI;
+ default:
+ llvm_unreachable("Invalid SubIdx for FLAT_SCR");
+ }
+ break;
+
+ case AMDGPU::EXEC:
+ switch (Channel) {
+ case 0:
+ return AMDGPU::EXEC_LO;
+ case 1:
+ return AMDGPU::EXEC_HI;
+ default:
+ llvm_unreachable("Invalid SubIdx for EXEC");
+ }
+ break;
+ }
+
+ const TargetRegisterClass *RC = getPhysRegClass(Reg);
+ // 32-bit registers don't have sub-registers, so we can just return the
+ // Reg. We need to have this check here, because the calculation below
+ // using getHWRegIndex() will fail with special 32-bit registers like
+ // VCC_LO, VCC_HI, EXEC_LO, EXEC_HI and M0.
+ if (RC->getSize() == 4) {
+ assert(Channel == 0);
+ return Reg;
+ }
+
+ unsigned Index = getHWRegIndex(Reg);
+ return SubRC->getRegister(Index + Channel);
+}
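Outside of the special VCC, FLAT_SCR and EXEC cases, the lookup above reduces to adding the channel to the hardware index of the tuple's first register. Assuming the SGPR encodings from SIRegisterInfo.td (SGPR<N> has hardware index N), getPhysRegSubReg(SGPR4_SGPR5, &SGPR_32RegClass, 1) resolves to index 4 + 1, i.e. SGPR5:

    #include <cstdio>

    int main() {
      unsigned TupleHWIndex = 4; // getHWRegIndex(SGPR4_SGPR5), assuming SGPR4's encoding
      unsigned Channel = 1;
      std::printf("sub-register = SGPR%u\n", TupleHWIndex + Channel); // SGPR5
      return 0;
    }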
+
+bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
+ return OpType == AMDGPU::OPERAND_REG_IMM32;
+}
+
+bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
+ if (opCanUseLiteralConstant(OpType))
+ return true;
+
+ return OpType == AMDGPU::OPERAND_REG_INLINE_C;
+}
+
+unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
+ enum PreloadedValue Value) const {
+
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ switch (Value) {
+ case SIRegisterInfo::TGID_X:
+ return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 0);
+ case SIRegisterInfo::TGID_Y:
+ return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 1);
+ case SIRegisterInfo::TGID_Z:
+ return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
+ case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
+ if (MFI->getShaderType() != ShaderType::COMPUTE)
+ return MFI->ScratchOffsetReg;
+ return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
+ case SIRegisterInfo::SCRATCH_PTR:
+ return AMDGPU::SGPR2_SGPR3;
+ case SIRegisterInfo::INPUT_PTR:
+ return AMDGPU::SGPR0_SGPR1;
+ case SIRegisterInfo::TIDIG_X:
+ return AMDGPU::VGPR0;
+ case SIRegisterInfo::TIDIG_Y:
+ return AMDGPU::VGPR1;
+ case SIRegisterInfo::TIDIG_Z:
+ return AMDGPU::VGPR2;
+ }
+ llvm_unreachable("unexpected preloaded value type");
+}
+
+/// \brief Returns a register that is not used at any point in the function.
+/// If all registers are used, then this function will return
+/// AMDGPU::NoRegister.
+unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
+ const TargetRegisterClass *RC) const {
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (!MRI.isPhysRegUsed(*I))
+ return *I;
+ }
+ return AMDGPU::NoRegister;
+}
+
+unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
+ switch(WaveCount) {
+ case 10: return 24;
+ case 9: return 28;
+ case 8: return 32;
+ case 7: return 36;
+ case 6: return 40;
+ case 5: return 48;
+ case 4: return 64;
+ case 3: return 84;
+ case 2: return 128;
+ default: return 256;
+ }
+}
+
+unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
+ unsigned WaveCount) const {
+ if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ switch (WaveCount) {
+ case 10: return 80;
+ case 9: return 80;
+ case 8: return 96;
+ default: return 102;
+ }
+ } else {
+ switch(WaveCount) {
+ case 10: return 48;
+ case 9: return 56;
+ case 8: return 64;
+ case 7: return 72;
+ case 6: return 80;
+ case 5: return 96;
+ default: return 103;
+ }
+ }
+}
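Read in the other direction, the two tables above give the occupancy that a given register count still allows. A hedged helper that inverts the VGPR table (an illustration only, not an API of SIRegisterInfo):

    #include <cstdio>

    // Highest wave count whose per-wave VGPR budget (from getNumVGPRsAllowed
    // above) still covers the given VGPR usage.
    static unsigned WavesForVGPRUsage(unsigned NumVGPRs) {
      const unsigned Budget[11] = {0, 256, 128, 84, 64, 48, 40, 36, 32, 28, 24};
      for (unsigned Waves = 10; Waves >= 1; --Waves)
        if (NumVGPRs <= Budget[Waves])
          return Waves;
      return 1;
    }

    int main() {
      std::printf("%u %u %u\n", WavesForVGPRUsage(24),   // 10
                                WavesForVGPRUsage(40),   // 6
                                WavesForVGPRUsage(100)); // 2
      return 0;
    }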
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
new file mode 100644
index 0000000..bfdb67c
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -0,0 +1,131 @@
+//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for SIRegisterInfo
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
+#define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+
+struct SIRegisterInfo : public AMDGPURegisterInfo {
+
+ SIRegisterInfo();
+
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
+
+ unsigned getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const override;
+
+ bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const override;
+
+ /// \brief get the register class of the specified type to use in the
+ /// CFGStructurizer
+ const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override;
+
+ unsigned getHWRegIndex(unsigned Reg) const override;
+
+ /// \brief Return the 'base' register class for this register.
+  /// e.g. SGPR0 => SReg_32, VGPR0 => VGPR_32, SGPR0_SGPR1 => SReg_64, etc.
+ const TargetRegisterClass *getPhysRegClass(unsigned Reg) const;
+
+ /// \returns true if this class contains only SGPR registers
+ bool isSGPRClass(const TargetRegisterClass *RC) const {
+ if (!RC)
+ return false;
+
+ return !hasVGPRs(RC);
+ }
+
+ /// \returns true if this class ID contains only SGPR registers
+ bool isSGPRClassID(unsigned RCID) const {
+ if (static_cast<int>(RCID) == -1)
+ return false;
+
+ return isSGPRClass(getRegClass(RCID));
+ }
+
+ /// \returns true if this class contains VGPR registers.
+ bool hasVGPRs(const TargetRegisterClass *RC) const;
+
+ /// \returns A VGPR reg class with the same width as \p SRC
+ const TargetRegisterClass *getEquivalentVGPRClass(
+ const TargetRegisterClass *SRC) const;
+
+ /// \returns The register class that is used for a sub-register of \p RC for
+ /// the given \p SubIdx. If \p SubIdx equals NoSubRegister, \p RC will
+ /// be returned.
+ const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC,
+ unsigned SubIdx) const;
+
+ /// \p Channel This is the register channel (e.g. a value from 0-16), not the
+ /// SubReg index.
+ /// \returns The sub-register of Reg that is in Channel.
+ unsigned getPhysRegSubReg(unsigned Reg, const TargetRegisterClass *SubRC,
+ unsigned Channel) const;
+
+ /// \returns True if operands defined with this operand type can accept
+ /// a literal constant (i.e. any 32-bit immediate).
+ bool opCanUseLiteralConstant(unsigned OpType) const;
+
+  /// \returns True if operands defined with this operand type can accept
+  /// an inline constant, i.e. an integer value in the range [-16, 64] or
+  /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
+ bool opCanUseInlineConstant(unsigned OpType) const;
+
+ enum PreloadedValue {
+ TGID_X,
+ TGID_Y,
+ TGID_Z,
+ SCRATCH_WAVE_OFFSET,
+ SCRATCH_PTR,
+ INPUT_PTR,
+ TIDIG_X,
+ TIDIG_Y,
+ TIDIG_Z
+ };
+
+ /// \brief Returns the physical register that \p Value is stored in.
+ unsigned getPreloadedValue(const MachineFunction &MF,
+ enum PreloadedValue Value) const;
+
+ /// \brief Give the maximum number of VGPRs that can be used by \p WaveCount
+ /// concurrent waves.
+ unsigned getNumVGPRsAllowed(unsigned WaveCount) const;
+
+ /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
+ /// concurrent waves.
+ unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
+ unsigned WaveCount) const;
+
+ unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
+ const TargetRegisterClass *RC) const;
+
+private:
+ void buildScratchLoadStore(MachineBasicBlock::iterator MI,
+ unsigned LoadStoreOp, unsigned Value,
+ unsigned ScratchRsrcReg, unsigned ScratchOffset,
+ int64_t Offset, RegScavenger *RS) const;
+};
+
+} // End namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
new file mode 100644
index 0000000..2a9017f
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -0,0 +1,284 @@
+//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the SI registers
+//===----------------------------------------------------------------------===//
+
+class SIReg <string n, bits<16> encoding = 0> : Register<n> {
+ let Namespace = "AMDGPU";
+ let HWEncoding = encoding;
+}
+
+// Special Registers
+def VCC_LO : SIReg<"vcc_lo", 106>;
+def VCC_HI : SIReg<"vcc_hi", 107>;
+
+// VCC for 64-bit instructions
+def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1];
+ let HWEncoding = 106;
+}
+
+def EXEC_LO : SIReg<"exec_lo", 126>;
+def EXEC_HI : SIReg<"exec_hi", 127>;
+
+def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1];
+ let HWEncoding = 126;
+}
+
+def SCC : SIReg<"scc", 253>;
+def M0 : SIReg <"m0", 124>;
+
+def FLAT_SCR_LO : SIReg<"flat_scr_lo", 104>; // Offset, in units of 256 bytes.
+def FLAT_SCR_HI : SIReg<"flat_scr_hi", 105>; // Size is the per-thread scratch size, in bytes.
+
+// Pair to indicate location of scratch space for flat accesses.
+def FLAT_SCR : RegisterWithSubRegs <"flat_scr", [FLAT_SCR_LO, FLAT_SCR_HI]> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1];
+ let HWEncoding = 104;
+}
+
+// SGPR registers
+foreach Index = 0-101 in {
+ def SGPR#Index : SIReg <"SGPR"#Index, Index>;
+}
+
+// VGPR registers
+foreach Index = 0-255 in {
+ def VGPR#Index : SIReg <"VGPR"#Index, Index> {
+ let HWEncoding{8} = 1;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Groupings using register classes and tuples
+//===----------------------------------------------------------------------===//
+
+// SGPR 32-bit registers
+def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
+ (add (sequence "SGPR%u", 0, 101))>;
+
+// SGPR 64-bit registers
+def SGPR_64Regs : RegisterTuples<[sub0, sub1],
+ [(add (decimate (trunc SGPR_32, 101), 2)),
+ (add (decimate (shl SGPR_32, 1), 2))]>;
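The decimate/shl pattern above builds aligned pairs: the first operand list selects every even SGPR up to SGPR100 and the second list the register one above it, so the generated tuples are (SGPR0,SGPR1), (SGPR2,SGPR3), ..., (SGPR100,SGPR101). A small sketch enumerating the same pairs, assuming the generated tuple names follow the usual SGPR<n>_SGPR<n+1> convention:

    #include <cstdio>

    int main() {
      for (unsigned Base = 0; Base <= 100; Base += 2)
        std::printf("SGPR%u_SGPR%u\n", Base, Base + 1); // 51 aligned 64-bit tuples
      return 0;
    }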
+
+// SGPR 128-bit registers
+def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
+ [(add (decimate (trunc SGPR_32, 99), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4))]>;
+
+// SGPR 256-bit registers
+def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+ [(add (decimate (trunc SGPR_32, 95), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4)),
+ (add (decimate (shl SGPR_32, 4), 4)),
+ (add (decimate (shl SGPR_32, 5), 4)),
+ (add (decimate (shl SGPR_32, 6), 4)),
+ (add (decimate (shl SGPR_32, 7), 4))]>;
+
+// SGPR 512-bit registers
+def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+ [(add (decimate (trunc SGPR_32, 87), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4)),
+ (add (decimate (shl SGPR_32, 4), 4)),
+ (add (decimate (shl SGPR_32, 5), 4)),
+ (add (decimate (shl SGPR_32, 6), 4)),
+ (add (decimate (shl SGPR_32, 7), 4)),
+ (add (decimate (shl SGPR_32, 8), 4)),
+ (add (decimate (shl SGPR_32, 9), 4)),
+ (add (decimate (shl SGPR_32, 10), 4)),
+ (add (decimate (shl SGPR_32, 11), 4)),
+ (add (decimate (shl SGPR_32, 12), 4)),
+ (add (decimate (shl SGPR_32, 13), 4)),
+ (add (decimate (shl SGPR_32, 14), 4)),
+ (add (decimate (shl SGPR_32, 15), 4))]>;
+
+// VGPR 32-bit registers
+def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
+ (add (sequence "VGPR%u", 0, 255))>;
+
+// VGPR 64-bit registers
+def VGPR_64 : RegisterTuples<[sub0, sub1],
+ [(add (trunc VGPR_32, 255)),
+ (add (shl VGPR_32, 1))]>;
+
+// VGPR 96-bit registers
+def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
+ [(add (trunc VGPR_32, 254)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2))]>;
+
+// VGPR 128-bit registers
+def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
+ [(add (trunc VGPR_32, 253)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3))]>;
+
+// VGPR 256-bit registers
+def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+ [(add (trunc VGPR_32, 249)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3)),
+ (add (shl VGPR_32, 4)),
+ (add (shl VGPR_32, 5)),
+ (add (shl VGPR_32, 6)),
+ (add (shl VGPR_32, 7))]>;
+
+// VGPR 512-bit registers
+def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+ [(add (trunc VGPR_32, 241)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3)),
+ (add (shl VGPR_32, 4)),
+ (add (shl VGPR_32, 5)),
+ (add (shl VGPR_32, 6)),
+ (add (shl VGPR_32, 7)),
+ (add (shl VGPR_32, 8)),
+ (add (shl VGPR_32, 9)),
+ (add (shl VGPR_32, 10)),
+ (add (shl VGPR_32, 11)),
+ (add (shl VGPR_32, 12)),
+ (add (shl VGPR_32, 13)),
+ (add (shl VGPR_32, 14)),
+ (add (shl VGPR_32, 15))]>;
+
+//===----------------------------------------------------------------------===//
+// Register classes used as source and destination
+//===----------------------------------------------------------------------===//
+
+class RegImmMatcher<string name> : AsmOperandClass {
+ let Name = name;
+ let RenderMethod = "addRegOrImmOperands";
+}
+
+// Special register classes for predicates and the M0 register
+def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
+ let CopyCost = -1; // Theoretically it is possible to read from SCC,
+ // but it should never be necessary.
+}
+
+def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
+def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
+
+// Register class for all scalar registers (SGPRs + Special Registers)
+def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
+ (add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
+>;
+
+def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 64, (add SGPR_64Regs)>;
+
+def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 64,
+ (add SGPR_64, VCCReg, EXECReg, FLAT_SCR)
+>;
+
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>;
+
+def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
+
+def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 512, (add SGPR_512)>;
+
+// Register class for all vector registers (VGPRs + Interpolation Registers)
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
+
+def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
+ let Size = 96;
+}
+
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
+
+def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>;
+
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
+
+def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
+ let Size = 32;
+}
+
+class RegImmOperand <RegisterClass rc> : RegisterOperand<rc> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_IMM32";
+}
+
+class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_INLINE_C";
+}
+
+//===----------------------------------------------------------------------===//
+// SSrc_* Operands with an SGPR or a 32-bit immediate
+//===----------------------------------------------------------------------===//
+
+def SSrc_32 : RegImmOperand<SReg_32> {
+ let ParserMatchClass = RegImmMatcher<"SSrc32">;
+}
+
+def SSrc_64 : RegImmOperand<SReg_64> {
+ let ParserMatchClass = RegImmMatcher<"SSrc64">;
+}
+
+//===----------------------------------------------------------------------===//
+// SCSrc_* Operands with an SGPR or a inline constant
+//===----------------------------------------------------------------------===//
+
+def SCSrc_32 : RegInlineOperand<SReg_32> {
+ let ParserMatchClass = RegImmMatcher<"SCSrc32">;
+}
+
+//===----------------------------------------------------------------------===//
+// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
+//===----------------------------------------------------------------------===//
+
+def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;
+
+def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
+
+def VSrc_32 : RegisterOperand<VS_32> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_IMM32";
+ let ParserMatchClass = RegImmMatcher<"VSrc32">;
+}
+
+def VSrc_64 : RegisterOperand<VS_64> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_IMM32";
+ let ParserMatchClass = RegImmMatcher<"VSrc64">;
+}
+
+//===----------------------------------------------------------------------===//
+// VCSrc_* Operands with an SGPR, VGPR or an inline constant
+//===----------------------------------------------------------------------===//
+
+def VCSrc_32 : RegisterOperand<VS_32> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"VCSrc32">;
+}
+
+def VCSrc_64 : RegisterOperand<VS_64> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"VCSrc64">;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SISchedule.td b/contrib/llvm/lib/Target/AMDGPU/SISchedule.td
new file mode 100644
index 0000000..9b1f676
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -0,0 +1,91 @@
+//===-- SISchedule.td - SI Scheduling definitions ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineModel definitions for Southern Islands (SI)
+//
+//===----------------------------------------------------------------------===//
+
+def WriteBranch : SchedWrite;
+def WriteExport : SchedWrite;
+def WriteLDS : SchedWrite;
+def WriteSALU : SchedWrite;
+def WriteSMEM : SchedWrite;
+def WriteVMEM : SchedWrite;
+
+// Vector ALU instructions
+def Write32Bit : SchedWrite;
+def WriteQuarterRate32 : SchedWrite;
+
+def WriteFloatFMA : SchedWrite;
+
+def WriteDouble : SchedWrite;
+def WriteDoubleAdd : SchedWrite;
+
+def SIFullSpeedModel : SchedMachineModel;
+def SIQuarterSpeedModel : SchedMachineModel;
+
+// BufferSize = 0 means the processors are in-order.
+let BufferSize = 0 in {
+
+// XXX: Are the resource counts correct?
+def HWBranch : ProcResource<1>;
+def HWExport : ProcResource<7>; // Taken from S_WAITCNT
+def HWLGKM : ProcResource<31>; // Taken from S_WAITCNT
+def HWSALU : ProcResource<1>;
+def HWVMEM : ProcResource<15>; // Taken from S_WAITCNT
+def HWVALU : ProcResource<1>;
+
+}
+
+class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
+ int latency> : WriteRes<write, resources> {
+ let Latency = latency;
+}
+
+class HWVALUWriteRes<SchedWrite write, int latency> :
+ HWWriteRes<write, [HWVALU], latency>;
+
+
+// The latency numbers are taken from the AMD Accelerated Parallel Processing
+// guide. They may not be accurate.
+
+// The latency values are 1 / (operations / cycle) / 4.
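One way to read that formula: with four-cycle wavefront issue, a full-rate VALU op completes 1/4 of a wavefront per cycle, so 1 / (1/4) / 4 = 1, and a quarter-rate op completes 1/16 per cycle, so 1 / (1/16) / 4 = 4, matching Write32Bit and WriteQuarterRate32 below. This reading is an assumption; the guide's exact wording is not reproduced here.

    #include <cstdio>

    int main() {
      auto Latency = [](double WaveOpsPerCycle) { return 1.0 / WaveOpsPerCycle / 4.0; };
      std::printf("%g %g\n", Latency(1.0 / 4), Latency(1.0 / 16)); // 1 4
      return 0;
    }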
+multiclass SICommonWriteRes {
+
+ def : HWWriteRes<WriteBranch, [HWBranch], 100>; // XXX: Guessed ???
+ def : HWWriteRes<WriteExport, [HWExport], 100>; // XXX: Guessed ???
+ def : HWWriteRes<WriteLDS, [HWLGKM], 32>; // 2 - 64
+ def : HWWriteRes<WriteSALU, [HWSALU], 1>;
+ def : HWWriteRes<WriteSMEM, [HWLGKM], 10>; // XXX: Guessed ???
+ def : HWWriteRes<WriteVMEM, [HWVMEM], 450>; // 300 - 600
+
+ def : HWVALUWriteRes<Write32Bit, 1>;
+ def : HWVALUWriteRes<WriteQuarterRate32, 4>;
+}
+
+
+let SchedModel = SIFullSpeedModel in {
+
+defm : SICommonWriteRes;
+
+def : HWVALUWriteRes<WriteFloatFMA, 1>;
+def : HWVALUWriteRes<WriteDouble, 4>;
+def : HWVALUWriteRes<WriteDoubleAdd, 2>;
+
+} // End SchedModel = SIFullSpeedModel
+
+let SchedModel = SIQuarterSpeedModel in {
+
+defm : SICommonWriteRes;
+
+def : HWVALUWriteRes<WriteFloatFMA, 16>;
+def : HWVALUWriteRes<WriteDouble, 16>;
+def : HWVALUWriteRes<WriteDoubleAdd, 8>;
+
+} // End SchedModel = SIQuarterSpeedModel
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
new file mode 100644
index 0000000..51e72cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -0,0 +1,272 @@
+//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass tries to use the 32-bit encoding for instructions when possible.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUMCInstLower.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "si-shrink-instructions"
+
+STATISTIC(NumInstructionsShrunk,
+          "Number of 64-bit instructions reduced to 32-bit.");
+STATISTIC(NumLiteralConstantsFolded,
+ "Number of literal constants folded into 32-bit instructions.");
+
+namespace llvm {
+ void initializeSIShrinkInstructionsPass(PassRegistry&);
+}
+
+using namespace llvm;
+
+namespace {
+
+class SIShrinkInstructions : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIShrinkInstructions() : MachineFunctionPass(ID) {
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI Shrink Instructions";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIShrinkInstructions, DEBUG_TYPE,
+                      "SI Shrink Instructions", false, false)
+INITIALIZE_PASS_END(SIShrinkInstructions, DEBUG_TYPE,
+                    "SI Shrink Instructions", false, false)
+
+char SIShrinkInstructions::ID = 0;
+
+FunctionPass *llvm::createSIShrinkInstructionsPass() {
+ return new SIShrinkInstructions();
+}
+
+static bool isVGPR(const MachineOperand *MO, const SIRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI) {
+ if (!MO->isReg())
+ return false;
+
+ if (TargetRegisterInfo::isVirtualRegister(MO->getReg()))
+ return TRI.hasVGPRs(MRI.getRegClass(MO->getReg()));
+
+ return TRI.hasVGPRs(TRI.getPhysRegClass(MO->getReg()));
+}
+
+static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
+ const SIRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI) {
+
+ const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ // Can't shrink instruction with three operands.
+ // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
+ // a special case for it. It can only be shrunk if the third operand
+  // is vcc. We should handle this the same way we handle vopc, by adding
+  // a register allocation hint pre-regalloc and then doing the shrinking
+ // post-regalloc.
+ if (Src2)
+ return false;
+
+ const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ const MachineOperand *Src1Mod =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
+
+ if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
+ return false;
+
+ // We don't need to check src0, all input types are legal, so just make sure
+ // src0 isn't using any modifiers.
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
+ return false;
+
+ // Check output modifiers
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
+ return false;
+
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
+ return false;
+
+ return true;
+}
+
+/// \brief This function checks \p MI for operands defined by a move immediate
+/// instruction and then folds the literal constant into the instruction if it
+/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction
+/// and will only fold literal constants if we are still in SSA.
+static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
+ MachineRegisterInfo &MRI, bool TryToCommute = true) {
+
+ if (!MRI.isSSA())
+ return;
+
+ assert(TII->isVOP1(MI.getOpcode()) || TII->isVOP2(MI.getOpcode()) ||
+ TII->isVOPC(MI.getOpcode()));
+
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
+ MachineOperand &Src0 = MI.getOperand(Src0Idx);
+
+ // Only one literal constant is allowed per instruction, so if src0 is a
+ // literal constant then we can't do any folding.
+ if (Src0.isImm() &&
+ TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
+ return;
+
+ // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
+ // SGPR, we cannot commute the instruction, so we can't fold any literal
+ // constants.
+ if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
+ return;
+
+ // Try to fold Src0
+ if (Src0.isReg()) {
+ unsigned Reg = Src0.getReg();
+ MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
+ if (Def && Def->isMoveImmediate()) {
+ MachineOperand &MovSrc = Def->getOperand(1);
+ bool ConstantFolded = false;
+
+ if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
+ Src0.ChangeToImmediate(MovSrc.getImm());
+ ConstantFolded = true;
+ }
+ if (ConstantFolded) {
+ if (MRI.use_empty(Reg))
+ Def->eraseFromParent();
+ ++NumLiteralConstantsFolded;
+ return;
+ }
+ }
+ }
+
+ // We have failed to fold src0, so commute the instruction and try again.
+ if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(&MI))
+ foldImmediates(MI, TII, MRI, false);
+
+}
+
+bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ std::vector<unsigned> I1Defs;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MI = *I;
+
+ // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
+ if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
+ const MachineOperand &Src = MI.getOperand(1);
+
+ if (Src.isImm()) {
+ if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4))
+ MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+ }
+
+ continue;
+ }
+
+ if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
+ continue;
+
+ if (!canShrink(MI, TII, TRI, MRI)) {
+ // Try commuting the instruction and see if that enables us to shrink
+ // it.
+ if (!MI.isCommutable() || !TII->commuteInstruction(&MI) ||
+ !canShrink(MI, TII, TRI, MRI))
+ continue;
+ }
+
+ // getVOPe32 could be -1 here if we started with an instruction that had
+ // a 32-bit encoding and then commuted it to an instruction that did not.
+ if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
+ continue;
+
+ int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
+
+ if (TII->isVOPC(Op32)) {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ // VOPC instructions can only write to the VCC register. We can't
+ // force them to use VCC here, because the register allocator has
+ // trouble with sequences like this, which cause the allocator to run
+ // out of registers if vreg0 and vreg1 belong to the VCCReg register
+ // class:
+ // vreg0 = VOPC;
+ // vreg1 = VOPC;
+ // S_AND_B64 vreg0, vreg1
+ //
+ // So, instead of forcing the instruction to write to VCC, we provide
+        // a hint to the register allocator to use VCC and then we will run
+ // this pass again after RA and shrink it if it outputs to VCC.
+ MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
+ continue;
+ }
+ if (DstReg != AMDGPU::VCC)
+ continue;
+ }
+
+ // We can shrink this instruction
+ DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);
+
+ MachineInstrBuilder Inst32 =
+ BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
+
+ // dst
+ Inst32.addOperand(MI.getOperand(0));
+
+ Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
+
+ const MachineOperand *Src1 =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (Src1)
+ Inst32.addOperand(*Src1);
+
+ ++NumInstructionsShrunk;
+ MI.eraseFromParent();
+
+ foldImmediates(*Inst32, TII, MRI);
+ DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
+
+
+ }
+ }
+ return false;
+}
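Concretely, the pass turns the 64-bit VOP3 form of a VOP1/VOP2/VOPC instruction into its 32-bit encoding when no source or output modifiers are in play (for example V_ADD_F32_e64 becomes V_ADD_F32_e32), and then foldImmediates tries to pull a preceding V_MOV_B32 literal into src0. The mapping below is only an illustration of what AMDGPU::getVOPe32 returns for a couple of opcodes, not generated output:

    #include <cstdio>

    int main() {
      // Illustrative e64 -> e32 pairs; the real mapping is table-generated.
      const char *Pairs[][2] = {
          {"V_ADD_F32_e64", "V_ADD_F32_e32"},
          {"V_MUL_F32_e64", "V_MUL_F32_e32"},
      };
      for (const auto &P : Pairs)
        std::printf("%s -> %s\n", P[0], P[1]);
      return 0;
    }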
diff --git a/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp b/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp
new file mode 100644
index 0000000..591ce85
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp
@@ -0,0 +1,161 @@
+//===-- SITypeRewriter.cpp - Remove unwanted types ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass performs the following type substitutions on all
+/// non-compute shaders:
+///
+/// v16i8 => v4i32
+///   - v16i8 is used for constant memory resource descriptors. This type is
+///     legal for some compute APIs, and we don't want to declare it as legal
+///     in the backend, because we want the legalizer to expand all v16i8
+///     operations.
+/// v1* => *
+///   - Having v1* types complicates the legalizer and we can easily replace
+///     them with the element type.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+
+using namespace llvm;
+
+namespace {
+
+class SITypeRewriter : public FunctionPass,
+ public InstVisitor<SITypeRewriter> {
+
+ static char ID;
+ Module *Mod;
+ Type *v16i8;
+ Type *v4i32;
+
+public:
+ SITypeRewriter() : FunctionPass(ID) { }
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+ const char *getPassName() const override {
+ return "SI Type Rewriter";
+ }
+ void visitLoadInst(LoadInst &I);
+ void visitCallInst(CallInst &I);
+ void visitBitCast(BitCastInst &I);
+};
+
+} // End anonymous namespace
+
+char SITypeRewriter::ID = 0;
+
+bool SITypeRewriter::doInitialization(Module &M) {
+ Mod = &M;
+ v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16);
+ v4i32 = VectorType::get(Type::getInt32Ty(M.getContext()), 4);
+ return false;
+}
+
+bool SITypeRewriter::runOnFunction(Function &F) {
+ Attribute A = F.getFnAttribute("ShaderType");
+
+ unsigned ShaderType = ShaderType::COMPUTE;
+ if (A.isStringAttribute()) {
+ StringRef Str = A.getValueAsString();
+ Str.getAsInteger(0, ShaderType);
+ }
+ if (ShaderType == ShaderType::COMPUTE)
+ return false;
+
+ visit(F);
+ visit(F);
+
+ return false;
+}
+
+void SITypeRewriter::visitLoadInst(LoadInst &I) {
+ Value *Ptr = I.getPointerOperand();
+ Type *PtrTy = Ptr->getType();
+ Type *ElemTy = PtrTy->getPointerElementType();
+ IRBuilder<> Builder(&I);
+ if (ElemTy == v16i8) {
+ Value *BitCast = Builder.CreateBitCast(Ptr,
+ PointerType::get(v4i32,PtrTy->getPointerAddressSpace()));
+ LoadInst *Load = Builder.CreateLoad(BitCast);
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
+ I.getAllMetadataOtherThanDebugLoc(MD);
+ for (unsigned i = 0, e = MD.size(); i != e; ++i) {
+ Load->setMetadata(MD[i].first, MD[i].second);
+ }
+ Value *BitCastLoad = Builder.CreateBitCast(Load, I.getType());
+ I.replaceAllUsesWith(BitCastLoad);
+ I.eraseFromParent();
+ }
+}
+
+void SITypeRewriter::visitCallInst(CallInst &I) {
+ IRBuilder<> Builder(&I);
+
+ SmallVector <Value*, 8> Args;
+ SmallVector <Type*, 8> Types;
+ bool NeedToReplace = false;
+ Function *F = I.getCalledFunction();
+ std::string Name = F->getName();
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ Value *Arg = I.getArgOperand(i);
+ if (Arg->getType() == v16i8) {
+ Args.push_back(Builder.CreateBitCast(Arg, v4i32));
+ Types.push_back(v4i32);
+ NeedToReplace = true;
+ Name = Name + ".v4i32";
+ } else if (Arg->getType()->isVectorTy() &&
+ Arg->getType()->getVectorNumElements() == 1 &&
+ Arg->getType()->getVectorElementType() ==
+ Type::getInt32Ty(I.getContext())){
+ Type *ElementTy = Arg->getType()->getVectorElementType();
+ std::string TypeName = "i32";
+ InsertElementInst *Def = cast<InsertElementInst>(Arg);
+ Args.push_back(Def->getOperand(1));
+ Types.push_back(ElementTy);
+ std::string VecTypeName = "v1" + TypeName;
+ Name = Name.replace(Name.find(VecTypeName), VecTypeName.length(), TypeName);
+ NeedToReplace = true;
+ } else {
+ Args.push_back(Arg);
+ Types.push_back(Arg->getType());
+ }
+ }
+
+ if (!NeedToReplace) {
+ return;
+ }
+ Function *NewF = Mod->getFunction(Name);
+ if (!NewF) {
+    NewF = Function::Create(
+        FunctionType::get(F->getReturnType(), Types, false),
+        GlobalValue::ExternalLinkage, Name, Mod);
+ NewF->setAttributes(F->getAttributes());
+ }
+ I.replaceAllUsesWith(Builder.CreateCall(NewF, Args));
+ I.eraseFromParent();
+}
+
+void SITypeRewriter::visitBitCast(BitCastInst &I) {
+ IRBuilder<> Builder(&I);
+ if (I.getDestTy() != v4i32) {
+ return;
+ }
+
+ if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) {
+ if (Op->getSrcTy() == v4i32) {
+ I.replaceAllUsesWith(Op->getOperand(0));
+ I.eraseFromParent();
+ }
+ }
+}
+
+FunctionPass *llvm::createSITypeRewriter() {
+ return new SITypeRewriter();
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
new file mode 100644
index 0000000..2112135
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
@@ -0,0 +1,30 @@
+//===-- TargetInfo/AMDGPUTargetInfo.cpp - TargetInfo for AMDGPU -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+/// \brief The target which supports all AMD GPUs. This will eventually
+/// be deprecated and there will be an R600 target and a GCN target.
+Target llvm::TheAMDGPUTarget;
+/// \brief The target for GCN GPUs
+Target llvm::TheGCNTarget;
+
+/// \brief Extern function to initialize the targets for the AMDGPU backend
+extern "C" void LLVMInitializeAMDGPUTargetInfo() {
+ RegisterTarget<Triple::r600, false>
+ R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
+ RegisterTarget<Triple::amdgcn, false> GCN(TheGCNTarget, "amdgcn", "AMD GCN GPUs");
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/VIInstrFormats.td b/contrib/llvm/lib/Target/AMDGPU/VIInstrFormats.td
new file mode 100644
index 0000000..d8738f9
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/VIInstrFormats.td
@@ -0,0 +1,166 @@
+//===-- VIInstrFormats.td - VI Instruction Encodings ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// VI Instruction format definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class DSe_vi <bits<8> op> : Enc64 {
+ bits<8> vdst;
+ bits<1> gds;
+ bits<8> addr;
+ bits<8> data0;
+ bits<8> data1;
+ bits<8> offset0;
+ bits<8> offset1;
+
+ let Inst{7-0} = offset0;
+ let Inst{15-8} = offset1;
+ let Inst{16} = gds;
+ let Inst{24-17} = op;
+ let Inst{31-26} = 0x36; //encoding
+ let Inst{39-32} = addr;
+ let Inst{47-40} = data0;
+ let Inst{55-48} = data1;
+ let Inst{63-56} = vdst;
+}
+
+class MUBUFe_vi <bits<7> op> : Enc64 {
+ bits<12> offset;
+ bits<1> offen;
+ bits<1> idxen;
+ bits<1> glc;
+ bits<1> lds;
+ bits<8> vaddr;
+ bits<8> vdata;
+ bits<7> srsrc;
+ bits<1> slc;
+ bits<1> tfe;
+ bits<8> soffset;
+
+ let Inst{11-0} = offset;
+ let Inst{12} = offen;
+ let Inst{13} = idxen;
+ let Inst{14} = glc;
+ let Inst{16} = lds;
+ let Inst{17} = slc;
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x38; //encoding
+ let Inst{39-32} = vaddr;
+ let Inst{47-40} = vdata;
+ let Inst{52-48} = srsrc{6-2};
+ let Inst{55} = tfe;
+ let Inst{63-56} = soffset;
+}
+
+class MTBUFe_vi <bits<4> op> : Enc64 {
+ bits<12> offset;
+ bits<1> offen;
+ bits<1> idxen;
+ bits<1> glc;
+ bits<4> dfmt;
+ bits<3> nfmt;
+ bits<8> vaddr;
+ bits<8> vdata;
+ bits<7> srsrc;
+ bits<1> slc;
+ bits<1> tfe;
+ bits<8> soffset;
+
+ let Inst{11-0} = offset;
+ let Inst{12} = offen;
+ let Inst{13} = idxen;
+ let Inst{14} = glc;
+ let Inst{18-15} = op;
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
+ let Inst{31-26} = 0x3a; //encoding
+ let Inst{39-32} = vaddr;
+ let Inst{47-40} = vdata;
+ let Inst{52-48} = srsrc{6-2};
+ let Inst{54} = slc;
+ let Inst{55} = tfe;
+ let Inst{63-56} = soffset;
+}
+
+class SMEMe_vi <bits<8> op, bit imm> : Enc64 {
+ bits<7> sbase;
+ bits<7> sdata;
+ bits<1> glc;
+ bits<20> offset;
+
+ let Inst{5-0} = sbase{6-1};
+ let Inst{12-6} = sdata;
+ let Inst{16} = glc;
+ let Inst{17} = imm;
+ let Inst{25-18} = op;
+ let Inst{31-26} = 0x30; //encoding
+ let Inst{51-32} = offset;
+}
+
+class VOP3e_vi <bits<10> op> : Enc64 {
+ bits<8> vdst;
+ bits<2> src0_modifiers;
+ bits<9> src0;
+ bits<2> src1_modifiers;
+ bits<9> src1;
+ bits<2> src2_modifiers;
+ bits<9> src2;
+ bits<1> clamp;
+ bits<2> omod;
+
+ let Inst{7-0} = vdst;
+ let Inst{8} = src0_modifiers{1};
+ let Inst{9} = src1_modifiers{1};
+ let Inst{10} = src2_modifiers{1};
+ let Inst{15} = clamp;
+ let Inst{25-16} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = src0;
+ let Inst{49-41} = src1;
+ let Inst{58-50} = src2;
+ let Inst{60-59} = omod;
+ let Inst{61} = src0_modifiers{0};
+ let Inst{62} = src1_modifiers{0};
+ let Inst{63} = src2_modifiers{0};
+}
+
+class VOP3be_vi <bits<10> op> : Enc64 {
+ bits<8> vdst;
+ bits<2> src0_modifiers;
+ bits<9> src0;
+ bits<2> src1_modifiers;
+ bits<9> src1;
+ bits<2> src2_modifiers;
+ bits<9> src2;
+ bits<7> sdst;
+ bits<2> omod;
+ bits<1> clamp;
+
+ let Inst{7-0} = vdst;
+ let Inst{14-8} = sdst;
+ let Inst{15} = clamp;
+ let Inst{25-16} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = src0;
+ let Inst{49-41} = src1;
+ let Inst{58-50} = src2;
+ let Inst{60-59} = omod;
+ let Inst{61} = src0_modifiers{0};
+ let Inst{62} = src1_modifiers{0};
+ let Inst{63} = src2_modifiers{0};
+}
+
+class EXPe_vi : EXPe {
+ let Inst{31-26} = 0x31; //encoding
+}
+
+class VINTRPe_vi <bits<2> op> : VINTRPe <op> {
+ let Inst{31-26} = 0x35; // encoding
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td
new file mode 100644
index 0000000..5bf86e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td
@@ -0,0 +1,106 @@
+//===-- VIInstructions.td - VI Instruction Definitions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Instruction definitions for VI and newer.
+//===----------------------------------------------------------------------===//
+
+let SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI in {
+
+//===----------------------------------------------------------------------===//
+// VOP1 Instructions
+//===----------------------------------------------------------------------===//
+
+defm V_CVT_F16_U16 : VOP1Inst <vop1<0, 0x39>, "v_cvt_f16_u16", VOP_F16_I16>;
+defm V_CVT_F16_I16 : VOP1Inst <vop1<0, 0x3a>, "v_cvt_f16_i16", VOP_F16_I16>;
+defm V_CVT_U16_F16 : VOP1Inst <vop1<0, 0x3b>, "v_cvt_u16_f16", VOP_I16_F16>;
+defm V_CVT_I16_F16 : VOP1Inst <vop1<0, 0x3c>, "v_cvt_i16_f16", VOP_I16_F16>;
+defm V_RCP_F16 : VOP1Inst <vop1<0, 0x3d>, "v_rcp_f16", VOP_F16_F16>;
+defm V_SQRT_F16 : VOP1Inst <vop1<0, 0x3e>, "v_sqrt_f16", VOP_F16_F16>;
+defm V_RSQ_F16 : VOP1Inst <vop1<0, 0x3f>, "v_rsq_f16", VOP_F16_F16>;
+defm V_LOG_F16 : VOP1Inst <vop1<0, 0x40>, "v_log_f16", VOP_F16_F16>;
+defm V_EXP_F16 : VOP1Inst <vop1<0, 0x41>, "v_exp_f16", VOP_F16_F16>;
+defm V_FREXP_MANT_F16 : VOP1Inst <vop1<0, 0x42>, "v_frexp_mant_f16",
+ VOP_F16_F16
+>;
+defm V_FREXP_EXP_I16_F16 : VOP1Inst <vop1<0, 0x43>, "v_frexp_exp_i16_f16",
+ VOP_I16_F16
+>;
+defm V_FLOOR_F16 : VOP1Inst <vop1<0, 0x44>, "v_floor_f16", VOP_F16_F16>;
+defm V_CEIL_F16 : VOP1Inst <vop1<0, 0x45>, "v_ceil_f16", VOP_F16_F16>;
+defm V_TRUNC_F16 : VOP1Inst <vop1<0, 0x46>, "v_trunc_f16", VOP_F16_F16>;
+defm V_RNDNE_F16 : VOP1Inst <vop1<0, 0x47>, "v_rndne_f16", VOP_F16_F16>;
+defm V_FRACT_F16 : VOP1Inst <vop1<0, 0x48>, "v_fract_f16", VOP_F16_F16>;
+defm V_SIN_F16 : VOP1Inst <vop1<0, 0x49>, "v_sin_f16", VOP_F16_F16>;
+defm V_COS_F16 : VOP1Inst <vop1<0, 0x4a>, "v_cos_f16", VOP_F16_F16>;
+
+//===----------------------------------------------------------------------===//
+// VOP2 Instructions
+//===----------------------------------------------------------------------===//
+
+let isCommutable = 1 in {
+
+defm V_ADD_F16 : VOP2Inst <vop2<0, 0x1f>, "v_add_f16", VOP_F16_F16_F16>;
+defm V_SUB_F16 : VOP2Inst <vop2<0, 0x20>, "v_sub_f16", VOP_F16_F16_F16>;
+defm V_SUBREV_F16 : VOP2Inst <vop2<0, 0x21>, "v_subrev_f16", VOP_F16_F16_F16,
+ null_frag, "v_sub_f16"
+>;
+defm V_MUL_F16 : VOP2Inst <vop2<0, 0x22>, "v_mul_f16", VOP_F16_F16_F16>;
+defm V_MAC_F16 : VOP2Inst <vop2<0, 0x23>, "v_mac_f16", VOP_F16_F16_F16>;
+} // End isCommutable = 1
+defm V_MADMK_F16 : VOP2MADK <vop2<0,0x24>, "v_madmk_f16">;
+let isCommutable = 1 in {
+defm V_MADAK_F16 : VOP2MADK <vop2<0,0x25>, "v_madak_f16">;
+defm V_ADD_U16 : VOP2Inst <vop2<0,0x26>, "v_add_u16", VOP_I16_I16_I16>;
+defm V_SUB_U16 : VOP2Inst <vop2<0,0x27>, "v_sub_u16" , VOP_I16_I16_I16>;
+defm V_SUBREV_U16 : VOP2Inst <vop2<0,0x28>, "v_subrev_u16", VOP_I16_I16_I16>;
+defm V_MUL_LO_U16 : VOP2Inst <vop2<0,0x29>, "v_mul_lo_u16", VOP_I16_I16_I16>;
+} // End isCommutable = 1
+defm V_LSHLREV_B16 : VOP2Inst <vop2<0,0x2a>, "v_lshlrev_b16", VOP_I16_I16_I16>;
+defm V_LSHRREV_B16 : VOP2Inst <vop2<0,0x2b>, "v_lshrrev_b16", VOP_I16_I16_I16>;
+defm V_ASHRREV_B16 : VOP2Inst <vop2<0,0x2c>, "v_ashrrev_b16", VOP_I16_I16_I16>;
+let isCommutable = 1 in {
+defm V_MAX_F16 : VOP2Inst <vop2<0,0x2d>, "v_max_f16", VOP_F16_F16_F16>;
+defm V_MIN_F16 : VOP2Inst <vop2<0,0x2e>, "v_min_f16", VOP_F16_F16_F16>;
+defm V_MAX_U16 : VOP2Inst <vop2<0,0x2f>, "v_max_u16", VOP_I16_I16_I16>;
+defm V_MAX_I16 : VOP2Inst <vop2<0,0x30>, "v_max_i16", VOP_I16_I16_I16>;
+defm V_MIN_U16 : VOP2Inst <vop2<0,0x31>, "v_min_u16", VOP_I16_I16_I16>;
+defm V_MIN_I16 : VOP2Inst <vop2<0,0x32>, "v_min_i16", VOP_I16_I16_I16>;
+} // End isCommutable = 1
+defm V_LDEXP_F16 : VOP2Inst <vop2<0,0x33>, "v_ldexp_f16", VOP_F16_F16_I16>;
+
+// Aliases to simplify matching of floating-point instructions that are VOP2 on
+// SI and VOP3 on VI.
+
+class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
+ name#" $dst, $src0, $src1",
+ (inst VGPR_32:$dst, 0, VCSrc_32:$src0, 0, VCSrc_32:$src1, 0, 0)
+>, PredicateControl {
+ let UseInstAsmMatchConverter = 0;
+}
+
+def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
+
+} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
+
+//===----------------------------------------------------------------------===//
+// SMEM Patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isVI] in {
+
+// 1. Offset as 20-bit DWORD immediate
+def : Pat <
+ (SIload_constant v4i32:$sbase, IMM20bit:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
+>;
+
+} // End Predicates = [isVI]
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
index 9550a3a..d554fe5 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -46,6 +46,6 @@ FunctionPass *createThumb2SizeReductionPass(
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
-} // end namespace llvm;
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index d84f296..4530e41 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -429,7 +429,7 @@ void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
}
void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
- Triple TT(TM.getTargetTriple());
+ const Triple &TT = TM.getTargetTriple();
// Use unified assembler syntax.
OutStreamer->EmitAssemblerFlag(MCAF_SyntaxUnified);
@@ -473,7 +473,7 @@ emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
- Triple TT(TM.getTargetTriple());
+ const Triple &TT = TM.getTargetTriple();
if (TT.isOSBinFormatMachO()) {
// All darwin targets use mach-o.
const TargetLoweringObjectFileMachO &TLOFMacho =
@@ -564,7 +564,7 @@ void ARMAsmPrinter::emitAttributes() {
// anyhow.
// FIXME: For ifunc related functions we could iterate over and look
// for a feature string that doesn't match the default one.
- StringRef TT = TM.getTargetTriple();
+ const Triple &TT = TM.getTargetTriple();
StringRef CPU = TM.getTargetCPU();
StringRef FS = TM.getTargetFeatureString();
std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
index a6bc368..3d25121 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -105,7 +105,7 @@ private:
public:
unsigned getISAEncoding() override {
// ARM/Darwin adds ISA to the DWARF info for each function.
- Triple TT(TM.getTargetTriple());
+ const Triple &TT = TM.getTargetTriple();
if (!TT.isOSBinFormatMachO())
return 0;
bool isThumb = TT.getArch() == Triple::thumb ||
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9c4b496..f2b7a64 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -396,7 +397,7 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ ArrayRef<MachineOperand> Cond,
DebugLoc DL) const {
ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
int BOpc = !AFI->isThumbFunction()
@@ -458,8 +459,7 @@ bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
}
bool ARMBaseInstrInfo::
-PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const {
+PredicateInstruction(MachineInstr *MI, ArrayRef<MachineOperand> Pred) const {
unsigned Opc = MI->getOpcode();
if (isUncondBranchOpcode(Opc)) {
MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
@@ -479,9 +479,8 @@ PredicateInstruction(MachineInstr *MI,
return false;
}
-bool ARMBaseInstrInfo::
-SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const {
+bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const {
if (Pred1.size() > 2 || Pred2.size() > 2)
return false;
@@ -595,7 +594,7 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
// all definitions of CPSR are dead
return true;
}
-}
+} // namespace llvm
/// GetInstSize - Return the size of the specified MachineInstr.
///
@@ -3995,7 +3994,7 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
}
bool ARMBaseInstrInfo::
-hasHighOperandLatency(const InstrItineraryData *ItinData,
+hasHighOperandLatency(const TargetSchedModel &SchedModel,
const MachineRegisterInfo *MRI,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI, unsigned UseIdx) const {
@@ -4007,9 +4006,8 @@ hasHighOperandLatency(const InstrItineraryData *ItinData,
return true;
// Hoist VFP / NEON instructions with 4 or higher latency.
- int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
- if (Latency < 0)
- Latency = getInstrLatency(ItinData, DefMI);
+ unsigned Latency
+ = SchedModel.computeOperandLatency(DefMI, DefIdx, UseMI, UseIdx);
if (Latency <= 3)
return false;
return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
@@ -4017,8 +4015,9 @@ hasHighOperandLatency(const InstrItineraryData *ItinData,
}
bool ARMBaseInstrInfo::
-hasLowDefLatency(const InstrItineraryData *ItinData,
+hasLowDefLatency(const TargetSchedModel &SchedModel,
const MachineInstr *DefMI, unsigned DefIdx) const {
+ const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
if (!ItinData || ItinData->isEmpty())
return false;
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index c7185fe..6fc0edd 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -116,8 +116,7 @@ public:
bool AllowModify = false) const override;
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
bool
@@ -133,10 +132,10 @@ public:
}
bool PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const override;
+ ArrayRef<MachineOperand> Pred) const override;
- bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const override;
+ bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const override;
bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const override;
@@ -328,12 +327,12 @@ private:
int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const override;
- bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ bool hasHighOperandLatency(const TargetSchedModel &SchedModel,
const MachineRegisterInfo *MRI,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI,
unsigned UseIdx) const override;
- bool hasLowDefLatency(const InstrItineraryData *ItinData,
+ bool hasLowDefLatency(const TargetSchedModel &SchedModel,
const MachineInstr *DefMI,
unsigned DefIdx) const override;
@@ -494,6 +493,6 @@ bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
const ARMBaseInstrInfo &TII);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
index d687568..2edb96a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -281,6 +281,6 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
return true;
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index f4ec8c6..cb4eeb5 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -335,7 +335,7 @@ namespace {
}
};
char ARMConstantIslands::ID = 0;
-}
+} // namespace
/// verify - check BBOffsets, BBSizes, alignment of islands
void ARMConstantIslands::verify() {
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
index 36f63e2..b429bed 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -44,7 +44,7 @@ namespace ARMCP {
GOTTPOFF,
TPOFF
};
-}
+} // namespace ARMCP
/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
/// represent PC-relative displacement between the address of the load
@@ -254,6 +254,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 4438f50..963b46c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -69,7 +69,7 @@ namespace {
MachineBasicBlock::iterator &MBBI);
};
char ARMExpandPseudo::ID = 0;
-}
+} // namespace
/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
/// the instructions created from the expansion.
@@ -129,7 +129,7 @@ namespace {
return PseudoOpc < TE.PseudoOpc;
}
};
-}
+} // namespace
static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 4175b4a..cead18f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -2898,7 +2898,7 @@ const struct FoldableLoadExtendsStruct {
{ { ARM::SXTB, ARM::t2SXTB }, 0, 0, MVT::i8 },
{ { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 }
};
-}
+} // namespace
/// \brief The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
diff --git a/contrib/llvm/lib/Target/ARM/ARMFeatures.h b/contrib/llvm/lib/Target/ARM/ARMFeatures.h
index 0c910ab..5b4a44c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFeatures.h
+++ b/contrib/llvm/lib/Target/ARM/ARMFeatures.h
@@ -92,6 +92,6 @@ inline bool isV8EligibleForIT(InstrType *Instr) {
}
}
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index a52e497..091086d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -221,7 +221,7 @@ struct StackAdjustingInsts {
}
}
};
-}
+} // namespace
/// Emit an instruction sequence that will align the address in
/// register Reg by zero-ing out the lower bits. For versions of the
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
index d763d17..98313e6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -78,6 +78,6 @@ public:
MachineBasicBlock::iterator MI) const override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 50afb19..575a9d9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -279,7 +279,7 @@ private:
SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
bool is64BitVector);
};
-}
+} // namespace
/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 47c8400..94a026b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -83,7 +83,7 @@ namespace {
CallOrPrologue = PC;
}
};
-}
+} // namespace
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
@@ -1483,9 +1483,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool isThisReturn = false;
bool isSibCall = false;
+ auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
// Disable tail calls if they're not supported.
- if (!Subtarget->supportsTailCall() || MF.getTarget().Options.DisableTailCalls)
+ if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
isTailCall = false;
if (isTailCall) {
@@ -2042,7 +2043,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// cannot rely on the linker replacing the tail call with a return.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
- const Triple TT(getTargetMachine().getTargetTriple());
+ const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
(!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
return false;
@@ -2375,7 +2376,9 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
if (!Subtarget->supportsTailCall())
return false;
- if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
+ auto Attr =
+ CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
+ if (!CI->isTailCall() || Attr.getValueAsString() == "true")
return false;
return !Subtarget->isThumb1Only();
@@ -5060,6 +5063,30 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return true;
}
+/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
+/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
+static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
+ unsigned &WhichResult,
+ bool &isV_UNDEF) {
+ isV_UNDEF = false;
+ if (isVTRNMask(ShuffleMask, VT, WhichResult))
+ return ARMISD::VTRN;
+ if (isVUZPMask(ShuffleMask, VT, WhichResult))
+ return ARMISD::VUZP;
+ if (isVZIPMask(ShuffleMask, VT, WhichResult))
+ return ARMISD::VZIP;
+
+ isV_UNDEF = true;
+ if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return ARMISD::VTRN;
+ if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return ARMISD::VUZP;
+ if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return ARMISD::VZIP;
+
+ return 0;
+}
+
/// \return true if this is a reverse operation on an vector.
static bool isReverseMask(ArrayRef<int> M, EVT VT) {
unsigned NumElts = VT.getVectorNumElements();
@@ -5476,7 +5503,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
return true;
}
- bool ReverseVEXT;
+ bool ReverseVEXT, isV_UNDEF;
unsigned Imm, WhichResult;
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
@@ -5487,12 +5514,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isVREVMask(M, VT, 16) ||
isVEXTMask(M, VT, ReverseVEXT, Imm) ||
isVTBLMask(M, VT) ||
- isVTRNMask(M, VT, WhichResult) ||
- isVUZPMask(M, VT, WhichResult) ||
- isVZIPMask(M, VT, WhichResult) ||
- isVTRN_v_undef_Mask(M, VT, WhichResult) ||
- isVUZP_v_undef_Mask(M, VT, WhichResult) ||
- isVZIP_v_undef_Mask(M, VT, WhichResult) ||
+ isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
}
@@ -5684,25 +5706,53 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// these operations, DAG memoization will ensure that a single node is
// used for both shuffles.
unsigned WhichResult;
- if (isVTRNMask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
- V1, V2).getValue(WhichResult);
- if (isVUZPMask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
- V1, V2).getValue(WhichResult);
- if (isVZIPMask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
- V1, V2).getValue(WhichResult);
-
- if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
- V1, V1).getValue(WhichResult);
- if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
- V1, V1).getValue(WhichResult);
- if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
- V1, V1).getValue(WhichResult);
+ bool isV_UNDEF;
+ if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
+ ShuffleMask, VT, WhichResult, isV_UNDEF)) {
+ if (isV_UNDEF)
+ V2 = V1;
+ return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
+ .getValue(WhichResult);
+ }
+
+ // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
+ // shuffles that produce a result larger than their operands with:
+ // shuffle(concat(v1, undef), concat(v2, undef))
+ // ->
+ // shuffle(concat(v1, v2), undef)
+ // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
+ //
+ // This is useful in the general case, but there are special cases where
+ // native shuffles produce larger results: the two-result ops.
+ //
+ // Look through the concat when lowering them:
+ // shuffle(concat(v1, v2), undef)
+ // ->
+ // concat(VZIP(v1, v2):0, :1)
+ //
+ if (V1->getOpcode() == ISD::CONCAT_VECTORS &&
+ V2->getOpcode() == ISD::UNDEF) {
+ SDValue SubV1 = V1->getOperand(0);
+ SDValue SubV2 = V1->getOperand(1);
+ EVT SubVT = SubV1.getValueType();
+
+ // We expect these to have been canonicalized to -1.
+ assert(std::all_of(ShuffleMask.begin(), ShuffleMask.end(), [&](int i) {
+ return i < (int)VT.getVectorNumElements();
+ }) && "Unexpected shuffle index into UNDEF operand!");
+
+ if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
+ ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
+ if (isV_UNDEF)
+ SubV2 = SubV1;
+ assert((WhichResult == 0) &&
+ "In-place shuffle of concat can only have one result!");
+ SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
+ SubV1, SubV2);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
+ Res.getValue(1));
+ }
+ }
}
// If the shuffle is not directly supported and it has 4 elements, use
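A sketch of the caller side of the tail-call change in the hunks above, which replace the global TargetOptions::DisableTailCalls flag with a per-function string attribute. F is assumed to be an llvm::Function* from some front end or test; the snippet is illustrative only and not part of this commit:

  // Hypothetical producer code: opt a single function out of tail calls.
  F->addFnAttr("disable-tail-calls", "true");
  // LowerCall / mayBeEmittedAsTailCall then read it back, as in the hunks above:
  bool NoTailCalls =
      F->getFnAttribute("disable-tail-calls").getValueAsString() == "true";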
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index c0b329c..71a47a2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -215,7 +215,7 @@ namespace llvm {
VST3LN_UPD,
VST4LN_UPD
};
- }
+ } // namespace ARMISD
/// Define some predicates that are used for node matching.
namespace ARM {
@@ -638,6 +638,6 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
}
-}
+} // namespace llvm
#endif // ARMISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 84f95be..59e1535 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -198,7 +198,7 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
char ARMCGBR::ID = 0;
FunctionPass*
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
index 90f34ea..9e5700a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -43,6 +43,6 @@ private:
Reloc::Model RM) const override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 46ff326..50e2292 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -142,7 +142,7 @@ namespace {
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
char ARMLoadStoreOpt::ID = 0;
-}
+} // namespace
static bool definesCPSR(const MachineInstr *MI) {
for (const auto &MO : MI->operands()) {
@@ -1859,7 +1859,7 @@ namespace {
bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
};
char ARMPreAllocLoadStoreOpt::ID = 0;
-}
+} // namespace
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
TD = Fn.getTarget().getDataLayout();
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 14dd9ef..8b12102 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -229,6 +229,6 @@ public:
return It;
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
index 30baf42..1c8e1f8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -32,7 +32,7 @@ public:
}
};
char ARMOptimizeBarriersPass::ID = 0;
-}
+} // namespace
// Returns whether the instruction can safely move past a DMB instruction
// The current implementation allows this iif MI does not have any possible
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
index 1db190f..4563caa 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -70,6 +70,6 @@ public:
RTLIB::Libcall LC) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index f20318d..55808df 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -106,7 +106,7 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
return new ARMFrameLowering(STI);
}
-ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
+ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const ARMBaseTargetMachine &TM, bool IsLittle)
: ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
@@ -187,8 +187,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// Insert the architecture feature derived from the target triple into the
// feature string. This is important for setting features that are implied
// based on the architecture version.
- std::string ArchFS =
- ARM_MC::ParseARMTriple(TargetTriple.getTriple(), CPUString);
+ std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple, CPUString);
if (!FS.empty()) {
if (!ArchFS.empty())
ArchFS = (Twine(ArchFS) + "," + FS).str();
@@ -338,7 +337,7 @@ bool ARMSubtarget::hasSinCos() const {
}
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
-bool ARMSubtarget::enablePostMachineScheduler() const {
+bool ARMSubtarget::enablePostRAScheduler() const {
return (!isThumb() || hasThumb2());
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index 77ceb08..f00594f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -237,8 +237,8 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
- ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle);
+ ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
+ const ARMBaseTargetMachine &TM, bool IsLittle);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
@@ -430,7 +430,7 @@ public:
bool hasSinCos() const;
/// True for some subtargets at > -O0.
- bool enablePostMachineScheduler() const override;
+ bool enablePostRAScheduler() const override;
// enableAtomicExpand- True if we need to expand our atomics.
bool enableAtomicExpand() const override;
@@ -453,6 +453,6 @@ public:
/// True if fast-isel is used.
bool useFastISel() const;
};
-} // End llvm namespace
+} // namespace llvm
#endif // ARMSUBTARGET_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 0aceaed..104a34f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -115,11 +115,10 @@ computeTargetABI(const Triple &TT, StringRef CPU,
return TargetABI;
}
-static std::string computeDataLayout(StringRef TT, StringRef CPU,
+static std::string computeDataLayout(const Triple &TT, StringRef CPU,
const TargetOptions &Options,
bool isLittle) {
- const Triple Triple(TT);
- auto ABI = computeTargetABI(Triple, CPU, Options);
+ auto ABI = computeTargetABI(TT, CPU, Options);
std::string Ret = "";
if (isLittle)
@@ -129,7 +128,7 @@ static std::string computeDataLayout(StringRef TT, StringRef CPU,
// Big endian.
Ret += "E";
- Ret += DataLayout::getManglingComponent(Triple);
+ Ret += DataLayout::getManglingComponent(TT);
// Pointers are 32 bits and aligned to 32 bits.
Ret += "-p:32:32";
@@ -159,7 +158,7 @@ static std::string computeDataLayout(StringRef TT, StringRef CPU,
// The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
// aligned everywhere else.
- if (Triple.isOSNaCl())
+ if (TT.isOSNaCl())
Ret += "-S128";
else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS)
Ret += "-S64";
@@ -171,15 +170,15 @@ static std::string computeDataLayout(StringRef TT, StringRef CPU,
/// TargetMachine ctor - Create an ARM architecture model.
///
-ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
+ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
CPU, FS, Options, RM, CM, OL),
- TargetABI(computeTargetABI(Triple(TT), CPU, Options)),
- TLOF(createTLOF(Triple(getTargetTriple()))),
+ TargetABI(computeTargetABI(TT, CPU, Options)),
+ TLOF(createTLOF(getTargetTriple())),
Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) {
// Default to triple-appropriate float ABI
@@ -234,8 +233,9 @@ TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() {
void ARMTargetMachine::anchor() { }
-ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
+ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
: ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
@@ -247,7 +247,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU,
void ARMLETargetMachine::anchor() { }
-ARMLETargetMachine::ARMLETargetMachine(const Target &T, StringRef TT,
+ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
@@ -256,7 +256,7 @@ ARMLETargetMachine::ARMLETargetMachine(const Target &T, StringRef TT,
void ARMBETargetMachine::anchor() { }
-ARMBETargetMachine::ARMBETargetMachine(const Target &T, StringRef TT,
+ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
@@ -265,19 +265,18 @@ ARMBETargetMachine::ARMBETargetMachine(const Target &T, StringRef TT,
void ThumbTargetMachine::anchor() { }
-ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
+ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
- : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL,
- isLittle) {
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
initAsmInfo();
}
void ThumbLETargetMachine::anchor() { }
-ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, StringRef TT,
+ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
@@ -286,7 +285,7 @@ ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, StringRef TT,
void ThumbBETargetMachine::anchor() { }
-ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, StringRef TT,
+ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
@@ -355,8 +354,7 @@ bool ARMPassConfig::addPreISel() {
bool ARMPassConfig::addInstSelector() {
addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
- if (Triple(TM->getTargetTriple()).isOSBinFormatELF() &&
- TM->Options.EnableFastISel)
+ if (TM->getTargetTriple().isOSBinFormatELF() && TM->Options.EnableFastISel)
addPass(createARMGlobalBaseRegPass());
return false;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
index 20ca97b..8c98e08 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -36,12 +36,10 @@ protected:
mutable StringMap<std::unique_ptr<ARMSubtarget>> SubtargetMap;
public:
- ARMBaseTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
+ ARMBaseTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle);
+ CodeGenOpt::Level OL, bool isLittle);
~ARMBaseTargetMachine() override;
const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
@@ -64,8 +62,8 @@ public:
class ARMTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
public:
- ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM,
+ ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
};
@@ -74,8 +72,8 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
class ARMLETargetMachine : public ARMTargetMachine {
void anchor() override;
public:
- ARMLETargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
+ ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -85,9 +83,10 @@ public:
class ARMBETargetMachine : public ARMTargetMachine {
void anchor() override;
public:
- ARMBETargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM,
- CodeModel::Model CM, CodeGenOpt::Level OL);
+ ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// ThumbTargetMachine - Thumb target machine.
@@ -97,9 +96,10 @@ public:
class ThumbTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
public:
- ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM,
- CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
+ ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL,
+ bool isLittle);
};
/// ThumbLETargetMachine - Thumb little endian target machine.
@@ -107,7 +107,7 @@ public:
class ThumbLETargetMachine : public ThumbTargetMachine {
void anchor() override;
public:
- ThumbLETargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
@@ -118,7 +118,7 @@ public:
class ThumbBETargetMachine : public ThumbTargetMachine {
void anchor() override;
public:
- ThumbBETargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 8bcbb11..35387d3 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -5841,7 +5841,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// do and don't have a cc_out optional-def operand. With some spot-checks
// of the operand list, we can figure out which variant we're trying to
// parse and adjust accordingly before actually matching. We shouldn't ever
- // try to remove a cc_out operand that was explicitly set on the the
+ // try to remove a cc_out operand that was explicitly set on the
// mnemonic, of course (CarrySetting == true). Reason number #317 the
// table driven matcher doesn't fit well with the ARM instruction set.
if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands))
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 097ec04..f973a8d 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -81,7 +81,7 @@ namespace {
private:
std::vector<unsigned char> ITStates;
};
-}
+} // namespace
namespace {
/// ARM disassembler for all ARM platforms.
@@ -118,7 +118,7 @@ private:
DecodeStatus AddThumbPredicate(MCInst&) const;
void UpdateThumbVFPPredicate(MCInst&) const;
};
-}
+} // namespace
static bool Check(DecodeStatus &Out, DecodeStatus In) {
switch (In) {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index be23e90..1114635 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -744,10 +744,9 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
- const MCRegisterInfo &MRI, StringRef TT,
- StringRef CPU, bool isLittle) {
- Triple TheTriple(TT);
-
+ const MCRegisterInfo &MRI,
+ const Triple &TheTriple, StringRef CPU,
+ bool isLittle) {
switch (TheTriple.getObjectFormat()) {
default:
llvm_unreachable("unsupported object format");
@@ -764,38 +763,38 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
.Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S)
.Default(MachO::CPU_SUBTYPE_ARM_V7);
- return new ARMAsmBackendDarwin(T, TT, CS);
+ return new ARMAsmBackendDarwin(T, TheTriple, CS);
}
case Triple::COFF:
assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
- return new ARMAsmBackendWinCOFF(T, TT);
+ return new ARMAsmBackendWinCOFF(T, TheTriple);
case Triple::ELF:
assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target");
- uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
- return new ARMAsmBackendELF(T, TT, OSABI, isLittle);
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ARMAsmBackendELF(T, TheTriple, OSABI, isLittle);
}
}
MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU) {
+ const Triple &TT, StringRef CPU) {
return createARMAsmBackend(T, MRI, TT, CPU, true);
}
MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU) {
+ const Triple &TT, StringRef CPU) {
return createARMAsmBackend(T, MRI, TT, CPU, false);
}
MCAsmBackend *llvm::createThumbLEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU) {
+ const Triple &TT, StringRef CPU) {
return createARMAsmBackend(T, MRI, TT, CPU, true);
}
MCAsmBackend *llvm::createThumbBEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU) {
+ const Triple &TT, StringRef CPU) {
return createARMAsmBackend(T, MRI, TT, CPU, false);
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 4e60372..6b4abd5 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -23,9 +23,10 @@ class ARMAsmBackend : public MCAsmBackend {
bool isThumbMode; // Currently emitting Thumb code.
bool IsLittleEndian; // Big or little endian.
public:
- ARMAsmBackend(const Target &T, StringRef TT, bool IsLittle)
+ ARMAsmBackend(const Target &T, const Triple &TT, bool IsLittle)
: MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")),
- isThumbMode(TT.startswith("thumb")), IsLittleEndian(IsLittle) {}
+ isThumbMode(TT.getArchName().startswith("thumb")),
+ IsLittleEndian(IsLittle) {}
~ARMAsmBackend() override { delete STI; }
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index ebef789..e28f6e0 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -18,7 +18,8 @@ namespace {
class ARMAsmBackendDarwin : public ARMAsmBackend {
public:
const MachO::CPUSubTypeARM Subtype;
- ARMAsmBackendDarwin(const Target &T, StringRef TT, MachO::CPUSubTypeARM st)
+ ARMAsmBackendDarwin(const Target &T, const Triple &TT,
+ MachO::CPUSubTypeARM st)
: ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) {
HasDataInCodeSupport = true;
}
@@ -28,6 +29,6 @@ public:
Subtype);
}
};
-}
+} // namespace
#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
index 263c4c4..412feb8 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -15,13 +15,14 @@ namespace {
class ARMAsmBackendELF : public ARMAsmBackend {
public:
uint8_t OSABI;
- ARMAsmBackendELF(const Target &T, StringRef TT, uint8_t OSABI, bool IsLittle)
+ ARMAsmBackendELF(const Target &T, const Triple &TT, uint8_t OSABI,
+ bool IsLittle)
: ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {}
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createARMELFObjectWriter(OS, OSABI, isLittle());
}
};
-}
+} // namespace
#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
index f2c4358..170f59a 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
@@ -15,8 +15,8 @@ using namespace llvm;
namespace {
class ARMAsmBackendWinCOFF : public ARMAsmBackend {
public:
- ARMAsmBackendWinCOFF(const Target &T, StringRef Triple)
- : ARMAsmBackend(T, Triple, true) {}
+ ARMAsmBackendWinCOFF(const Target &T, const Triple &TheTriple)
+ : ARMAsmBackend(T, TheTriple, true) {}
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false);
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 4289a73..1975bca 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -114,7 +114,7 @@ namespace ARM_PROC {
case ID: return "id";
}
}
-}
+} // namespace ARM_PROC
namespace ARM_MB {
// The Memory Barrier Option constants map directly to the 4-bit encoding of
@@ -459,6 +459,6 @@ namespace ARMII {
} // end namespace ARMII
-} // end namespace llvm;
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 804d353..9fe27fb 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -40,7 +40,7 @@ namespace {
bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
};
-}
+} // namespace
ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 6e3af73..bbc0b37 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -1324,7 +1324,7 @@ MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) {
MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
const MCSubtargetInfo &STI) {
- Triple TT(STI.getTargetTriple());
+ const Triple &TT = STI.getTargetTriple();
if (TT.getObjectFormat() == Triple::ELF)
return new ARMTargetELFStreamer(S);
return new ARMTargetStreamer(S);
@@ -1345,6 +1345,6 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
return S;
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 46ba571..23ef501 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -104,7 +104,7 @@ enum Fixups {
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
-}
-}
+} // namespace ARM
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 84bb092..b885783 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -57,7 +57,7 @@ public:
return isThumb(STI) && STI.getFeatureBits()[ARM::FeatureThumb2];
}
bool isTargetMachO(const MCSubtargetInfo &STI) const {
- Triple TT(STI.getTargetTriple());
+ const Triple &TT = STI.getTargetTriple();
return TT.isOSBinFormatMachO();
}
@@ -1065,7 +1065,7 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
// it's just a plain immediate expression, previously those evaluated to
// the lower 16 bits of the expression regardless of whether
// we have a movt or a movw, but that led to misleadingly results.
- // This is now disallowed in the the AsmParser in validateInstruction()
+ // This is disallowed in the AsmParser in validateInstruction()
// so this should never happen.
llvm_unreachable("expression without :upper16: or :lower16:");
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 92c4d6a..0fb395e 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -130,16 +130,13 @@ static bool getARMLoadDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
#define GET_SUBTARGETINFO_MC_DESC
#include "ARMGenSubtargetInfo.inc"
-
-std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
- Triple triple(TT);
-
- bool isThumb = triple.getArch() == Triple::thumb ||
- triple.getArch() == Triple::thumbeb;
+std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) {
+ bool isThumb =
+ TT.getArch() == Triple::thumb || TT.getArch() == Triple::thumbeb;
bool NoCPU = CPU == "generic" || CPU.empty();
std::string ARMArchFeature;
- switch (triple.getSubArch()) {
+ switch (TT.getSubArch()) {
default:
llvm_unreachable("invalid sub-architecture for ARM");
case Triple::ARMSubArch_v8:
@@ -240,7 +237,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
ARMArchFeature += ",+thumb-mode";
}
- if (triple.isOSNaCl()) {
+ if (TT.isOSNaCl()) {
if (ARMArchFeature.empty())
ARMArchFeature = "+nacl-trap";
else
@@ -250,8 +247,8 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
return ARMArchFeature;
}
-MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
+MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT,
+ StringRef CPU, StringRef FS) {
std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
if (!FS.empty()) {
if (!ArchFS.empty())
@@ -332,10 +329,9 @@ static MCInstPrinter *createARMMCInstPrinter(const Triple &T,
return nullptr;
}
-static MCRelocationInfo *createARMMCRelocationInfo(StringRef TT,
+static MCRelocationInfo *createARMMCRelocationInfo(const Triple &TT,
MCContext &Ctx) {
- Triple TheTriple(TT);
- if (TheTriple.isOSBinFormatMachO())
+ if (TT.isOSBinFormatMachO())
return createARMMachORelocationInfo(Ctx);
// Default to the stock relocation info.
return llvm::createMCRelocationInfo(TT, Ctx);
@@ -374,7 +370,7 @@ public:
}
};
-}
+} // namespace
static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
return new ARMMCInstrAnalysis(Info);
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 24ca567..c6f2d13 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -40,12 +40,12 @@ extern Target TheARMLETarget, TheThumbLETarget;
extern Target TheARMBETarget, TheThumbBETarget;
namespace ARM_MC {
- std::string ParseARMTriple(StringRef TT, StringRef CPU);
+std::string ParseARMTriple(const Triple &TT, StringRef CPU);
- /// Create a ARM MCSubtargetInfo instance. This is exposed so Asm parser, etc.
- /// do not need to go through TargetRegistry.
- MCSubtargetInfo *createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS);
+/// Create an ARM MCSubtargetInfo instance. This is exposed so Asm parser, etc.
+/// do not need to go through TargetRegistry.
+MCSubtargetInfo *createARMMCSubtargetInfo(const Triple &TT, StringRef CPU,
+ StringRef FS);
}
MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S);
@@ -65,20 +65,22 @@ MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);
MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU,
+ const Triple &TT, StringRef CPU,
bool IsLittleEndian);
MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+ const Triple &TT, StringRef CPU);
MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+ const Triple &TT, StringRef CPU);
-MCAsmBackend *createThumbLEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+MCAsmBackend *createThumbLEAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU);
-MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+MCAsmBackend *createThumbBEAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU);
// Construct a PE/COFF machine code streamer which will generate a PE/COFF
// object file.
@@ -101,7 +103,7 @@ MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
/// Construct ARM Mach-O relocation info.
MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx);
-} // End llvm namespace
+} // namespace llvm
// Defines symbolic names for ARM registers. This defines a mapping from
// register name to register number.
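A sketch of the caller side of the StringRef-to-Triple migration declared above, using a hypothetical triple string; the calls mirror the new declarations in this header and the snippet is not part of this commit:

  Triple TT("thumbv7-apple-ios");  // hypothetical example triple
  std::string ArchFS = ARM_MC::ParseARMTriple(TT, "cortex-a9");
  MCSubtargetInfo *STI =
      ARM_MC::createARMMCSubtargetInfo(TT, "cortex-a9", ArchFS);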
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 95d7ea7..6ac778e 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -56,7 +56,7 @@ public:
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;
};
-}
+} // namespace
static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
unsigned &Log2Size) {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 173cc93..32481e2 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -60,7 +60,7 @@ namespace {
EmitByte(ARM::EHABI::UNWIND_OPCODE_FINISH);
}
};
-}
+} // namespace
void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
if (RegSave == 0u)
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 166c04b..34b552f 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -79,7 +79,7 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16;
}
-}
+} // namespace
namespace llvm {
MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index b993b1b..6515a65 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -35,7 +35,7 @@ void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
getAssembler().setIsThumbFunc(Symbol);
}
-}
+} // namespace
MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context,
MCAsmBackend &MAB,
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
index ed2deea..ca98f69 100644
--- a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -71,7 +71,7 @@ namespace {
bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
};
char MLxExpansion::ID = 0;
-}
+} // namespace
void MLxExpansion::clearStack() {
std::fill(LastMIs, LastMIs + 4, nullptr);
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
index 31d5732..e5e89fa 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -47,6 +47,6 @@ public:
MachineBasicBlock::iterator MI) const override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
index f3f493d..31b4df2 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -58,6 +58,6 @@ private:
void expandLoadStackGuard(MachineBasicBlock::iterator MI,
Reloc::Model RM) const override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 68736bc..7ce602d 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -48,7 +48,7 @@ namespace {
bool InsertITInstructions(MachineBasicBlock &MBB);
};
char Thumb2ITBlockPass::ID = 0;
-}
+} // namespace
/// TrackDefUses - Tracking what registers are being defined and used by
/// instructions in the IT block. This also tracks "dependencies", i.e. uses
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 916ab06..d186dfb 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -73,6 +73,6 @@ private:
ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index d9ab824..0dd1b4c 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -202,7 +202,7 @@ namespace {
std::function<bool(const Function &)> PredicateFtor;
};
char Thumb2SizeReduce::ID = 0;
-}
+} // namespace
Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
: MachineFunctionPass(ID), PredicateFtor(Ftor) {
diff --git a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h
index 23aaff3..e55f88f 100644
--- a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h
@@ -60,6 +60,6 @@ public:
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
index 10ec658..9d0aa7a9 100644
--- a/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -44,7 +44,7 @@ public:
const char *Modifier = nullptr);
void EmitInstruction(const MachineInstr *MI) override;
};
-}
+} // namespace
void BPFAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O, const char *Modifier) {
diff --git a/contrib/llvm/lib/Target/BPF/BPFFrameLowering.h b/contrib/llvm/lib/Target/BPF/BPFFrameLowering.h
index 3b9fc44..a6fe7c9 100644
--- a/contrib/llvm/lib/Target/BPF/BPFFrameLowering.h
+++ b/contrib/llvm/lib/Target/BPF/BPFFrameLowering.h
@@ -37,5 +37,5 @@ public:
MBB.erase(MI);
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index d9e654c..b49de3a 100644
--- a/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -51,7 +51,7 @@ private:
// Complex Pattern for address selection.
bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
};
-}
+} // namespace
// ComplexPattern used on BPF Load/Store instructions
bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 38c56bb..21d160d 100644
--- a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -86,7 +86,7 @@ public:
};
int DiagnosticInfoUnsupported::KindID = 0;
-}
+} // namespace
BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
const BPFSubtarget &STI)
diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.h b/contrib/llvm/lib/Target/BPF/BPFISelLowering.h
index ec71dca..b56bb39 100644
--- a/contrib/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -85,6 +85,6 @@ private:
return true;
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp
index 28bd0ec..83d14ef 100644
--- a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp
@@ -133,7 +133,7 @@ bool BPFInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned BPFInstrInfo::InsertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ ArrayRef<MachineOperand> Cond,
DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
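The InsertBranch signature change from const SmallVectorImpl<MachineOperand> & to ArrayRef<MachineOperand> recurs for the other targets further down (Hexagon's InsertBranch, PredicateInstruction, SubsumesPredicate, and so on). ArrayRef is a non-owning view, so existing callers keep passing their SmallVectors while the callee stops depending on the container type. A small illustration with int as a stand-in element type (the element type is an assumption for brevity, not part of the patch):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include <vector>
using namespace llvm;

static size_t countOps(ArrayRef<int> Ops) { return Ops.size(); }

void demo() {
  SmallVector<int, 4> A;
  A.push_back(1);
  A.push_back(2);
  std::vector<int> B{4, 5};
  countOps(A);       // SmallVector converts implicitly to ArrayRef
  countOps(B);       // std::vector does too
  countOps({6, 7});  // as does a braced initializer list
}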
diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.h b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.h
index 4056c2e..bd96f76 100644
--- a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.h
+++ b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.h
@@ -51,10 +51,9 @@ public:
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFMCInstLower.h b/contrib/llvm/lib/Target/BPF/BPFMCInstLower.h
index 054e894..ba91897 100644
--- a/contrib/llvm/lib/Target/BPF/BPFMCInstLower.h
+++ b/contrib/llvm/lib/Target/BPF/BPFMCInstLower.h
@@ -38,6 +38,6 @@ public:
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
index 7072dd0..44977a2 100644
--- a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
+++ b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
@@ -35,6 +35,6 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {
unsigned getFrameRegister(const MachineFunction &MF) const override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFSubtarget.cpp b/contrib/llvm/lib/Target/BPF/BPFSubtarget.cpp
index 7f7a262..65acd58 100644
--- a/contrib/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
void BPFSubtarget::anchor() {}
-BPFSubtarget::BPFSubtarget(const std::string &TT, const std::string &CPU,
+BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
: BPFGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(*this),
TLInfo(TM, *this), TSInfo(TM.getDataLayout()) {}
diff --git a/contrib/llvm/lib/Target/BPF/BPFSubtarget.h b/contrib/llvm/lib/Target/BPF/BPFSubtarget.h
index 347cffd..701ac57 100644
--- a/contrib/llvm/lib/Target/BPF/BPFSubtarget.h
+++ b/contrib/llvm/lib/Target/BPF/BPFSubtarget.h
@@ -38,8 +38,8 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
public:
// This constructor initializes the data members to match that
// of the specified triple.
- BPFSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM);
+ BPFSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
+ const TargetMachine &TM);
// ParseSubtargetFeatures - Parses features string setting specified
// subtarget options. Definition of function is auto generated by tblgen.
@@ -59,6 +59,6 @@ public:
return &InstrInfo.getRegisterInfo();
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 3329d5f..5a888a9 100644
--- a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -29,19 +29,20 @@ extern "C" void LLVMInitializeBPFTarget() {
}
// DataLayout: little or big endian
-static std::string computeDataLayout(StringRef TT) {
- if (Triple(TT).getArch() == Triple::bpfeb)
+static std::string computeDataLayout(const Triple &TT) {
+ if (TT.getArch() == Triple::bpfeb)
return "E-m:e-p:64:64-i64:64-n32:64-S128";
else
return "e-m:e-p:64:64-i64:64-n32:64-S128";
}
-BPFTargetMachine::BPFTargetMachine(const Target &T, StringRef TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
+BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS,
- Options, RM, CM, OL),
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
+ OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
@@ -59,7 +60,7 @@ public:
bool addInstSelector() override;
};
-}
+} // namespace
TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) {
return new BPFPassConfig(this, PM);
diff --git a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.h b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.h
index 6aeafb9..c715fd5 100644
--- a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.h
+++ b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.h
@@ -23,8 +23,8 @@ class BPFTargetMachine : public LLVMTargetMachine {
BPFSubtarget Subtarget;
public:
- BPFTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM,
+ BPFTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
const BPFSubtarget *getSubtargetImpl() const { return &Subtarget; }
@@ -38,6 +38,6 @@ public:
return TLOF.get();
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
index adcaff6..cb07471 100644
--- a/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
+++ b/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
@@ -37,6 +37,6 @@ public:
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 7b1d925..33aecb7 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -84,16 +84,16 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
return createBPFELFObjectWriter(OS, 0, IsLittleEndian);
}
-}
+} // namespace
MCAsmBackend *llvm::createBPFAsmBackend(const Target &T,
- const MCRegisterInfo &MRI, StringRef TT,
- StringRef CPU) {
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU) {
return new BPFAsmBackend(/*IsLittleEndian=*/true);
}
MCAsmBackend *llvm::createBPFbeAsmBackend(const Target &T,
- const MCRegisterInfo &MRI, StringRef TT,
- StringRef CPU) {
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU) {
return new BPFAsmBackend(/*IsLittleEndian=*/false);
}
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index 05ba618..ef4f05f 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -25,7 +25,7 @@ protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
};
-}
+} // namespace
BPFELFObjectWriter::BPFELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_NONE,
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index d63bbf4..2237654 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -36,6 +36,6 @@ public:
HasDotTypeDotSizeDirective = false;
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index dc4ede3..b579afd 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -58,7 +58,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
};
-}
+} // namespace
MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index 7cedba9..3e928fc 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -46,8 +46,8 @@ static MCRegisterInfo *createBPFMCRegisterInfo(StringRef TT) {
return X;
}
-static MCSubtargetInfo *createBPFMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
+static MCSubtargetInfo *createBPFMCSubtargetInfo(const Triple &TT,
+ StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitBPFMCSubtargetInfo(X, TT, CPU, FS);
return X;
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index a9ba7d9..3d2583a 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -25,8 +25,9 @@ class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
-class Target;
class StringRef;
+class Target;
+class Triple;
class raw_ostream;
class raw_pwrite_stream;
@@ -42,13 +43,13 @@ MCCodeEmitter *createBPFbeMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);
MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+ const Triple &TT, StringRef CPU);
MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+ const Triple &TT, StringRef CPU);
MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI, bool IsLittleEndian);
-}
+} // namespace llvm
// Defines symbolic names for BPF registers. This defines a mapping from
// register name to register number.
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
index b837798..9c9c097 100644
--- a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
+++ b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
@@ -513,6 +513,7 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
HANDLE_ATTR(StackProtect);
HANDLE_ATTR(StackProtectReq);
HANDLE_ATTR(StackProtectStrong);
+ HANDLE_ATTR(SafeStack);
HANDLE_ATTR(NoCapture);
HANDLE_ATTR(NoRedZone);
HANDLE_ATTR(NoImplicitFloat);
@@ -2148,7 +2149,8 @@ char CppWriter::ID = 0;
bool CPPTargetMachine::addPassesToEmitFile(
PassManagerBase &PM, raw_pwrite_stream &o, CodeGenFileType FileType,
- bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) {
+ bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter,
+ MachineFunctionInitializer *MFInitializer) {
if (FileType != TargetMachine::CGFT_AssemblyFile)
return true;
auto FOut = llvm::make_unique<formatted_raw_ostream>(o);
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
index 02d705e..0cd20da 100644
--- a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
@@ -23,21 +23,21 @@ namespace llvm {
class formatted_raw_ostream;
struct CPPTargetMachine : public TargetMachine {
- CPPTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM,
+ CPPTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL)
: TargetMachine(T, "", TT, CPU, FS, Options) {}
public:
bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out,
CodeGenFileType FileType, bool DisableVerify,
- AnalysisID StartAfter,
- AnalysisID StopAfter) override;
+ AnalysisID StartAfter, AnalysisID StopAfter,
+ MachineFunctionInitializer *MFInitializer) override;
};
extern Target TheCppBackendTarget;
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 14f9d77..837838a 100644
--- a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -53,7 +53,7 @@ public:
raw_ostream &VStream,
raw_ostream &CStream) const override;
};
-}
+} // namespace
static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
@@ -69,6 +69,33 @@ static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op,
raw_ostream &os);
static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst);
+static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+
static const uint16_t IntRegDecoderTable[] = {
Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9,
@@ -356,6 +383,97 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
return Result;
}
+static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ uint64_t imm = SignExtend64<16>(tmp);
+ MI.addOperand(MCOperand::createImm(imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ uint64_t imm = SignExtend64<12>(tmp);
+ MI.addOperand(MCOperand::createImm(imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ uint64_t imm = SignExtend64<11>(tmp);
+ MI.addOperand(MCOperand::createImm(imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ uint64_t imm = SignExtend64<12>(tmp);
+ MI.addOperand(MCOperand::createImm(imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ uint64_t imm = SignExtend64<13>(tmp);
+ MI.addOperand(MCOperand::createImm(imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ uint64_t imm = SignExtend64<14>(tmp);
+ MI.addOperand(MCOperand::createImm(imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ uint64_t imm = SignExtend64<10>(tmp);
+ MI.addOperand(MCOperand::createImm(imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/,
+ const void *Decoder) {
+ uint64_t imm = SignExtend64<8>(tmp);
+ MI.addOperand(MCOperand::createImm(imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ uint64_t imm = SignExtend64<6>(tmp);
+ MI.addOperand(MCOperand::createImm(imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ uint64_t imm = SignExtend64<4>(tmp);
+ MI.addOperand(MCOperand::createImm(imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ uint64_t imm = SignExtend64<5>(tmp);
+ MI.addOperand(MCOperand::createImm(imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ uint64_t imm = SignExtend64<6>(tmp);
+ MI.addOperand(MCOperand::createImm(imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ uint64_t imm = SignExtend64<7>(tmp);
+ MI.addOperand(MCOperand::createImm(imm));
+ return MCDisassembler::Success;
+}
+
// These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td
enum subInstBinaryValues {
V4_SA1_addi_BITS = 0x0000,
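The block of sNImmDecoder helpers added above all do the same thing: sign-extend the raw encoded field and attach it as an immediate operand. The widths follow the Hexagon operand naming, sN_M being a signed N-bit value scaled by 2^M, so the raw field is N+M bits wide (hence s11_2ImmDecoder extends 13 bits and s4_3ImmDecoder extends 7); that reading is inferred from the decoder bodies rather than stated in the patch. A tiny check of the SignExtend64 behaviour the decoders rely on:

#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

int main() {
  // A 4-bit field of all ones is -1 once sign-extended.
  assert(SignExtend64<4>(0xFu) == -1);
  // An s4_2 operand occupies a 6-bit raw field: 0b111100 decodes to -4,
  // i.e. the 4-bit value -1 scaled by 4.
  assert(SignExtend64<6>(0x3Cu) == -4);
  return 0;
}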
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
index 6e2ecaf..b24d24a 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
@@ -15,50 +15,6 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
#define LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
-#include "MCTargetDesc/HexagonMCTargetDesc.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- class FunctionPass;
- class HexagonAsmPrinter;
- class HexagonTargetMachine;
- class MachineInstr;
- class MCInst;
- class ModulePass;
- class raw_ostream;
- class TargetMachine;
-
- FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
- FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM);
- FunctionPass *createHexagonFPMoverPass(const TargetMachine &TM);
- FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
- FunctionPass *createHexagonCFGOptimizer();
-
- FunctionPass *createHexagonSplitConst32AndConst64();
- FunctionPass *createHexagonExpandPredSpillCode();
- FunctionPass *createHexagonHardwareLoops();
- FunctionPass *createHexagonPeephole();
- FunctionPass *createHexagonFixupHwLoops();
- FunctionPass *createHexagonNewValueJump();
- FunctionPass *createHexagonCopyToCombine();
- FunctionPass *createHexagonPacketizer();
- FunctionPass *createHexagonNewValueJump();
-
-/* TODO: object output.
- MCCodeEmitter *createHexagonMCCodeEmitter(const Target &,
- const TargetMachine &TM,
- MCContext &Ctx);
-*/
-/* TODO: assembler input.
- TargetAsmBackend *createHexagonAsmBackend(const Target &,
- const std::string &);
-*/
- void HexagonLowerToMC(MachineInstr const *MI, MCInst &MCI,
- HexagonAsmPrinter &AP);
-} // end namespace llvm;
-
#define Hexagon_POINTER_SIZE 4
#define Hexagon_PointerSize (Hexagon_POINTER_SIZE)
@@ -75,7 +31,7 @@ namespace llvm {
// Maximum number of words and instructions in a packet.
#define HEXAGON_PACKET_SIZE 4
-
+#define HEXAGON_MAX_PACKET_SIZE (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE)
// Minimum number of instructions in an end-loop packet.
#define HEXAGON_PACKET_INNER_SIZE 2
#define HEXAGON_PACKET_OUTER_SIZE 3
@@ -83,4 +39,25 @@ namespace llvm {
// including a compound one or a duplex or an extender.
#define HEXAGON_PRESHUFFLE_PACKET_SIZE (HEXAGON_PACKET_SIZE + 3)
+// Name of the global offset table as defined by the Hexagon ABI
+#define HEXAGON_GOT_SYM_NAME "_GLOBAL_OFFSET_TABLE_"
+
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MachineInstr;
+ class MCInst;
+ class MCInstrInfo;
+ class HexagonAsmPrinter;
+ class HexagonTargetMachine;
+
+ void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI,
+ HexagonAsmPrinter &AP);
+
+ /// \brief Creates a Hexagon-specific Target Transformation Info pass.
+ ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM);
+} // namespace llvm
+
#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
index 792fc8b..f09a5b9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -53,6 +53,6 @@ namespace llvm {
static const char *getRegisterName(unsigned RegNo);
};
-} // end of llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index 703e691..ff1a4fe 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -28,6 +28,7 @@ using namespace llvm;
#define DEBUG_TYPE "hexagon_cfg"
namespace llvm {
+ FunctionPass *createHexagonCFGOptimizer();
void initializeHexagonCFGOptimizerPass(PassRegistry&);
}
@@ -227,7 +228,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
}
return true;
}
-}
+} // namespace
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 1d6455c..9fd863f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -49,6 +49,7 @@ MaxNumOfInstsBetweenNewValueStoreAndTFR("max-num-inst-between-tfr-and-nv-store",
"consider the store still to be newifiable"));
namespace llvm {
+ FunctionPass *createHexagonCopyToCombine();
void initializeHexagonCopyToCombinePass(PassRegistry&);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 37ed173..33766df 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -173,7 +173,7 @@ namespace {
bool coalesceRegisters(RegisterRef R1, RegisterRef R2);
bool coalesceSegments(MachineFunction &MF);
};
-}
+} // namespace
char HexagonExpandCondsets::ID = 0;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index 40059fb..1657d88 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -41,6 +41,7 @@ using namespace llvm;
namespace llvm {
+ FunctionPass *createHexagonExpandPredSpillCode();
void initializeHexagonExpandPredSpillCodePass(PassRegistry&);
}
@@ -332,7 +333,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
return true;
}
-}
+} // namespace
//===----------------------------------------------------------------------===//
// Public Constructor Functions
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index 3d786a9..3ea77cd 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -30,6 +30,7 @@ static cl::opt<unsigned> MaxLoopRange(
cl::desc("Restrict range of loopN instructions (testing only)"));
namespace llvm {
+ FunctionPass *createHexagonFixupHwLoops();
void initializeHexagonFixupHwLoopsPass(PassRegistry&);
}
@@ -66,7 +67,7 @@ namespace {
};
char HexagonFixupHwLoops::ID = 0;
-}
+} // namespace
INITIALIZE_PASS(HexagonFixupHwLoops, "hwloopsfixup",
"Hexagon Hardware Loops Fixup", false, false)
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 868f87e..9797134 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -238,7 +238,7 @@ namespace {
return true;
return false;
}
-}
+} // namespace
/// Implements shrink-wrapping of the stack frame. By default, stack frame
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index 89500cb..767e13c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -99,6 +99,6 @@ private:
bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index db72899..53b6bf6 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -63,6 +63,7 @@ static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
namespace llvm {
+ FunctionPass *createHexagonHardwareLoops();
void initializeHexagonHardwareLoopsPass(PassRegistry&);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 7a213aa..9123057 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -104,6 +104,7 @@ public:
SDNode *SelectConstantFP(SDNode *N);
SDNode *SelectAdd(SDNode *N);
SDNode *SelectBitOp(SDNode *N);
+ bool isConstExtProfitable(SDNode *N) const;
// XformMskToBitPosU5Imm - Returns the bit position which
// the single bit 32 bit mask represents.
@@ -1327,6 +1328,20 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
return false;
}
+bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const {
+ unsigned UseCount = 0;
+ unsigned CallCount = 0;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ // Ignore call instructions.
+ if (I->getOpcode() == ISD::CopyToReg)
+ ++CallCount;
+ UseCount++;
+ }
+
+ return (UseCount <= 1) || (CallCount > 1);
+
+}
+
void HexagonDAGToDAGISel::PreprocessISelDAG() {
SelectionDAG &DAG = *CurDAG;
std::vector<SDNode*> Nodes;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 74d92ae..1a14c88 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -95,7 +95,7 @@ public:
unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
};
-}
+} // namespace
// Implement calling convention for Hexagon.
static bool
@@ -397,7 +397,9 @@ HexagonTargetLowering::LowerReturn(SDValue Chain,
bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
// If either no tail call or told not to tail call at all, don't.
- if (!CI->isTailCall() || HTM.Options.DisableTailCalls)
+ auto Attr =
+ CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
+ if (!CI->isTailCall() || Attr.getValueAsString() == "true")
return false;
return true;
@@ -486,7 +488,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
- if (DAG.getTarget().Options.DisableTailCalls)
+ auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
+ if (Attr.getValueAsString() == "true")
isTailCall = false;
if (isTailCall) {
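Both hunks above replace the global TargetOptions::DisableTailCalls flag with the per-function "disable-tail-calls" string attribute, so the decision can differ between functions in a single module. A minimal reader for that attribute, assuming only the Function/Attribute API the hunks themselves use:

#include "llvm/IR/Function.h"
using namespace llvm;

static bool tailCallsDisabled(const Function &F) {
  // getFnAttribute returns an empty attribute when the key is missing,
  // so unannotated functions simply compare unequal to "true".
  return F.getFnAttribute("disable-tail-calls").getValueAsString() == "true";
}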
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index b80e847..b9d18df 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -86,7 +86,7 @@ bool isPositiveHalfWord(SDNode *N);
OP_END
};
- }
+ } // namespace HexagonISD
class HexagonSubtarget;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index e566a97..3cb0823 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -159,7 +159,7 @@ findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
unsigned HexagonInstrInfo::InsertBranch(
MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
+ ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
Opcode_t BOpc = Hexagon::J2_jump;
Opcode_t BccOpc = Hexagon::J2_jumpt;
@@ -1013,7 +1013,7 @@ int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
bool HexagonInstrInfo::
PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Cond) const {
+ ArrayRef<MachineOperand> Cond) const {
if (Cond.empty() || isEndLoopN(Cond[0].getImm())) {
DEBUG(dbgs() << "\nCannot predicate:"; MI->dump(););
return false;
@@ -1162,8 +1162,8 @@ HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
bool
HexagonInstrInfo::
-SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const {
+SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const {
// TODO: Fix this
return false;
}
@@ -1982,8 +1982,7 @@ bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const {
(Opcode == Hexagon::J2_jumpf);
}
-bool HexagonInstrInfo::predOpcodeHasNot(
- const SmallVectorImpl<MachineOperand> &Cond) const {
+bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const {
if (Cond.empty() || !isPredicated(Cond[0].getImm()))
return false;
return !isPredicatedTrue(Cond[0].getImm());
@@ -1994,7 +1993,7 @@ bool HexagonInstrInfo::isEndLoopN(Opcode_t Opcode) const {
Opcode == Hexagon::ENDLOOP1);
}
-bool HexagonInstrInfo::getPredReg(const SmallVectorImpl<MachineOperand> &Cond,
+bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond,
unsigned &PredReg, unsigned &PredRegPos,
unsigned &PredRegFlags) const {
if (Cond.empty())
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index a7ae65e..91f508e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -69,8 +69,7 @@ public:
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
bool analyzeCompare(const MachineInstr *MI,
@@ -129,7 +128,7 @@ public:
bool isBranch(const MachineInstr *MI) const;
bool isPredicable(MachineInstr *MI) const override;
bool PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Cond) const override;
+ ArrayRef<MachineOperand> Cond) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
@@ -149,8 +148,8 @@ public:
bool isPredicatedNew(unsigned Opcode) const;
bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const override;
- bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const override;
+ bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const override;
bool
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
@@ -222,15 +221,14 @@ public:
bool NonExtEquivalentExists (const MachineInstr *MI) const;
short getNonExtOpcode(const MachineInstr *MI) const;
bool PredOpcodeHasJMP_c(Opcode_t Opcode) const;
- bool predOpcodeHasNot(const SmallVectorImpl<MachineOperand> &Cond) const;
+ bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
bool isEndLoopN(Opcode_t Opcode) const;
- bool getPredReg(const SmallVectorImpl<MachineOperand> &Cond,
- unsigned &PredReg, unsigned &PredRegPos,
- unsigned &PredRegFlags) const;
+ bool getPredReg(ArrayRef<MachineOperand> Cond, unsigned &PredReg,
+ unsigned &PredRegPos, unsigned &PredRegFlags) const;
int getCondOpcode(int Opc, bool sense) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index 4275230..1d0d015 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -676,6 +676,7 @@ def : Pat <(int_hexagon_A2_tfrih (I32:$Rs), u16_0ImmPred:$Is),
// Transfer Register/immediate.
def : T_R_pat <A2_tfr, int_hexagon_A2_tfr>;
def : T_I_pat <A2_tfrsi, int_hexagon_A2_tfrsi>;
+def : T_I_pat <A2_tfrpi, int_hexagon_A2_tfrpi>;
// Assembler mapped from Rdd32=Rss32 to Rdd32=combine(Rss.H32,Rss.L32)
def : Pat<(int_hexagon_A2_tfrp DoubleRegs:$src),
@@ -690,15 +691,15 @@ def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>;
def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>;
def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>;
-def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>;
+def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s8ExtPred, s8ImmPred>;
def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))),
(i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>;
// Mux
-def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>;
-def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>;
-def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>;
+def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s8ExtPred>;
+def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s8ExtPred>;
+def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s8ExtPred, s8ImmPred>;
// Shift halfword
def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>;
@@ -719,17 +720,17 @@ def : T_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>;
def : T_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>;
def : T_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>;
-def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>;
-def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>;
-def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>;
+def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s10ExtPred>;
+def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s10ExtPred>;
+def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u9ExtPred>;
-def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)),
+def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s8ExtPred:$src2)),
(i32 (C2_cmpgti (I32:$src1),
- (DEC_CONST_SIGNED s32ImmPred:$src2)))>;
+ (DEC_CONST_SIGNED s8ExtPred:$src2)))>;
-def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)),
+def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u8ExtPred:$src2)),
(i32 (C2_cmpgtui (I32:$src1),
- (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>;
+ (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>;
// The instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0.
def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)),
@@ -923,6 +924,10 @@ def: qi_CRInst_qiqi_pat<C2_or, int_hexagon_C2_or>;
def: qi_CRInst_qiqi_pat<C2_orn, int_hexagon_C2_orn>;
def: qi_CRInst_qiqi_pat<C2_xor, int_hexagon_C2_xor>;
+// Assembler mapped from Pd4=Ps4 to Pd4=or(Ps4,Ps4)
+def : Pat<(int_hexagon_C2_pxfer_map PredRegs:$src),
+ (C2_pxfer_map PredRegs:$src)>;
+
// Multiply 32x32 and use lower result
def : T_RRI_pat <M2_macsip, int_hexagon_M2_macsip>;
def : T_RRI_pat <M2_macsin, int_hexagon_M2_macsin>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index 7672358..5681ae2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -80,6 +80,6 @@ public:
void setStackAlignBaseVReg(unsigned R) { StackAlignBaseReg = R; }
unsigned getStackAlignBaseVReg() const { return StackAlignBaseReg; }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
index 6034344..fae16e2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -238,7 +238,7 @@ protected:
#endif
};
-} // namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 81af4db..707bfdb 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -60,6 +60,7 @@ static cl::opt<bool> DisableNewValueJumps("disable-nvjump", cl::Hidden,
cl::desc("Disable New Value Jumps"));
namespace llvm {
+ FunctionPass *createHexagonNewValueJump();
void initializeHexagonNewValueJumpPass(PassRegistry&);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
index be8204b..2bece8f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
@@ -7,32 +7,24 @@
//
//===----------------------------------------------------------------------===//
+def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; }
+def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; }
+def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; }
+def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; }
+
// Immediate operands.
let PrintMethod = "printImmOperand" in {
- // f32Ext type is used to identify constant extended floating point immediates.
- def f32Ext : Operand<f32>;
def s32Imm : Operand<i32>;
- def s26_6Imm : Operand<i32>;
- def s16Imm : Operand<i32>;
- def s12Imm : Operand<i32>;
- def s11Imm : Operand<i32>;
- def s11_0Imm : Operand<i32>;
- def s11_1Imm : Operand<i32>;
- def s11_2Imm : Operand<i32>;
- def s11_3Imm : Operand<i32>;
- def s10Imm : Operand<i32>;
- def s9Imm : Operand<i32>;
- def m9Imm : Operand<i32>;
def s8Imm : Operand<i32>;
def s8Imm64 : Operand<i64>;
def s6Imm : Operand<i32>;
def s6_3Imm : Operand<i32>;
def s4Imm : Operand<i32>;
- def s4_0Imm : Operand<i32>;
- def s4_1Imm : Operand<i32>;
- def s4_2Imm : Operand<i32>;
- def s4_3Imm : Operand<i32>;
+ def s4_0Imm : Operand<i32> { let DecoderMethod = "s4_0ImmDecoder"; }
+ def s4_1Imm : Operand<i32> { let DecoderMethod = "s4_1ImmDecoder"; }
+ def s4_2Imm : Operand<i32> { let DecoderMethod = "s4_2ImmDecoder"; }
+ def s4_3Imm : Operand<i32> { let DecoderMethod = "s4_3ImmDecoder"; }
def u64Imm : Operand<i64>;
def u32Imm : Operand<i32>;
def u26_6Imm : Operand<i32>;
@@ -446,17 +438,18 @@ def SetClr3ImmPred : PatLeaf<(i32 imm), [{
// Extendable immediate operands.
let PrintMethod = "printExtOperand" in {
- def s16Ext : Operand<i32>;
- def s12Ext : Operand<i32>;
- def s10Ext : Operand<i32>;
- def s9Ext : Operand<i32>;
- def s8Ext : Operand<i32>;
+ def f32Ext : Operand<f32>;
+ def s16Ext : Operand<i32> { let DecoderMethod = "s16ImmDecoder"; }
+ def s12Ext : Operand<i32> { let DecoderMethod = "s12ImmDecoder"; }
+ def s11_0Ext : Operand<i32> { let DecoderMethod = "s11_0ImmDecoder"; }
+ def s11_1Ext : Operand<i32> { let DecoderMethod = "s11_1ImmDecoder"; }
+ def s11_2Ext : Operand<i32> { let DecoderMethod = "s11_2ImmDecoder"; }
+ def s11_3Ext : Operand<i32> { let DecoderMethod = "s11_3ImmDecoder"; }
+ def s10Ext : Operand<i32> { let DecoderMethod = "s10ImmDecoder"; }
+ def s9Ext : Operand<i32> { let DecoderMethod = "s90ImmDecoder"; }
+ def s8Ext : Operand<i32> { let DecoderMethod = "s8ImmDecoder"; }
def s7Ext : Operand<i32>;
- def s6Ext : Operand<i32>;
- def s11_0Ext : Operand<i32>;
- def s11_1Ext : Operand<i32>;
- def s11_2Ext : Operand<i32>;
- def s11_3Ext : Operand<i32>;
+ def s6Ext : Operand<i32> { let DecoderMethod = "s6_0ImmDecoder"; }
def u6Ext : Operand<i32>;
def u7Ext : Operand<i32>;
def u8Ext : Operand<i32>;
@@ -468,6 +461,46 @@ let PrintMethod = "printExtOperand" in {
def u6_3Ext : Operand<i32>;
}
+def s10ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (isInt<10>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+}]>;
+
+def s8ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (isInt<8>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+}]>;
+
+def u8ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (isUInt<8>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v);
+}]>;
+
+def u9ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (isUInt<9>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v);
+}]>;
+
// This complex pattern exists only to create a machine instruction operand
// of type "frame index". There doesn't seem to be a way to do that directly
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
index 503bfdb..94ec2e7 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -75,6 +75,7 @@ static cl::opt<bool> DisableOptExtTo64("disable-hexagon-opt-ext-to-64",
cl::desc("Disable Optimization of extensions to i64."));
namespace llvm {
+ FunctionPass *createHexagonPeephole();
void initializeHexagonPeepholePass(PassRegistry&);
}
@@ -103,7 +104,7 @@ namespace {
private:
void ChangeOpInto(MachineOperand &Dst, MachineOperand &Src);
};
-}
+} // namespace
char HexagonPeephole::ID = 0;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
index 0c24075..d586c39 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -24,6 +24,7 @@
using namespace llvm;
namespace llvm {
+ FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
void initializeHexagonRemoveExtendArgsPass(PassRegistry&);
}
@@ -47,7 +48,7 @@ namespace {
FunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
char HexagonRemoveExtendArgs::ID = 0;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
index 8ac2e43..c72051c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -32,6 +32,6 @@ public:
MachinePointerInfo SrcPtrInfo) const override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 4efb5f7..61bb7c5 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -45,6 +45,11 @@ using namespace llvm;
#define DEBUG_TYPE "xfer"
+namespace llvm {
+ FunctionPass *createHexagonSplitConst32AndConst64();
+ void initializeHexagonSplitConst32AndConst64Pass(PassRegistry&);
+}
+
namespace {
class HexagonSplitConst32AndConst64 : public MachineFunctionPass {
@@ -151,7 +156,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
return true;
}
-}
+} // namespace
//===----------------------------------------------------------------------===//
// Public Constructor Functions
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index d61cc54..fe6c4f4 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -70,8 +70,8 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
return *this;
}
-HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
- const TargetMachine &TM)
+HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetMachine &TM)
: HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
TSInfo(*TM.getDataLayout()), FrameLowering() {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index 780567b..34cdad7 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -52,7 +52,7 @@ private:
InstrItineraryData InstrItins;
public:
- HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
+ HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
const TargetMachine &TM);
/// getInstrItins - Return the instruction itineraries based on subtarget
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 0679866..90f1ced 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -61,14 +61,30 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
namespace llvm {
FunctionPass *createHexagonExpandCondsets();
-}
+ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+ FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM);
+ FunctionPass *createHexagonFPMoverPass(const TargetMachine &TM);
+ FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
+ FunctionPass *createHexagonCFGOptimizer();
+
+ FunctionPass *createHexagonSplitConst32AndConst64();
+ FunctionPass *createHexagonExpandPredSpillCode();
+ FunctionPass *createHexagonHardwareLoops();
+ FunctionPass *createHexagonPeephole();
+ FunctionPass *createHexagonFixupHwLoops();
+ FunctionPass *createHexagonNewValueJump();
+ FunctionPass *createHexagonCopyToCombine();
+ FunctionPass *createHexagonPacketizer();
+ FunctionPass *createHexagonNewValueJump();
+} // namespace llvm
/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
///
/// Hexagon_TODO: Do I need an aggregate alignment?
///
-HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
+HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
index 5774f7e..115eadb 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -27,7 +27,7 @@ class HexagonTargetMachine : public LLVMTargetMachine {
HexagonSubtarget Subtarget;
public:
- HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
+ HexagonTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h
new file mode 100644
index 0000000..2b4a3ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h
@@ -0,0 +1,31 @@
+//===-- HexagonTargetStreamer.h - Hexagon Target Streamer ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONTARGETSTREAMER_H
+#define HEXAGONTARGETSTREAMER_H
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+class HexagonTargetStreamer : public MCTargetStreamer {
+public:
+ HexagonTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0){};
+ virtual void emitFAlign(unsigned Size, unsigned MaxBytesToEmit){};
+ virtual void EmitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment,
+ unsigned AccessGranularity){};
+ virtual void EmitLocalCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlign,
+ unsigned AccessGranularity){};
+};
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 0cc59bc..66fdd65 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -57,6 +57,7 @@ static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
cl::desc("Allow non-solo packetization of volatile memory references"));
namespace llvm {
+ FunctionPass *createHexagonPacketizer();
void initializeHexagonPacketizerPass(PassRegistry&);
}
@@ -169,7 +170,7 @@ namespace {
void reserveResourcesForConstExt(MachineInstr* MI);
bool isNewValueInst(MachineInstr* MI);
};
-}
+} // namespace
INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
false, false)
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 7689484..99ea2fa 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -17,11 +17,14 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
using namespace Hexagon;
+#define DEBUG_TYPE "hexagon-asm-backend"
+
namespace {
class HexagonAsmBackend : public MCAsmBackend {
@@ -278,8 +281,26 @@ public:
llvm_unreachable("relaxInstruction() unimplemented");
}
- bool writeNopData(uint64_t /*Count*/,
- MCObjectWriter * /*OW*/) const override {
+ bool writeNopData(uint64_t Count,
+ MCObjectWriter * OW) const override {
+ static const uint32_t Nopcode = 0x7f000000, // Hard-coded NOP.
+ ParseIn = 0x00004000, // In packet parse-bits.
+ ParseEnd = 0x0000c000; // End of packet parse-bits.
+
+ while(Count % HEXAGON_INSTR_SIZE) {
+ DEBUG(dbgs() << "Alignment not a multiple of the instruction size:" <<
+ Count % HEXAGON_INSTR_SIZE << "/" << HEXAGON_INSTR_SIZE << "\n");
+ --Count;
+ OW->write8(0);
+ }
+
+ while(Count) {
+ Count -= HEXAGON_INSTR_SIZE;
+ // Close the packet whenever a multiple of the maximum packet size remains
+ uint32_t ParseBits = (Count % (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE))?
+ ParseIn: ParseEnd;
+ OW->write32(Nopcode | ParseBits);
+ }
return true;
}
};
@@ -288,8 +309,8 @@ public:
namespace llvm {
MCAsmBackend *createHexagonAsmBackend(Target const &T,
MCRegisterInfo const & /*MRI*/,
- StringRef TT, StringRef CPU) {
- uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+ const Triple &TT, StringRef CPU) {
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
return new HexagonAsmBackend(T, OSABI, CPU);
}
}
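A standalone sketch of the NOP-padding arithmetic in the writeNopData change above, assuming HEXAGON_INSTR_SIZE == 4 and HEXAGON_PACKET_SIZE == 4; it prints the values that would be written instead of going through an MCObjectWriter.

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t Nopcode  = 0x7f000000; // hard-coded NOP
  const uint32_t ParseIn  = 0x00004000; // in-packet parse bits
  const uint32_t ParseEnd = 0x0000c000; // end-of-packet parse bits
  uint64_t Count = 12;                  // bytes of padding requested
  while (Count % 4) {                   // leftover bytes are zero-filled
    --Count;
    std::printf("emit byte 0x00\n");
  }
  while (Count) {
    Count -= 4;                         // one 4-byte NOP word
    uint32_t ParseBits = (Count % 16) ? ParseIn : ParseEnd;
    std::printf("emit word 0x%08x\n", (unsigned)(Nopcode | ParseBits));
  }
  // For Count == 12 this emits two in-packet NOPs and one packet-closing NOP.
  return 0;
}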
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index 8430723..0f7cf0e 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -31,318 +31,216 @@ public:
unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup,
bool IsPCRel) const override;
};
-}
+} // namespace
HexagonELFObjectWriter::HexagonELFObjectWriter(uint8_t OSABI, StringRef C)
: MCELFObjectTargetWriter(/*Is64bit*/ false, OSABI, ELF::EM_HEXAGON,
/*HasRelocationAddend*/ true),
CPU(C) {}
-unsigned HexagonELFObjectWriter::GetRelocType(MCValue const &/*Target*/,
+unsigned HexagonELFObjectWriter::GetRelocType(MCValue const & /*Target*/,
MCFixup const &Fixup,
bool IsPCRel) const {
- // determine the type of the relocation
- unsigned Type = (unsigned)ELF::R_HEX_NONE;
- unsigned Kind = (unsigned)Fixup.getKind();
-
- switch (Kind) {
- default:
- DEBUG(dbgs() << "unrecognized relocation " << Fixup.getKind() << "\n");
- llvm_unreachable("Unimplemented Fixup kind!");
- break;
- case FK_Data_4:
- Type = (IsPCRel) ? ELF::R_HEX_32_PCREL : ELF::R_HEX_32;
- break;
- case FK_PCRel_4:
- Type = ELF::R_HEX_32_PCREL;
- break;
- case FK_Data_2:
- Type = ELF::R_HEX_16;
- break;
- case FK_Data_1:
- Type = ELF::R_HEX_8;
- break;
- case fixup_Hexagon_B22_PCREL:
- Type = ELF::R_HEX_B22_PCREL;
- break;
- case fixup_Hexagon_B15_PCREL:
- Type = ELF::R_HEX_B15_PCREL;
- break;
- case fixup_Hexagon_B7_PCREL:
- Type = ELF::R_HEX_B7_PCREL;
- break;
- case fixup_Hexagon_LO16:
- Type = ELF::R_HEX_LO16;
- break;
- case fixup_Hexagon_HI16:
- Type = ELF::R_HEX_HI16;
- break;
- case fixup_Hexagon_32:
- Type = ELF::R_HEX_32;
- break;
- case fixup_Hexagon_16:
- Type = ELF::R_HEX_16;
- break;
- case fixup_Hexagon_8:
- Type = ELF::R_HEX_8;
- break;
- case fixup_Hexagon_GPREL16_0:
- Type = ELF::R_HEX_GPREL16_0;
- break;
- case fixup_Hexagon_GPREL16_1:
- Type = ELF::R_HEX_GPREL16_1;
- break;
- case fixup_Hexagon_GPREL16_2:
- Type = ELF::R_HEX_GPREL16_2;
- break;
- case fixup_Hexagon_GPREL16_3:
- Type = ELF::R_HEX_GPREL16_3;
- break;
- case fixup_Hexagon_HL16:
- Type = ELF::R_HEX_HL16;
- break;
- case fixup_Hexagon_B13_PCREL:
- Type = ELF::R_HEX_B13_PCREL;
- break;
- case fixup_Hexagon_B9_PCREL:
- Type = ELF::R_HEX_B9_PCREL;
- break;
- case fixup_Hexagon_B32_PCREL_X:
- Type = ELF::R_HEX_B32_PCREL_X;
- break;
- case fixup_Hexagon_32_6_X:
- Type = ELF::R_HEX_32_6_X;
- break;
- case fixup_Hexagon_B22_PCREL_X:
- Type = ELF::R_HEX_B22_PCREL_X;
- break;
- case fixup_Hexagon_B15_PCREL_X:
- Type = ELF::R_HEX_B15_PCREL_X;
- break;
- case fixup_Hexagon_B13_PCREL_X:
- Type = ELF::R_HEX_B13_PCREL_X;
- break;
- case fixup_Hexagon_B9_PCREL_X:
- Type = ELF::R_HEX_B9_PCREL_X;
- break;
- case fixup_Hexagon_B7_PCREL_X:
- Type = ELF::R_HEX_B7_PCREL_X;
- break;
- case fixup_Hexagon_16_X:
- Type = ELF::R_HEX_16_X;
- break;
- case fixup_Hexagon_12_X:
- Type = ELF::R_HEX_12_X;
- break;
- case fixup_Hexagon_11_X:
- Type = ELF::R_HEX_11_X;
- break;
- case fixup_Hexagon_10_X:
- Type = ELF::R_HEX_10_X;
- break;
- case fixup_Hexagon_9_X:
- Type = ELF::R_HEX_9_X;
- break;
- case fixup_Hexagon_8_X:
- Type = ELF::R_HEX_8_X;
- break;
- case fixup_Hexagon_7_X:
- Type = ELF::R_HEX_7_X;
- break;
- case fixup_Hexagon_6_X:
- Type = ELF::R_HEX_6_X;
- break;
- case fixup_Hexagon_32_PCREL:
- Type = ELF::R_HEX_32_PCREL;
- break;
- case fixup_Hexagon_COPY:
- Type = ELF::R_HEX_COPY;
- break;
- case fixup_Hexagon_GLOB_DAT:
- Type = ELF::R_HEX_GLOB_DAT;
- break;
- case fixup_Hexagon_JMP_SLOT:
- Type = ELF::R_HEX_JMP_SLOT;
- break;
- case fixup_Hexagon_RELATIVE:
- Type = ELF::R_HEX_RELATIVE;
- break;
- case fixup_Hexagon_PLT_B22_PCREL:
- Type = ELF::R_HEX_PLT_B22_PCREL;
- break;
- case fixup_Hexagon_GOTREL_LO16:
- Type = ELF::R_HEX_GOTREL_LO16;
- break;
- case fixup_Hexagon_GOTREL_HI16:
- Type = ELF::R_HEX_GOTREL_HI16;
- break;
- case fixup_Hexagon_GOTREL_32:
- Type = ELF::R_HEX_GOTREL_32;
- break;
- case fixup_Hexagon_GOT_LO16:
- Type = ELF::R_HEX_GOT_LO16;
- break;
- case fixup_Hexagon_GOT_HI16:
- Type = ELF::R_HEX_GOT_HI16;
- break;
- case fixup_Hexagon_GOT_32:
- Type = ELF::R_HEX_GOT_32;
- break;
- case fixup_Hexagon_GOT_16:
- Type = ELF::R_HEX_GOT_16;
- break;
- case fixup_Hexagon_DTPMOD_32:
- Type = ELF::R_HEX_DTPMOD_32;
- break;
- case fixup_Hexagon_DTPREL_LO16:
- Type = ELF::R_HEX_DTPREL_LO16;
- break;
- case fixup_Hexagon_DTPREL_HI16:
- Type = ELF::R_HEX_DTPREL_HI16;
- break;
- case fixup_Hexagon_DTPREL_32:
- Type = ELF::R_HEX_DTPREL_32;
- break;
- case fixup_Hexagon_DTPREL_16:
- Type = ELF::R_HEX_DTPREL_16;
- break;
- case fixup_Hexagon_GD_PLT_B22_PCREL:
- Type = ELF::R_HEX_GD_PLT_B22_PCREL;
- break;
- case fixup_Hexagon_LD_PLT_B22_PCREL:
- Type = ELF::R_HEX_LD_PLT_B22_PCREL;
- break;
- case fixup_Hexagon_GD_GOT_LO16:
- Type = ELF::R_HEX_GD_GOT_LO16;
- break;
- case fixup_Hexagon_GD_GOT_HI16:
- Type = ELF::R_HEX_GD_GOT_HI16;
- break;
- case fixup_Hexagon_GD_GOT_32:
- Type = ELF::R_HEX_GD_GOT_32;
- break;
- case fixup_Hexagon_GD_GOT_16:
- Type = ELF::R_HEX_GD_GOT_16;
- break;
- case fixup_Hexagon_LD_GOT_LO16:
- Type = ELF::R_HEX_LD_GOT_LO16;
- break;
- case fixup_Hexagon_LD_GOT_HI16:
- Type = ELF::R_HEX_LD_GOT_HI16;
- break;
- case fixup_Hexagon_LD_GOT_32:
- Type = ELF::R_HEX_LD_GOT_32;
- break;
- case fixup_Hexagon_LD_GOT_16:
- Type = ELF::R_HEX_LD_GOT_16;
- break;
- case fixup_Hexagon_IE_LO16:
- Type = ELF::R_HEX_IE_LO16;
- break;
- case fixup_Hexagon_IE_HI16:
- Type = ELF::R_HEX_IE_HI16;
- break;
- case fixup_Hexagon_IE_32:
- Type = ELF::R_HEX_IE_32;
- break;
- case fixup_Hexagon_IE_GOT_LO16:
- Type = ELF::R_HEX_IE_GOT_LO16;
- break;
- case fixup_Hexagon_IE_GOT_HI16:
- Type = ELF::R_HEX_IE_GOT_HI16;
- break;
- case fixup_Hexagon_IE_GOT_32:
- Type = ELF::R_HEX_IE_GOT_32;
- break;
- case fixup_Hexagon_IE_GOT_16:
- Type = ELF::R_HEX_IE_GOT_16;
- break;
- case fixup_Hexagon_TPREL_LO16:
- Type = ELF::R_HEX_TPREL_LO16;
- break;
- case fixup_Hexagon_TPREL_HI16:
- Type = ELF::R_HEX_TPREL_HI16;
- break;
- case fixup_Hexagon_TPREL_32:
- Type = ELF::R_HEX_TPREL_32;
- break;
- case fixup_Hexagon_TPREL_16:
- Type = ELF::R_HEX_TPREL_16;
- break;
- case fixup_Hexagon_6_PCREL_X:
- Type = ELF::R_HEX_6_PCREL_X;
- break;
- case fixup_Hexagon_GOTREL_32_6_X:
- Type = ELF::R_HEX_GOTREL_32_6_X;
- break;
- case fixup_Hexagon_GOTREL_16_X:
- Type = ELF::R_HEX_GOTREL_16_X;
- break;
- case fixup_Hexagon_GOTREL_11_X:
- Type = ELF::R_HEX_GOTREL_11_X;
- break;
- case fixup_Hexagon_GOT_32_6_X:
- Type = ELF::R_HEX_GOT_32_6_X;
- break;
- case fixup_Hexagon_GOT_16_X:
- Type = ELF::R_HEX_GOT_16_X;
- break;
- case fixup_Hexagon_GOT_11_X:
- Type = ELF::R_HEX_GOT_11_X;
- break;
- case fixup_Hexagon_DTPREL_32_6_X:
- Type = ELF::R_HEX_DTPREL_32_6_X;
- break;
- case fixup_Hexagon_DTPREL_16_X:
- Type = ELF::R_HEX_DTPREL_16_X;
- break;
- case fixup_Hexagon_DTPREL_11_X:
- Type = ELF::R_HEX_DTPREL_11_X;
- break;
- case fixup_Hexagon_GD_GOT_32_6_X:
- Type = ELF::R_HEX_GD_GOT_32_6_X;
- break;
- case fixup_Hexagon_GD_GOT_16_X:
- Type = ELF::R_HEX_GD_GOT_16_X;
- break;
- case fixup_Hexagon_GD_GOT_11_X:
- Type = ELF::R_HEX_GD_GOT_11_X;
- break;
- case fixup_Hexagon_LD_GOT_32_6_X:
- Type = ELF::R_HEX_LD_GOT_32_6_X;
- break;
- case fixup_Hexagon_LD_GOT_16_X:
- Type = ELF::R_HEX_LD_GOT_16_X;
- break;
- case fixup_Hexagon_LD_GOT_11_X:
- Type = ELF::R_HEX_LD_GOT_11_X;
- break;
- case fixup_Hexagon_IE_32_6_X:
- Type = ELF::R_HEX_IE_32_6_X;
- break;
- case fixup_Hexagon_IE_16_X:
- Type = ELF::R_HEX_IE_16_X;
- break;
- case fixup_Hexagon_IE_GOT_32_6_X:
- Type = ELF::R_HEX_IE_GOT_32_6_X;
- break;
- case fixup_Hexagon_IE_GOT_16_X:
- Type = ELF::R_HEX_IE_GOT_16_X;
- break;
- case fixup_Hexagon_IE_GOT_11_X:
- Type = ELF::R_HEX_IE_GOT_11_X;
- break;
- case fixup_Hexagon_TPREL_32_6_X:
- Type = ELF::R_HEX_TPREL_32_6_X;
- break;
- case fixup_Hexagon_TPREL_16_X:
- Type = ELF::R_HEX_TPREL_16_X;
- break;
- case fixup_Hexagon_TPREL_11_X:
- Type = ELF::R_HEX_TPREL_11_X;
- break;
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ DEBUG(dbgs() << "unrecognized relocation " << Fixup.getKind() << "\n");
+ llvm_unreachable("Unimplemented Fixup kind!");
+ return ELF::R_HEX_NONE;
+ case FK_Data_4:
+ return (IsPCRel) ? ELF::R_HEX_32_PCREL : ELF::R_HEX_32;
+ case FK_PCRel_4:
+ return ELF::R_HEX_32_PCREL;
+ case FK_Data_2:
+ return ELF::R_HEX_16;
+ case FK_Data_1:
+ return ELF::R_HEX_8;
+ case fixup_Hexagon_B22_PCREL:
+ return ELF::R_HEX_B22_PCREL;
+ case fixup_Hexagon_B15_PCREL:
+ return ELF::R_HEX_B15_PCREL;
+ case fixup_Hexagon_B7_PCREL:
+ return ELF::R_HEX_B7_PCREL;
+ case fixup_Hexagon_LO16:
+ return ELF::R_HEX_LO16;
+ case fixup_Hexagon_HI16:
+ return ELF::R_HEX_HI16;
+ case fixup_Hexagon_32:
+ return ELF::R_HEX_32;
+ case fixup_Hexagon_16:
+ return ELF::R_HEX_16;
+ case fixup_Hexagon_8:
+ return ELF::R_HEX_8;
+ case fixup_Hexagon_GPREL16_0:
+ return ELF::R_HEX_GPREL16_0;
+ case fixup_Hexagon_GPREL16_1:
+ return ELF::R_HEX_GPREL16_1;
+ case fixup_Hexagon_GPREL16_2:
+ return ELF::R_HEX_GPREL16_2;
+ case fixup_Hexagon_GPREL16_3:
+ return ELF::R_HEX_GPREL16_3;
+ case fixup_Hexagon_HL16:
+ return ELF::R_HEX_HL16;
+ case fixup_Hexagon_B13_PCREL:
+ return ELF::R_HEX_B13_PCREL;
+ case fixup_Hexagon_B9_PCREL:
+ return ELF::R_HEX_B9_PCREL;
+ case fixup_Hexagon_B32_PCREL_X:
+ return ELF::R_HEX_B32_PCREL_X;
+ case fixup_Hexagon_32_6_X:
+ return ELF::R_HEX_32_6_X;
+ case fixup_Hexagon_B22_PCREL_X:
+ return ELF::R_HEX_B22_PCREL_X;
+ case fixup_Hexagon_B15_PCREL_X:
+ return ELF::R_HEX_B15_PCREL_X;
+ case fixup_Hexagon_B13_PCREL_X:
+ return ELF::R_HEX_B13_PCREL_X;
+ case fixup_Hexagon_B9_PCREL_X:
+ return ELF::R_HEX_B9_PCREL_X;
+ case fixup_Hexagon_B7_PCREL_X:
+ return ELF::R_HEX_B7_PCREL_X;
+ case fixup_Hexagon_16_X:
+ return ELF::R_HEX_16_X;
+ case fixup_Hexagon_12_X:
+ return ELF::R_HEX_12_X;
+ case fixup_Hexagon_11_X:
+ return ELF::R_HEX_11_X;
+ case fixup_Hexagon_10_X:
+ return ELF::R_HEX_10_X;
+ case fixup_Hexagon_9_X:
+ return ELF::R_HEX_9_X;
+ case fixup_Hexagon_8_X:
+ return ELF::R_HEX_8_X;
+ case fixup_Hexagon_7_X:
+ return ELF::R_HEX_7_X;
+ case fixup_Hexagon_6_X:
+ return ELF::R_HEX_6_X;
+ case fixup_Hexagon_32_PCREL:
+ return ELF::R_HEX_32_PCREL;
+ case fixup_Hexagon_COPY:
+ return ELF::R_HEX_COPY;
+ case fixup_Hexagon_GLOB_DAT:
+ return ELF::R_HEX_GLOB_DAT;
+ case fixup_Hexagon_JMP_SLOT:
+ return ELF::R_HEX_JMP_SLOT;
+ case fixup_Hexagon_RELATIVE:
+ return ELF::R_HEX_RELATIVE;
+ case fixup_Hexagon_PLT_B22_PCREL:
+ return ELF::R_HEX_PLT_B22_PCREL;
+ case fixup_Hexagon_GOTREL_LO16:
+ return ELF::R_HEX_GOTREL_LO16;
+ case fixup_Hexagon_GOTREL_HI16:
+ return ELF::R_HEX_GOTREL_HI16;
+ case fixup_Hexagon_GOTREL_32:
+ return ELF::R_HEX_GOTREL_32;
+ case fixup_Hexagon_GOT_LO16:
+ return ELF::R_HEX_GOT_LO16;
+ case fixup_Hexagon_GOT_HI16:
+ return ELF::R_HEX_GOT_HI16;
+ case fixup_Hexagon_GOT_32:
+ return ELF::R_HEX_GOT_32;
+ case fixup_Hexagon_GOT_16:
+ return ELF::R_HEX_GOT_16;
+ case fixup_Hexagon_DTPMOD_32:
+ return ELF::R_HEX_DTPMOD_32;
+ case fixup_Hexagon_DTPREL_LO16:
+ return ELF::R_HEX_DTPREL_LO16;
+ case fixup_Hexagon_DTPREL_HI16:
+ return ELF::R_HEX_DTPREL_HI16;
+ case fixup_Hexagon_DTPREL_32:
+ return ELF::R_HEX_DTPREL_32;
+ case fixup_Hexagon_DTPREL_16:
+ return ELF::R_HEX_DTPREL_16;
+ case fixup_Hexagon_GD_PLT_B22_PCREL:
+ return ELF::R_HEX_GD_PLT_B22_PCREL;
+ case fixup_Hexagon_LD_PLT_B22_PCREL:
+ return ELF::R_HEX_LD_PLT_B22_PCREL;
+ case fixup_Hexagon_GD_GOT_LO16:
+ return ELF::R_HEX_GD_GOT_LO16;
+ case fixup_Hexagon_GD_GOT_HI16:
+ return ELF::R_HEX_GD_GOT_HI16;
+ case fixup_Hexagon_GD_GOT_32:
+ return ELF::R_HEX_GD_GOT_32;
+ case fixup_Hexagon_GD_GOT_16:
+ return ELF::R_HEX_GD_GOT_16;
+ case fixup_Hexagon_LD_GOT_LO16:
+ return ELF::R_HEX_LD_GOT_LO16;
+ case fixup_Hexagon_LD_GOT_HI16:
+ return ELF::R_HEX_LD_GOT_HI16;
+ case fixup_Hexagon_LD_GOT_32:
+ return ELF::R_HEX_LD_GOT_32;
+ case fixup_Hexagon_LD_GOT_16:
+ return ELF::R_HEX_LD_GOT_16;
+ case fixup_Hexagon_IE_LO16:
+ return ELF::R_HEX_IE_LO16;
+ case fixup_Hexagon_IE_HI16:
+ return ELF::R_HEX_IE_HI16;
+ case fixup_Hexagon_IE_32:
+ return ELF::R_HEX_IE_32;
+ case fixup_Hexagon_IE_GOT_LO16:
+ return ELF::R_HEX_IE_GOT_LO16;
+ case fixup_Hexagon_IE_GOT_HI16:
+ return ELF::R_HEX_IE_GOT_HI16;
+ case fixup_Hexagon_IE_GOT_32:
+ return ELF::R_HEX_IE_GOT_32;
+ case fixup_Hexagon_IE_GOT_16:
+ return ELF::R_HEX_IE_GOT_16;
+ case fixup_Hexagon_TPREL_LO16:
+ return ELF::R_HEX_TPREL_LO16;
+ case fixup_Hexagon_TPREL_HI16:
+ return ELF::R_HEX_TPREL_HI16;
+ case fixup_Hexagon_TPREL_32:
+ return ELF::R_HEX_TPREL_32;
+ case fixup_Hexagon_TPREL_16:
+ return ELF::R_HEX_TPREL_16;
+ case fixup_Hexagon_6_PCREL_X:
+ return ELF::R_HEX_6_PCREL_X;
+ case fixup_Hexagon_GOTREL_32_6_X:
+ return ELF::R_HEX_GOTREL_32_6_X;
+ case fixup_Hexagon_GOTREL_16_X:
+ return ELF::R_HEX_GOTREL_16_X;
+ case fixup_Hexagon_GOTREL_11_X:
+ return ELF::R_HEX_GOTREL_11_X;
+ case fixup_Hexagon_GOT_32_6_X:
+ return ELF::R_HEX_GOT_32_6_X;
+ case fixup_Hexagon_GOT_16_X:
+ return ELF::R_HEX_GOT_16_X;
+ case fixup_Hexagon_GOT_11_X:
+ return ELF::R_HEX_GOT_11_X;
+ case fixup_Hexagon_DTPREL_32_6_X:
+ return ELF::R_HEX_DTPREL_32_6_X;
+ case fixup_Hexagon_DTPREL_16_X:
+ return ELF::R_HEX_DTPREL_16_X;
+ case fixup_Hexagon_DTPREL_11_X:
+ return ELF::R_HEX_DTPREL_11_X;
+ case fixup_Hexagon_GD_GOT_32_6_X:
+ return ELF::R_HEX_GD_GOT_32_6_X;
+ case fixup_Hexagon_GD_GOT_16_X:
+ return ELF::R_HEX_GD_GOT_16_X;
+ case fixup_Hexagon_GD_GOT_11_X:
+ return ELF::R_HEX_GD_GOT_11_X;
+ case fixup_Hexagon_LD_GOT_32_6_X:
+ return ELF::R_HEX_LD_GOT_32_6_X;
+ case fixup_Hexagon_LD_GOT_16_X:
+ return ELF::R_HEX_LD_GOT_16_X;
+ case fixup_Hexagon_LD_GOT_11_X:
+ return ELF::R_HEX_LD_GOT_11_X;
+ case fixup_Hexagon_IE_32_6_X:
+ return ELF::R_HEX_IE_32_6_X;
+ case fixup_Hexagon_IE_16_X:
+ return ELF::R_HEX_IE_16_X;
+ case fixup_Hexagon_IE_GOT_32_6_X:
+ return ELF::R_HEX_IE_GOT_32_6_X;
+ case fixup_Hexagon_IE_GOT_16_X:
+ return ELF::R_HEX_IE_GOT_16_X;
+ case fixup_Hexagon_IE_GOT_11_X:
+ return ELF::R_HEX_IE_GOT_11_X;
+ case fixup_Hexagon_TPREL_32_6_X:
+ return ELF::R_HEX_TPREL_32_6_X;
+ case fixup_Hexagon_TPREL_16_X:
+ return ELF::R_HEX_TPREL_16_X;
+ case fixup_Hexagon_TPREL_11_X:
+ return ELF::R_HEX_TPREL_11_X;
}
- return Type;
}
MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS,
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 1eee852..6f8cb90 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -342,6 +342,36 @@ static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
return LastTargetFixupKind;
}
+namespace llvm {
+extern const MCInstrDesc HexagonInsts[];
+}
+
+namespace {
+ bool isPCRel (unsigned Kind) {
+ switch(Kind){
+ case fixup_Hexagon_B22_PCREL:
+ case fixup_Hexagon_B15_PCREL:
+ case fixup_Hexagon_B7_PCREL:
+ case fixup_Hexagon_B13_PCREL:
+ case fixup_Hexagon_B9_PCREL:
+ case fixup_Hexagon_B32_PCREL_X:
+ case fixup_Hexagon_B22_PCREL_X:
+ case fixup_Hexagon_B15_PCREL_X:
+ case fixup_Hexagon_B13_PCREL_X:
+ case fixup_Hexagon_B9_PCREL_X:
+ case fixup_Hexagon_B7_PCREL_X:
+ case fixup_Hexagon_32_PCREL:
+ case fixup_Hexagon_PLT_B22_PCREL:
+ case fixup_Hexagon_GD_PLT_B22_PCREL:
+ case fixup_Hexagon_LD_PLT_B22_PCREL:
+ case fixup_Hexagon_6_PCREL_X:
+ return true;
+ default:
+ return false;
+ }
+ }
+} // namespace
+
unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
const MCOperand &MO,
const MCExpr *ME,
@@ -363,7 +393,7 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
Res = getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI);
Res +=
getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI);
- return Res;
+ return 0;
}
assert(MK == MCExpr::SymbolRef);
@@ -662,8 +692,13 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
break;
}
- MCFixup fixup =
- MCFixup::create(*Addend, MO.getExpr(), MCFixupKind(FixupKind));
+ MCExpr const *FixupExpression = (*Addend > 0 && isPCRel(FixupKind)) ?
+ MCBinaryExpr::createAdd(MO.getExpr(),
+ MCConstantExpr::create(*Addend, MCT), MCT) :
+ MO.getExpr();
+
+ MCFixup fixup = MCFixup::create(*Addend, FixupExpression,
+ MCFixupKind(FixupKind), MI.getLoc());
Fixups.push_back(fixup);
// All of the information is in the fixup.
return (0);
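A short sketch of the fixup-expression selection just above: when the addend is positive and the fixup kind is PC-relative, the symbol expression is wrapped as (sym + Addend); otherwise it is used unchanged. The IsPCRel flag stands in for the isPCRel(FixupKind) helper added earlier in this file.

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
using namespace llvm;

static const MCExpr *makeFixupExpr(const MCExpr *SymExpr, int64_t Addend,
                                   bool IsPCRel, MCContext &Ctx) {
  if (Addend > 0 && IsPCRel)
    return MCBinaryExpr::createAdd(SymExpr,
                                   MCConstantExpr::create(Addend, Ctx), Ctx);
  return SymExpr;
}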
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
index 9aa258c..2a154da 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -44,8 +44,6 @@ public:
uint32_t parseBits(size_t Instruction, size_t Last, MCInst const &MCB,
MCInst const &MCI) const;
- MCSubtargetInfo const &getSubtargetInfo() const;
-
void encodeInstruction(MCInst const &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
MCSubtargetInfo const &STI) const override;
@@ -65,10 +63,6 @@ public:
unsigned getMachineOpValue(MCInst const &MI, MCOperand const &MO,
SmallVectorImpl<MCFixup> &Fixups,
MCSubtargetInfo const &STI) const;
-
-private:
- HexagonMCCodeEmitter(HexagonMCCodeEmitter const &) = delete;
- void operator=(HexagonMCCodeEmitter const &) = delete;
}; // class HexagonMCCodeEmitter
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 1080935..0d1f1e6 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -40,39 +40,39 @@ enum OpcodeIndex {
tp1_jump_t
};
-unsigned tstBitOpcode[8] = {J4_tstbit0_fp0_jump_nt, J4_tstbit0_fp0_jump_t,
- J4_tstbit0_fp1_jump_nt, J4_tstbit0_fp1_jump_t,
- J4_tstbit0_tp0_jump_nt, J4_tstbit0_tp0_jump_t,
- J4_tstbit0_tp1_jump_nt, J4_tstbit0_tp1_jump_t};
-unsigned cmpeqBitOpcode[8] = {J4_cmpeq_fp0_jump_nt, J4_cmpeq_fp0_jump_t,
- J4_cmpeq_fp1_jump_nt, J4_cmpeq_fp1_jump_t,
- J4_cmpeq_tp0_jump_nt, J4_cmpeq_tp0_jump_t,
- J4_cmpeq_tp1_jump_nt, J4_cmpeq_tp1_jump_t};
-unsigned cmpgtBitOpcode[8] = {J4_cmpgt_fp0_jump_nt, J4_cmpgt_fp0_jump_t,
- J4_cmpgt_fp1_jump_nt, J4_cmpgt_fp1_jump_t,
- J4_cmpgt_tp0_jump_nt, J4_cmpgt_tp0_jump_t,
- J4_cmpgt_tp1_jump_nt, J4_cmpgt_tp1_jump_t};
-unsigned cmpgtuBitOpcode[8] = {J4_cmpgtu_fp0_jump_nt, J4_cmpgtu_fp0_jump_t,
- J4_cmpgtu_fp1_jump_nt, J4_cmpgtu_fp1_jump_t,
- J4_cmpgtu_tp0_jump_nt, J4_cmpgtu_tp0_jump_t,
- J4_cmpgtu_tp1_jump_nt, J4_cmpgtu_tp1_jump_t};
-unsigned cmpeqiBitOpcode[8] = {J4_cmpeqi_fp0_jump_nt, J4_cmpeqi_fp0_jump_t,
- J4_cmpeqi_fp1_jump_nt, J4_cmpeqi_fp1_jump_t,
- J4_cmpeqi_tp0_jump_nt, J4_cmpeqi_tp0_jump_t,
- J4_cmpeqi_tp1_jump_nt, J4_cmpeqi_tp1_jump_t};
-unsigned cmpgtiBitOpcode[8] = {J4_cmpgti_fp0_jump_nt, J4_cmpgti_fp0_jump_t,
- J4_cmpgti_fp1_jump_nt, J4_cmpgti_fp1_jump_t,
- J4_cmpgti_tp0_jump_nt, J4_cmpgti_tp0_jump_t,
- J4_cmpgti_tp1_jump_nt, J4_cmpgti_tp1_jump_t};
-unsigned cmpgtuiBitOpcode[8] = {J4_cmpgtui_fp0_jump_nt, J4_cmpgtui_fp0_jump_t,
- J4_cmpgtui_fp1_jump_nt, J4_cmpgtui_fp1_jump_t,
- J4_cmpgtui_tp0_jump_nt, J4_cmpgtui_tp0_jump_t,
- J4_cmpgtui_tp1_jump_nt, J4_cmpgtui_tp1_jump_t};
-unsigned cmpeqn1BitOpcode[8] = {J4_cmpeqn1_fp0_jump_nt, J4_cmpeqn1_fp0_jump_t,
- J4_cmpeqn1_fp1_jump_nt, J4_cmpeqn1_fp1_jump_t,
- J4_cmpeqn1_tp0_jump_nt, J4_cmpeqn1_tp0_jump_t,
- J4_cmpeqn1_tp1_jump_nt, J4_cmpeqn1_tp1_jump_t};
-unsigned cmpgtn1BitOpcode[8] = {
+static const unsigned tstBitOpcode[8] = {
+ J4_tstbit0_fp0_jump_nt, J4_tstbit0_fp0_jump_t, J4_tstbit0_fp1_jump_nt,
+ J4_tstbit0_fp1_jump_t, J4_tstbit0_tp0_jump_nt, J4_tstbit0_tp0_jump_t,
+ J4_tstbit0_tp1_jump_nt, J4_tstbit0_tp1_jump_t};
+static const unsigned cmpeqBitOpcode[8] = {
+ J4_cmpeq_fp0_jump_nt, J4_cmpeq_fp0_jump_t, J4_cmpeq_fp1_jump_nt,
+ J4_cmpeq_fp1_jump_t, J4_cmpeq_tp0_jump_nt, J4_cmpeq_tp0_jump_t,
+ J4_cmpeq_tp1_jump_nt, J4_cmpeq_tp1_jump_t};
+static const unsigned cmpgtBitOpcode[8] = {
+ J4_cmpgt_fp0_jump_nt, J4_cmpgt_fp0_jump_t, J4_cmpgt_fp1_jump_nt,
+ J4_cmpgt_fp1_jump_t, J4_cmpgt_tp0_jump_nt, J4_cmpgt_tp0_jump_t,
+ J4_cmpgt_tp1_jump_nt, J4_cmpgt_tp1_jump_t};
+static const unsigned cmpgtuBitOpcode[8] = {
+ J4_cmpgtu_fp0_jump_nt, J4_cmpgtu_fp0_jump_t, J4_cmpgtu_fp1_jump_nt,
+ J4_cmpgtu_fp1_jump_t, J4_cmpgtu_tp0_jump_nt, J4_cmpgtu_tp0_jump_t,
+ J4_cmpgtu_tp1_jump_nt, J4_cmpgtu_tp1_jump_t};
+static const unsigned cmpeqiBitOpcode[8] = {
+ J4_cmpeqi_fp0_jump_nt, J4_cmpeqi_fp0_jump_t, J4_cmpeqi_fp1_jump_nt,
+ J4_cmpeqi_fp1_jump_t, J4_cmpeqi_tp0_jump_nt, J4_cmpeqi_tp0_jump_t,
+ J4_cmpeqi_tp1_jump_nt, J4_cmpeqi_tp1_jump_t};
+static const unsigned cmpgtiBitOpcode[8] = {
+ J4_cmpgti_fp0_jump_nt, J4_cmpgti_fp0_jump_t, J4_cmpgti_fp1_jump_nt,
+ J4_cmpgti_fp1_jump_t, J4_cmpgti_tp0_jump_nt, J4_cmpgti_tp0_jump_t,
+ J4_cmpgti_tp1_jump_nt, J4_cmpgti_tp1_jump_t};
+static const unsigned cmpgtuiBitOpcode[8] = {
+ J4_cmpgtui_fp0_jump_nt, J4_cmpgtui_fp0_jump_t, J4_cmpgtui_fp1_jump_nt,
+ J4_cmpgtui_fp1_jump_t, J4_cmpgtui_tp0_jump_nt, J4_cmpgtui_tp0_jump_t,
+ J4_cmpgtui_tp1_jump_nt, J4_cmpgtui_tp1_jump_t};
+static const unsigned cmpeqn1BitOpcode[8] = {
+ J4_cmpeqn1_fp0_jump_nt, J4_cmpeqn1_fp0_jump_t, J4_cmpeqn1_fp1_jump_nt,
+ J4_cmpeqn1_fp1_jump_t, J4_cmpeqn1_tp0_jump_nt, J4_cmpeqn1_tp0_jump_t,
+ J4_cmpeqn1_tp1_jump_nt, J4_cmpeqn1_tp1_jump_t};
+static const unsigned cmpgtn1BitOpcode[8] = {
J4_cmpgtn1_fp0_jump_nt, J4_cmpgtn1_fp0_jump_t, J4_cmpgtn1_fp1_jump_nt,
J4_cmpgtn1_fp1_jump_t, J4_cmpgtn1_tp0_jump_nt, J4_cmpgtn1_tp0_jump_t,
J4_cmpgtn1_tp1_jump_nt, J4_cmpgtn1_tp1_jump_t,
@@ -174,7 +174,7 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
return HexagonII::HCG_None;
}
-}
+} // namespace
/// getCompoundOp - Return the index from 0-7 into the above opcode lists.
namespace {
@@ -199,7 +199,7 @@ unsigned getCompoundOp(MCInst const &HMCI) {
return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t;
}
}
-}
+} // namespace
namespace {
MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
@@ -331,7 +331,7 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
return CompoundInsn;
}
-}
+} // namespace
/// Non-Symmetrical. See if these two instructions are fit for compound pair.
namespace {
@@ -348,7 +348,7 @@ bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) &&
(MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg()));
}
-}
+} // namespace
namespace {
bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
@@ -396,7 +396,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
}
return false;
}
-}
+} // namespace
/// tryCompound - Given a bundle, check for compound insns; when one
/// is found, update the contents of the bundle with the compound insn.
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index eb62977..7e9247c 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -394,8 +394,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
Src1Reg = MCI.getOperand(0).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm()) &&
- MCI.getOperand(2).isImm() && MCI.getOperand(2).isImm() &&
- isUInt<1>(MCI.getOperand(2).getImm())) {
+ MCI.getOperand(2).isImm() && isUInt<1>(MCI.getOperand(2).getImm())) {
return HexagonII::HSIG_S2;
}
break;
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
new file mode 100644
index 0000000..bf51c35
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -0,0 +1,152 @@
+//=== HexagonMCELFStreamer.cpp - Hexagon subclass of MCELFStreamer -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a stub that parses an MCInst bundle and passes the
+// instructions on to the real streamer.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "hexagonmcelfstreamer"
+
+#include "Hexagon.h"
+#include "HexagonMCELFStreamer.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+ GPSize("gpsize", cl::NotHidden,
+ cl::desc("Global Pointer Addressing Size. The default size is 8."),
+ cl::Prefix, cl::init(8));
+
+void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
+ const MCSubtargetInfo &STI) {
+ MCInst HMI;
+ HMI.setOpcode(Hexagon::BUNDLE);
+ HMI.addOperand(MCOperand::createImm(0));
+ MCInst *MCB;
+
+ if (MCK.getOpcode() != Hexagon::BUNDLE) {
+ HMI.addOperand(MCOperand::createInst(&MCK));
+ MCB = &HMI;
+ } else
+ MCB = const_cast<MCInst *>(&MCK);
+
+ // Examine the packet and pad it, if needed, when an
+ // end-loop is in the bundle.
+ HexagonMCInstrInfo::padEndloop(*MCB);
+ HexagonMCShuffle(*MCII, STI, *MCB);
+
+ assert(HexagonMCInstrInfo::bundleSize(*MCB) <= HEXAGON_PACKET_SIZE);
+ bool Extended = false;
+ for (auto &I : HexagonMCInstrInfo::bundleInstructions(*MCB)) {
+ MCInst *MCI = const_cast<MCInst *>(I.getInst());
+ if (Extended) {
+ if (HexagonMCInstrInfo::isDuplex(*MCII, *MCI)) {
+ MCInst *SubInst = const_cast<MCInst *>(MCI->getOperand(1).getInst());
+ HexagonMCInstrInfo::clampExtended(*MCII, *SubInst);
+ } else {
+ HexagonMCInstrInfo::clampExtended(*MCII, *MCI);
+ }
+ Extended = false;
+ } else {
+ Extended = HexagonMCInstrInfo::isImmext(*MCI);
+ }
+ }
+
+ // At this point, MCB is a bundle
+ // Iterate through the bundle and assign addends for the instructions
+ for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MCB)) {
+ MCInst *MCI = const_cast<MCInst *>(I.getInst());
+ EmitSymbol(*MCI);
+ }
+ MCObjectStreamer::EmitInstruction(*MCB, STI);
+}
+
+void HexagonMCELFStreamer::EmitSymbol(const MCInst &Inst) {
+ // Scan for values.
+ for (unsigned i = Inst.getNumOperands(); i--;)
+ if (Inst.getOperand(i).isExpr())
+ visitUsedExpr(*Inst.getOperand(i).getExpr());
+}
+
+// EmitCommonSymbol and EmitLocalCommonSymbol are extended versions of the
+// functions found in MCELFStreamer.cpp taking AccessSize as an additional
+// parameter.
+void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
+ uint64_t Size,
+ unsigned ByteAlignment,
+ unsigned AccessSize) {
+ getAssembler().registerSymbol(*Symbol);
+ StringRef sbss[4] = {".sbss.1", ".sbss.2", ".sbss.4", ".sbss.8"};
+
+ auto ELFSymbol = cast<MCSymbolELF>(Symbol);
+ if (!ELFSymbol->isBindingSet()) {
+ ELFSymbol->setBinding(ELF::STB_GLOBAL);
+ ELFSymbol->setExternal(true);
+ }
+
+ ELFSymbol->setType(ELF::STT_OBJECT);
+
+ if (ELFSymbol->getBinding() == ELF::STB_LOCAL) {
+ StringRef SectionName =
+ ((AccessSize == 0) || (Size == 0) || (Size > GPSize))
+ ? ".bss"
+ : sbss[(Log2_64(AccessSize))];
+
+ MCSection *CrntSection = getCurrentSection().first;
+ MCSection *Section = getAssembler().getContext().getELFSection(
+ SectionName, ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ SwitchSection(Section);
+ AssignSection(Symbol, Section);
+
+ MCELFStreamer::EmitCommonSymbol(Symbol, Size, ByteAlignment);
+ SwitchSection(CrntSection);
+ } else {
+ if (ELFSymbol->declareCommon(Size, ByteAlignment))
+ report_fatal_error("Symbol: " + Symbol->getName() +
+ " redeclared as different type");
+ if ((AccessSize) && (Size <= GPSize)) {
+ uint64_t SectionIndex =
+ (AccessSize <= GPSize)
+ ? ELF::SHN_HEXAGON_SCOMMON + (Log2_64(AccessSize) + 1)
+ : (unsigned)ELF::SHN_HEXAGON_SCOMMON;
+ ELFSymbol->setIndex(SectionIndex);
+ }
+ }
+
+ ELFSymbol->setSize(MCConstantExpr::create(Size, getContext()));
+}
+
+void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol(
+ MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment,
+ unsigned AccessSize) {
+ getAssembler().registerSymbol(*Symbol);
+ auto ELFSymbol = cast<MCSymbolELF>(Symbol);
+ ELFSymbol->setBinding(ELF::STB_LOCAL);
+ ELFSymbol->setExternal(false);
+ HexagonMCEmitCommonSymbol(Symbol, Size, ByteAlignment, AccessSize);
+}
+
+namespace llvm {
+MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE) {
+ return new HexagonMCELFStreamer(Context, MAB, OS, CE);
+}
+}
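A standalone sketch of the small-data section choice made by HexagonMCEmitCommonSymbol above, assuming the default -gpsize of 8; the sizes passed in main are made-up examples, and AccessSize is assumed to be 1, 2, 4, or 8 as in the emitter.

#include <cstdint>
#include <cstdio>

static const char *pickSection(uint64_t Size, unsigned AccessSize,
                               unsigned GPSize = 8) {
  static const char *SBSS[4] = {".sbss.1", ".sbss.2", ".sbss.4", ".sbss.8"};
  if (AccessSize == 0 || Size == 0 || Size > GPSize)
    return ".bss";
  unsigned Log2 = 0;                              // Log2_64(AccessSize)
  while ((1u << (Log2 + 1)) <= AccessSize)
    ++Log2;
  return SBSS[Log2];
}

int main() {
  std::printf("%s\n", pickSection(4, 4));   // ".sbss.4": fits in the GP area
  std::printf("%s\n", pickSection(64, 8));  // ".bss": larger than gpsize
  return 0;
}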
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
new file mode 100644
index 0000000..d77c0cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
@@ -0,0 +1,45 @@
+//===- HexagonMCELFStreamer.h - Hexagon subclass of MCELFStreamer ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCELFSTREAMER_H
+#define HEXAGONMCELFSTREAMER_H
+
+#include "MCTargetDesc/HexagonMCCodeEmitter.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "HexagonTargetStreamer.h"
+
+namespace llvm {
+
+class HexagonMCELFStreamer : public MCELFStreamer {
+ std::unique_ptr<MCInstrInfo> MCII;
+
+public:
+ HexagonMCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *Emitter)
+ : MCELFStreamer(Context, TAB, OS, Emitter),
+ MCII(createHexagonMCInstrInfo()) {}
+
+ virtual void EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) override;
+ void EmitSymbol(const MCInst &Inst);
+ void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment,
+ unsigned AccessSize);
+ void HexagonMCEmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment, unsigned AccessSize);
+};
+
+MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE);
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index 2731278..e69a52d 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -35,6 +35,21 @@ size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) {
return (1);
}
+void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, MCInst &MCI) {
+ assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) ||
+ HexagonMCInstrInfo::isExtended(MCII, MCI));
+ MCOperand &exOp =
+ MCI.getOperand(HexagonMCInstrInfo::getExtendableOp(MCII, MCI));
+ // If the extended value is a constant, then use it for the extended and
+ // for the extender instructions, masking off the lower 6 bits and
+ // including the assumed bits.
+ if (exOp.isImm()) {
+ unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MCI);
+ int64_t Bits = exOp.getImm();
+ exOp.setImm((Bits & 0x3f) << Shift);
+ }
+}
+
MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass,
MCInst const &inst0,
MCInst const &inst1) {
@@ -446,4 +461,4 @@ void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) {
MCOperand &Operand = MCI.getOperand(0);
Operand.setImm(Operand.getImm() | outerLoopMask);
}
-}
+} // namespace llvm
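A worked example of the clamping done by the new clampExtended above: the extended instruction keeps only the low 6 bits of its immediate, scaled back up by the operand's alignment, while the upper bits travel in the preceding immext extender. The immediate and shift below are assumed values, not taken from this patch.

#include <cassert>
#include <cstdint>

int main() {
  int64_t Bits = 0x12345;  // full immediate carried by the extender pair
  unsigned Shift = 2;      // value of getExtentAlignment() for this operand
  int64_t Clamped = (Bits & 0x3f) << Shift;
  assert(Clamped == 0x14); // low 6 bits (0x05) shifted left by 2
  return 0;
}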
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 09f305f..9f7562a 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -41,14 +41,14 @@ int64_t const outerLoopMask = 1 << outerLoopOffset;
size_t const bundleInstructionsOffset = 1;
-// Returns the number of instructions in the bundle
-size_t bundleSize(MCInst const &MCI);
-
// Returns a iterator range of instructions in this bundle
iterator_range<MCInst::const_iterator> bundleInstructions(MCInst const &MCI);
-// Return the extender for instruction at Index or nullptr if none
-MCInst const *extenderForIndex(MCInst const &MCB, size_t Index);
+// Returns the number of instructions in the bundle
+size_t bundleSize(MCInst const &MCI);
+
+// Clamp off upper 26 bits of extendable operand for emission
+void clampExtended(MCInstrInfo const &MCII, MCInst &MCI);
// Create a duplex instruction given the two subinsts
MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0,
@@ -57,6 +57,9 @@ MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0,
// Convert this instruction in to a duplex subinst
MCInst deriveSubInst(MCInst const &Inst);
+// Return the extender for instruction at Index or nullptr if none
+MCInst const *extenderForIndex(MCInst const &MCB, size_t Index);
+
// Return memory access size
HexagonII::MemAccessSize getAccessSize(MCInstrInfo const &MCII,
MCInst const &MCI);
@@ -224,9 +227,9 @@ void setOuterLoop(MCInst &MCI);
// Would duplexing this instruction create a requirement to extend
bool subInstWouldBeExtended(MCInst const &potentialDuplex);
-// Attempt to find and replace compound pairs
+// Attempt to find and replace compound pairs
void tryCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
-}
-}
+} // namespace HexagonMCInstrInfo
+} // namespace llvm
#endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
index a21cce1..9c0e3f2 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
@@ -60,6 +60,6 @@ bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
unsigned HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCContext &Context, MCInst &,
SmallVector<DuplexCandidate, 8>);
-}
+} // namespace llvm
#endif // HEXAGONMCSHUFFLER_H
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 43734ed..4a4f0c2 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -12,15 +12,20 @@
//===----------------------------------------------------------------------===//
#include "HexagonMCTargetDesc.h"
+#include "Hexagon.h"
#include "HexagonMCAsmInfo.h"
+#include "HexagonMCELFStreamer.h"
#include "MCTargetDesc/HexagonInstPrinter.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -48,12 +53,92 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
}
static MCSubtargetInfo *
-createHexagonMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
+createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitHexagonMCSubtargetInfo(X, TT, CPU, FS);
return X;
}
+namespace {
+class HexagonTargetAsmStreamer : public HexagonTargetStreamer {
+public:
+ HexagonTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &, bool,
+ MCInstPrinter &)
+ : HexagonTargetStreamer(S) {}
+ void prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS,
+ const MCInst &Inst, const MCSubtargetInfo &STI) override {
+ assert(HexagonMCInstrInfo::isBundle(Inst));
+ assert(HexagonMCInstrInfo::bundleSize(Inst) <= HEXAGON_PACKET_SIZE);
+ std::string Buffer;
+ {
+ raw_string_ostream TempStream(Buffer);
+ InstPrinter.printInst(&Inst, TempStream, "", STI);
+ }
+ StringRef Contents(Buffer);
+ auto PacketBundle = Contents.rsplit('\n');
+ auto HeadTail = PacketBundle.first.split('\n');
+ auto Preamble = "\t{\n\t\t";
+ auto Separator = "";
+ while(!HeadTail.first.empty()) {
+ OS << Separator;
+ StringRef Inst;
+ auto Duplex = HeadTail.first.split('\v');
+ if(!Duplex.second.empty()){
+ OS << Duplex.first << "\n";
+ Inst = Duplex.second;
+ }
+ else {
+ if(!HeadTail.first.startswith("immext"))
+ Inst = Duplex.first;
+ }
+ OS << Preamble;
+ OS << Inst;
+ HeadTail = HeadTail.second.split('\n');
+ Preamble = "";
+ Separator = "\n\t\t";
+ }
+ if(HexagonMCInstrInfo::bundleSize(Inst) != 0)
+ OS << "\n\t}" << PacketBundle.second;
+ }
+};
+} // namespace
+
+namespace {
+class HexagonTargetELFStreamer : public HexagonTargetStreamer {
+public:
+ MCELFStreamer &getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+ }
+ HexagonTargetELFStreamer(MCStreamer &S, MCSubtargetInfo const &STI)
+ : HexagonTargetStreamer(S) {
+ auto Bits = STI.getFeatureBits();
+ unsigned Flags;
+ if (Bits.to_ullong() & llvm::Hexagon::ArchV5)
+ Flags = ELF::EF_HEXAGON_MACH_V5;
+ else
+ Flags = ELF::EF_HEXAGON_MACH_V4;
+ getStreamer().getAssembler().setELFHeaderEFlags(Flags);
+ }
+ void EmitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment,
+ unsigned AccessSize) override {
+ HexagonMCELFStreamer &HexagonELFStreamer =
+ static_cast<HexagonMCELFStreamer &>(getStreamer());
+ HexagonELFStreamer.HexagonMCEmitCommonSymbol(Symbol, Size, ByteAlignment,
+ AccessSize);
+ }
+ void EmitLocalCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment,
+ unsigned AccessSize) override {
+ HexagonMCELFStreamer &HexagonELFStreamer =
+ static_cast<HexagonMCELFStreamer &>(getStreamer());
+ HexagonELFStreamer.HexagonMCEmitLocalCommonSymbol(
+ Symbol, Size, ByteAlignment, AccessSize);
+ }
+};
+} // namespace
+
static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TT) {
MCAsmInfo *MAI = new HexagonMCAsmInfo(TT);
@@ -82,9 +167,26 @@ static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return(new HexagonInstPrinter(MAI, MII, MRI));
+ return (new HexagonInstPrinter(MAI, MII, MRI));
else
- return nullptr;
+ return nullptr;
+}
+
+MCTargetStreamer *createMCAsmTargetStreamer(
+ MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter *InstPrint,
+ bool IsVerboseAsm) {
+ return new HexagonTargetAsmStreamer(S, OS, IsVerboseAsm, *InstPrint);
+}
+
+static MCStreamer *createMCStreamer(Triple const &T, MCContext &Context,
+ MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
+ return createHexagonELFStreamer(Context, MAB, OS, Emitter);
+}
+
+static MCTargetStreamer *
+createHexagonObjectTargetStreamer(MCStreamer &S, MCSubtargetInfo const &STI) {
+ return new HexagonTargetELFStreamer(S, STI);
}
// Force static initialization.
@@ -116,7 +218,17 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget,
createHexagonAsmBackend);
+ // Register the obj streamer
+ TargetRegistry::RegisterELFStreamer(TheHexagonTarget, createMCStreamer);
+
+ // Register the asm streamer
+ TargetRegistry::RegisterAsmTargetStreamer(TheHexagonTarget,
+ createMCAsmTargetStreamer);
+
// Register the MC Inst Printer
TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget,
createHexagonMCInstPrinter);
+
+ TargetRegistry::RegisterObjectTargetStreamer(
+ TheHexagonTarget, createHexagonObjectTargetStreamer);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 81211cc..89c3eb3 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -27,6 +27,7 @@ class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
+class Triple;
class StringRef;
class raw_ostream;
class raw_pwrite_stream;
@@ -42,13 +43,13 @@ MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII,
MCContext &MCT);
MCAsmBackend *createHexagonAsmBackend(Target const &T,
- MCRegisterInfo const &MRI, StringRef TT,
- StringRef CPU);
+ MCRegisterInfo const &MRI,
+ const Triple &TT, StringRef CPU);
MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI, StringRef CPU);
-} // End llvm namespace
+} // namespace llvm
// Define symbolic names for Hexagon registers. This defines a mapping from
// register name to register number.
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 9218fd3..53325f6 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -134,6 +134,6 @@ public:
void setError(unsigned Err) { Error = Err; };
unsigned getError() const { return (Error); };
};
-}
+} // namespace llvm
#endif // HEXAGONSHUFFLER_H
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 70141a9..80565aa 100644
--- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -40,6 +40,6 @@ namespace llvm {
void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index 6bcfb32..be445c5 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -43,8 +43,8 @@ static MCRegisterInfo *createMSP430MCRegisterInfo(StringRef TT) {
return X;
}
-static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
+static MCSubtargetInfo *
+createMSP430MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitMSP430MCSubtargetInfo(X, TT, CPU, FS);
return X;
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.h b/contrib/llvm/lib/Target/MSP430/MSP430.h
index 796f252..302012e 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.h
@@ -30,7 +30,7 @@ namespace MSP430CC {
COND_INVALID = -1
};
-}
+} // namespace MSP430CC
namespace llvm {
class MSP430TargetMachine;
@@ -42,6 +42,6 @@ namespace llvm {
FunctionPass *createMSP430BranchSelectionPass();
-} // end namespace llvm;
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
index ffcf222..2bc11c0 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -44,7 +44,7 @@ namespace {
}
};
char MSP430BSel::ID = 0;
-}
+} // namespace
/// createMSP430BranchSelectionPass - returns an instance of the Branch
/// Selection Pass
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
index 48c4dc86..2f20bbd 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
@@ -49,6 +49,6 @@ public:
RegScavenger *RS = nullptr) const override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 5ce5013..a60108d 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -85,7 +85,7 @@ namespace {
errs() << " JT" << JT << " Align" << Align << '\n';
}
};
-}
+} // namespace
/// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine
/// instructions for SelectionDAG operations.
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
index 80d3ae1..b090609 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
@@ -64,7 +64,7 @@ namespace llvm {
/// SHL, SRA, SRL - Non-constant shifts.
SHL, SRA, SRL
};
- }
+ } // namespace MSP430ISD
class MSP430Subtarget;
class MSP430TargetLowering : public TargetLowering {
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index 27681aa..72b1780 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -262,7 +262,7 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned
MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ ArrayRef<MachineOperand> Cond,
DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
index f9b25b6..c6bad1e 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
@@ -38,7 +38,7 @@ namespace MSP430II {
Size4Bytes = 3 << SizeShift,
Size6Bytes = 4 << SizeShift
};
-}
+} // namespace MSP430II
class MSP430InstrInfo : public MSP430GenInstrInfo {
const MSP430RegisterInfo RI;
@@ -82,12 +82,11 @@ public:
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h
index ebd6397..ebbc6e5 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h
@@ -42,6 +42,6 @@ public:
MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index fcc5f5b..3d1a245 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -49,6 +49,6 @@ public:
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h
index 61a6b19..95c9293 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430SelectionDAGInfo.h
@@ -26,6 +26,6 @@ public:
~MSP430SelectionDAGInfo();
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
index 3dda3bf..6374f41 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -31,7 +31,7 @@ MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
return *this;
}
-MSP430Subtarget::MSP430Subtarget(const std::string &TT, const std::string &CPU,
+MSP430Subtarget::MSP430Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
: MSP430GenSubtargetInfo(TT, CPU, FS), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h
index 30d46d3..958a5d3 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h
@@ -41,7 +41,7 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
- MSP430Subtarget(const std::string &TT, const std::string &CPU,
+ MSP430Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM);
MSP430Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
@@ -64,6 +64,6 @@ public:
return &TSInfo;
}
};
-} // End llvm namespace
+} // namespace llvm
#endif // LLVM_TARGET_MSP430_SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index d6cc4ae..97a4047 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -25,7 +25,7 @@ extern "C" void LLVMInitializeMSP430Target() {
RegisterTargetMachine<MSP430TargetMachine> X(TheMSP430Target);
}
-MSP430TargetMachine::MSP430TargetMachine(const Target &T, StringRef TT,
+MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
index 6ccd30d..4f955a8 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
@@ -28,8 +28,8 @@ class MSP430TargetMachine : public LLVMTargetMachine {
MSP430Subtarget Subtarget;
public:
- MSP430TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
+ MSP430TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
~MSP430TargetMachine() override;
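The StringRef-to-Triple constructor change above recurs for every target in this patch; a minimal sketch of how a caller would now build the Triple once and pass it along, assuming the caller normalizes the triple string and falls back to the host default (neither step is part of this diff).

#include "llvm/ADT/Triple.h"
#include "llvm/Support/Host.h"
using namespace llvm;

static Triple parseTargetTriple(StringRef TT) {
  Triple TheTriple(Triple::normalize(TT)); // e.g. "hexagon-unknown-elf"
  if (TheTriple.getTriple().empty())
    TheTriple.setTriple(sys::getDefaultTargetTriple());
  return TheTriple;
}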
diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 9c054e5..5b8d633 100644
--- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -186,6 +186,10 @@ class MipsAsmParser : public MCTargetAsmParser {
bool Is32BitImm, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ bool loadAndAddSymbolAddress(const MCExpr *SymExpr, unsigned DstReg,
+ unsigned SrcReg, bool Is32BitSym, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
bool expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
@@ -197,10 +201,6 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
- void expandLoadAddressSym(const MCOperand &DstRegOp, const MCOperand &SymOp,
- bool Is32BitSym, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions);
-
void expandMemInst(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions, bool isLoad,
bool isImmOpnd);
@@ -208,6 +208,12 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ bool expandBranchImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
+ bool expandCondBranches(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
void createNop(bool hasShortDelaySlot, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
@@ -879,6 +885,9 @@ public:
bool isConstantImm() const {
return isImm() && dyn_cast<MCConstantExpr>(getImm());
}
+ template <unsigned Bits> bool isUImm() const {
+ return isImm() && isConstantImm() && isUInt<Bits>(getConstantImm());
+ }
bool isToken() const override {
// Note: It's not possible to pretend that other operand kinds are tokens.
// The matcher emitter checks tokens first.
@@ -1616,6 +1625,16 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) {
case Mips::SWM_MM:
case Mips::JalOneReg:
case Mips::JalTwoReg:
+ case Mips::BneImm:
+ case Mips::BeqImm:
+ case Mips::BLT:
+ case Mips::BLE:
+ case Mips::BGE:
+ case Mips::BGT:
+ case Mips::BLTU:
+ case Mips::BLEU:
+ case Mips::BGEU:
+ case Mips::BGTU:
return true;
default:
return false;
@@ -1642,6 +1661,18 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
case Mips::JalOneReg:
case Mips::JalTwoReg:
return expandJalWithRegs(Inst, IDLoc, Instructions);
+ case Mips::BneImm:
+ case Mips::BeqImm:
+ return expandBranchImm(Inst, IDLoc, Instructions);
+ case Mips::BLT:
+ case Mips::BLE:
+ case Mips::BGE:
+ case Mips::BGT:
+ case Mips::BLTU:
+ case Mips::BLEU:
+ case Mips::BGEU:
+ case Mips::BGTU:
+ return expandCondBranches(Inst, IDLoc, Instructions);
}
}
@@ -1898,15 +1929,20 @@ MipsAsmParser::expandLoadAddressReg(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
const MCOperand &DstRegOp = Inst.getOperand(0);
assert(DstRegOp.isReg() && "expected register operand kind");
+ const MCOperand &SrcRegOp = Inst.getOperand(1);
+ assert(SrcRegOp.isReg() && "expected register operand kind");
+
const MCOperand &ImmOp = Inst.getOperand(2);
assert((ImmOp.isImm() || ImmOp.isExpr()) &&
"expected immediate operand kind");
if (!ImmOp.isImm()) {
- expandLoadAddressSym(DstRegOp, ImmOp, Is32BitImm, IDLoc, Instructions);
+ if (loadAndAddSymbolAddress(ImmOp.getExpr(), DstRegOp.getReg(),
+ SrcRegOp.getReg(), Is32BitImm, IDLoc,
+ Instructions))
+ return true;
+
return false;
}
- const MCOperand &SrcRegOp = Inst.getOperand(1);
- assert(SrcRegOp.isReg() && "expected register operand kind");
if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), SrcRegOp.getReg(),
Is32BitImm, IDLoc, Instructions))
@@ -1925,7 +1961,11 @@ MipsAsmParser::expandLoadAddressImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
assert((ImmOp.isImm() || ImmOp.isExpr()) &&
"expected immediate operand kind");
if (!ImmOp.isImm()) {
- expandLoadAddressSym(DstRegOp, ImmOp, Is32BitImm, IDLoc, Instructions);
+ if (loadAndAddSymbolAddress(ImmOp.getExpr(), DstRegOp.getReg(),
+ Mips::NoRegister, Is32BitImm, IDLoc,
+ Instructions))
+ return true;
+
return false;
}
@@ -1936,8 +1976,8 @@ MipsAsmParser::expandLoadAddressImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
return false;
}
-void MipsAsmParser::expandLoadAddressSym(
- const MCOperand &DstRegOp, const MCOperand &SymOp, bool Is32BitSym,
+bool MipsAsmParser::loadAndAddSymbolAddress(
+ const MCExpr *SymExpr, unsigned DstReg, unsigned SrcReg, bool Is32BitSym,
SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
warnIfNoMacro(IDLoc);
@@ -1945,14 +1985,12 @@ void MipsAsmParser::expandLoadAddressSym(
Warning(IDLoc, "instruction loads the 32-bit address of a 64-bit symbol");
MCInst tmpInst;
- unsigned RegNo = DstRegOp.getReg();
- const MCSymbolRefExpr *Symbol = cast<MCSymbolRefExpr>(SymOp.getExpr());
- const MCSymbolRefExpr *HiExpr =
- MCSymbolRefExpr::create(Symbol->getSymbol().getName(),
- MCSymbolRefExpr::VK_Mips_ABS_HI, getContext());
- const MCSymbolRefExpr *LoExpr =
- MCSymbolRefExpr::create(Symbol->getSymbol().getName(),
- MCSymbolRefExpr::VK_Mips_ABS_LO, getContext());
+ const MCSymbolRefExpr *Symbol = cast<MCSymbolRefExpr>(SymExpr);
+ const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::create(
+ &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_ABS_HI, getContext());
+ const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::create(
+ &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_ABS_LO, getContext());
+
if (!Is32BitSym) {
// If it's a 64-bit architecture, expand to:
// la d,sym => lui d,highest(sym)
@@ -1961,36 +1999,39 @@ void MipsAsmParser::expandLoadAddressSym(
// ori d,d,hi16(sym)
// dsll d,d,16
// ori d,d,lo16(sym)
- const MCSymbolRefExpr *HighestExpr =
- MCSymbolRefExpr::create(Symbol->getSymbol().getName(),
- MCSymbolRefExpr::VK_Mips_HIGHEST, getContext());
- const MCSymbolRefExpr *HigherExpr =
- MCSymbolRefExpr::create(Symbol->getSymbol().getName(),
- MCSymbolRefExpr::VK_Mips_HIGHER, getContext());
+ const MCSymbolRefExpr *HighestExpr = MCSymbolRefExpr::create(
+ &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_HIGHEST, getContext());
+ const MCSymbolRefExpr *HigherExpr = MCSymbolRefExpr::create(
+ &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_HIGHER, getContext());
tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::createReg(RegNo));
+ tmpInst.addOperand(MCOperand::createReg(DstReg));
tmpInst.addOperand(MCOperand::createExpr(HighestExpr));
Instructions.push_back(tmpInst);
- createLShiftOri<0>(MCOperand::createExpr(HigherExpr), RegNo, SMLoc(),
+ createLShiftOri<0>(MCOperand::createExpr(HigherExpr), DstReg, SMLoc(),
Instructions);
- createLShiftOri<16>(MCOperand::createExpr(HiExpr), RegNo, SMLoc(),
+ createLShiftOri<16>(MCOperand::createExpr(HiExpr), DstReg, SMLoc(),
Instructions);
- createLShiftOri<16>(MCOperand::createExpr(LoExpr), RegNo, SMLoc(),
+ createLShiftOri<16>(MCOperand::createExpr(LoExpr), DstReg, SMLoc(),
Instructions);
} else {
// Otherwise, expand to:
// la d,sym => lui d,hi16(sym)
// ori d,d,lo16(sym)
tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::createReg(RegNo));
+ tmpInst.addOperand(MCOperand::createReg(DstReg));
tmpInst.addOperand(MCOperand::createExpr(HiExpr));
Instructions.push_back(tmpInst);
- createLShiftOri<0>(MCOperand::createExpr(LoExpr), RegNo, SMLoc(),
+ createLShiftOri<0>(MCOperand::createExpr(LoExpr), DstReg, SMLoc(),
Instructions);
}
+
+ if (SrcReg != Mips::NoRegister)
+ createAddu(DstReg, DstReg, SrcReg, Instructions);
+
+ return false;
}
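(For reference, a sketch of the expansion the reworked loadAndAddSymbolAddress performs for a 32-bit symbol when a base register is supplied, i.e. the SrcReg != Mips::NoRegister path above; the registers and symbol name are illustrative, not taken from the patch:

  la   $4, sym($5)
  # expands to:
  lui  $4, hi16(sym)
  ori  $4, $4, lo16(sym)
  addu $4, $4, $5       # appended by createAddu when a base register is given
)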
bool MipsAsmParser::expandUncondBranchMMPseudo(
@@ -2032,10 +2073,62 @@ bool MipsAsmParser::expandUncondBranchMMPseudo(
return false;
}
+bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ const MCOperand &DstRegOp = Inst.getOperand(0);
+ assert(DstRegOp.isReg() && "expected register operand kind");
+
+ const MCOperand &ImmOp = Inst.getOperand(1);
+ assert(ImmOp.isImm() && "expected immediate operand kind");
+
+ const MCOperand &MemOffsetOp = Inst.getOperand(2);
+ assert(MemOffsetOp.isImm() && "expected immediate operand kind");
+
+ unsigned OpCode = 0;
+ switch (Inst.getOpcode()) {
+ case Mips::BneImm:
+ OpCode = Mips::BNE;
+ break;
+ case Mips::BeqImm:
+ OpCode = Mips::BEQ;
+ break;
+ default:
+ llvm_unreachable("Unknown immediate branch pseudo-instruction.");
+ break;
+ }
+
+ int64_t ImmValue = ImmOp.getImm();
+ if (ImmValue == 0) {
+ MCInst BranchInst;
+ BranchInst.setOpcode(OpCode);
+ BranchInst.addOperand(DstRegOp);
+ BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ BranchInst.addOperand(MemOffsetOp);
+ Instructions.push_back(BranchInst);
+ } else {
+ warnIfNoMacro(IDLoc);
+
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+
+ if (loadImmediate(ImmValue, ATReg, Mips::NoRegister, !isGP64bit(), IDLoc,
+ Instructions))
+ return true;
+
+ MCInst BranchInst;
+ BranchInst.setOpcode(OpCode);
+ BranchInst.addOperand(DstRegOp);
+ BranchInst.addOperand(MCOperand::createReg(ATReg));
+ BranchInst.addOperand(MemOffsetOp);
+ Instructions.push_back(BranchInst);
+ }
+ return false;
+}
+
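(A sketch of what the new expandBranchImm produces for an immediate-operand branch such as "bne $4, 100, label"; the register, value, and label are illustrative, and the exact instructions used to materialise the immediate depend on loadImmediate and the value's range:

  bne   $4, 100, label
  # for a non-zero immediate, expands to something like:
  addiu $1, $zero, 100    # $1 is the assembler temporary ($at)
  bne   $4, $1, label
  # for an immediate of 0, simply to:
  bne   $4, $zero, label
)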
void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions,
bool isLoad, bool isImmOpnd) {
- const MCSymbolRefExpr *SR;
MCInst TempInst;
unsigned ImmOffset, HiOffset, LoOffset;
const MCExpr *ExprOffset;
@@ -2102,16 +2195,8 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
if (isImmOpnd)
TempInst.addOperand(MCOperand::createImm(HiOffset));
else {
- if (ExprOffset->getKind() == MCExpr::SymbolRef) {
- SR = static_cast<const MCSymbolRefExpr *>(ExprOffset);
- const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::create(
- SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_HI,
- getContext());
- TempInst.addOperand(MCOperand::createExpr(HiExpr));
- } else {
- const MCExpr *HiExpr = evaluateRelocExpr(ExprOffset, "hi");
- TempInst.addOperand(MCOperand::createExpr(HiExpr));
- }
+ const MCExpr *HiExpr = evaluateRelocExpr(ExprOffset, "hi");
+ TempInst.addOperand(MCOperand::createExpr(HiExpr));
}
// Add the instruction to the list.
Instructions.push_back(TempInst);
@@ -2134,15 +2219,8 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
if (isImmOpnd)
TempInst.addOperand(MCOperand::createImm(LoOffset));
else {
- if (ExprOffset->getKind() == MCExpr::SymbolRef) {
- const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::create(
- SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_LO,
- getContext());
- TempInst.addOperand(MCOperand::createExpr(LoExpr));
- } else {
- const MCExpr *LoExpr = evaluateRelocExpr(ExprOffset, "lo");
- TempInst.addOperand(MCOperand::createExpr(LoExpr));
- }
+ const MCExpr *LoExpr = evaluateRelocExpr(ExprOffset, "lo");
+ TempInst.addOperand(MCOperand::createExpr(LoExpr));
}
Instructions.push_back(TempInst);
TempInst.clear();
@@ -2171,6 +2249,206 @@ MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
return false;
}
+bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ unsigned PseudoOpcode = Inst.getOpcode();
+ unsigned SrcReg = Inst.getOperand(0).getReg();
+ unsigned TrgReg = Inst.getOperand(1).getReg();
+ const MCExpr *OffsetExpr = Inst.getOperand(2).getExpr();
+
+ unsigned ZeroSrcOpcode, ZeroTrgOpcode;
+ bool ReverseOrderSLT, IsUnsigned, AcceptsEquality;
+
+ switch (PseudoOpcode) {
+ case Mips::BLT:
+ case Mips::BLTU:
+ AcceptsEquality = false;
+ ReverseOrderSLT = false;
+ IsUnsigned = (PseudoOpcode == Mips::BLTU);
+ ZeroSrcOpcode = Mips::BGTZ;
+ ZeroTrgOpcode = Mips::BLTZ;
+ break;
+ case Mips::BLE:
+ case Mips::BLEU:
+ AcceptsEquality = true;
+ ReverseOrderSLT = true;
+ IsUnsigned = (PseudoOpcode == Mips::BLEU);
+ ZeroSrcOpcode = Mips::BGEZ;
+ ZeroTrgOpcode = Mips::BLEZ;
+ break;
+ case Mips::BGE:
+ case Mips::BGEU:
+ AcceptsEquality = true;
+ ReverseOrderSLT = false;
+ IsUnsigned = (PseudoOpcode == Mips::BGEU);
+ ZeroSrcOpcode = Mips::BLEZ;
+ ZeroTrgOpcode = Mips::BGEZ;
+ break;
+ case Mips::BGT:
+ case Mips::BGTU:
+ AcceptsEquality = false;
+ ReverseOrderSLT = true;
+ IsUnsigned = (PseudoOpcode == Mips::BGTU);
+ ZeroSrcOpcode = Mips::BLTZ;
+ ZeroTrgOpcode = Mips::BGTZ;
+ break;
+ default:
+ llvm_unreachable("unknown opcode for branch pseudo-instruction");
+ }
+
+ MCInst BranchInst;
+ bool IsTrgRegZero = (TrgReg == Mips::ZERO);
+ bool IsSrcRegZero = (SrcReg == Mips::ZERO);
+ if (IsSrcRegZero && IsTrgRegZero) {
+ // FIXME: All of these Opcode-specific if's are needed for compatibility
+ // with GAS' behaviour. However, they may not generate the most efficient
+ // code in some circumstances.
+ if (PseudoOpcode == Mips::BLT) {
+ BranchInst.setOpcode(Mips::BLTZ);
+ BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
+ Instructions.push_back(BranchInst);
+ return false;
+ }
+ if (PseudoOpcode == Mips::BLE) {
+ BranchInst.setOpcode(Mips::BLEZ);
+ BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
+ Instructions.push_back(BranchInst);
+ Warning(IDLoc, "branch is always taken");
+ return false;
+ }
+ if (PseudoOpcode == Mips::BGE) {
+ BranchInst.setOpcode(Mips::BGEZ);
+ BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
+ Instructions.push_back(BranchInst);
+ Warning(IDLoc, "branch is always taken");
+ return false;
+ }
+ if (PseudoOpcode == Mips::BGT) {
+ BranchInst.setOpcode(Mips::BGTZ);
+ BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
+ Instructions.push_back(BranchInst);
+ return false;
+ }
+ if (PseudoOpcode == Mips::BGTU) {
+ BranchInst.setOpcode(Mips::BNE);
+ BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
+ Instructions.push_back(BranchInst);
+ return false;
+ }
+ if (AcceptsEquality) {
+ // If both registers are $0 and the pseudo-branch accepts equality, it
+ // will always be taken, so we emit an unconditional branch.
+ BranchInst.setOpcode(Mips::BEQ);
+ BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
+ Instructions.push_back(BranchInst);
+ Warning(IDLoc, "branch is always taken");
+ return false;
+ }
+ // If both registers are $0 and the pseudo-branch does not accept
+ // equality, it will never be taken, so we don't have to emit anything.
+ return false;
+ }
+ if (IsSrcRegZero || IsTrgRegZero) {
+ if ((IsSrcRegZero && PseudoOpcode == Mips::BGTU) ||
+ (IsTrgRegZero && PseudoOpcode == Mips::BLTU)) {
+ // If the $rs is $0 and the pseudo-branch is BGTU (0 > x) or
+ // if the $rt is $0 and the pseudo-branch is BLTU (x < 0),
+ // the pseudo-branch will never be taken, so we don't emit anything.
+ // This only applies to unsigned pseudo-branches.
+ return false;
+ }
+ if ((IsSrcRegZero && PseudoOpcode == Mips::BLEU) ||
+ (IsTrgRegZero && PseudoOpcode == Mips::BGEU)) {
+ // If the $rs is $0 and the pseudo-branch is BLEU (0 <= x) or
+ // if the $rt is $0 and the pseudo-branch is BGEU (x >= 0),
+ // the pseudo-branch will always be taken, so we emit an unconditional
+ // branch.
+ // This only applies to unsigned pseudo-branches.
+ BranchInst.setOpcode(Mips::BEQ);
+ BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
+ Instructions.push_back(BranchInst);
+ Warning(IDLoc, "branch is always taken");
+ return false;
+ }
+ if (IsUnsigned) {
+ // If the $rs is $0 and the pseudo-branch is BLTU (0 < x) or
+ // if the $rt is $0 and the pseudo-branch is BGTU (x > 0),
+ // the pseudo-branch will be taken only when the non-zero register is
+ // different from 0, so we emit a BNEZ.
+ //
+ // If the $rs is $0 and the pseudo-branch is BGEU (0 >= x) or
+ // if the $rt is $0 and the pseudo-branch is BLEU (x <= 0),
+ // the pseudo-branch will be taken only when the non-zero register is
+ // equal to 0, so we emit a BEQZ.
+ //
+ // Because only BLEU and BGEU branch on equality, we can use the
+ // AcceptsEquality variable to decide when to emit the BEQZ.
+ BranchInst.setOpcode(AcceptsEquality ? Mips::BEQ : Mips::BNE);
+ BranchInst.addOperand(
+ MCOperand::createReg(IsSrcRegZero ? TrgReg : SrcReg));
+ BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
+ Instructions.push_back(BranchInst);
+ return false;
+ }
+ // If we have a signed pseudo-branch and one of the registers is $0,
+ // we can use an appropriate compare-to-zero branch. We select which one
+ // to use in the switch statement above.
+ BranchInst.setOpcode(IsSrcRegZero ? ZeroSrcOpcode : ZeroTrgOpcode);
+ BranchInst.addOperand(MCOperand::createReg(IsSrcRegZero ? TrgReg : SrcReg));
+ BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
+ Instructions.push_back(BranchInst);
+ return false;
+ }
+
+ // If neither the SrcReg nor the TrgReg is $0, we need AT to perform the
+ // expansions. If it is not available, we return.
+ unsigned ATRegNum = getATReg(IDLoc);
+ if (!ATRegNum)
+ return true;
+
+ warnIfNoMacro(IDLoc);
+
+ // SLT fits well with 2 of our 4 pseudo-branches:
+ // BLT, where $rs < $rt, translates into "slt $at, $rs, $rt" and
+ // BGT, where $rs > $rt, translates into "slt $at, $rt, $rs".
+ // If the result of the SLT is 1, we branch, and if it's 0, we don't.
+ // This is accomplished by using a BNEZ with the result of the SLT.
+ //
+ // The other 2 pseudo-branches are opposites of the above 2 (BGE with BLT
+ // and BLE with BGT), so we change the BNEZ into a BEQZ.
+ // Because only BGE and BLE branch on equality, we can use the
+ // AcceptsEquality variable to decide when to emit the BEQZ.
+ // Note that the order of the SLT arguments doesn't change between
+ // opposites.
+ //
+ // The same applies to the unsigned variants, except that SLTu is used
+ // instead of SLT.
+ MCInst SetInst;
+ SetInst.setOpcode(IsUnsigned ? Mips::SLTu : Mips::SLT);
+ SetInst.addOperand(MCOperand::createReg(ATRegNum));
+ SetInst.addOperand(MCOperand::createReg(ReverseOrderSLT ? TrgReg : SrcReg));
+ SetInst.addOperand(MCOperand::createReg(ReverseOrderSLT ? SrcReg : TrgReg));
+ Instructions.push_back(SetInst);
+
+ BranchInst.setOpcode(AcceptsEquality ? Mips::BEQ : Mips::BNE);
+ BranchInst.addOperand(MCOperand::createReg(ATRegNum));
+ BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
+ Instructions.push_back(BranchInst);
+ return false;
+}
+
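(A sketch of the general two-register case handled at the end of expandCondBranches, using the SLT/SLTu-plus-branch pattern described in the comments above; registers and label are illustrative:

  blt  $4, $5, label      # pseudo-instruction
  # expands to:
  slt  $1, $4, $5         # $1 is the assembler temporary ($at)
  bne  $1, $zero, label

  bge  $4, $5, label      # the opposite pseudo-instruction
  # expands to:
  slt  $1, $4, $5
  beq  $1, $zero, label
)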
void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
MCInst NopInst;
@@ -2572,7 +2850,7 @@ const MCExpr *MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr,
if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(Expr)) {
// It's a symbol, create a symbolic expression from the symbol.
- StringRef Symbol = MSRE->getSymbol().getName();
+ const MCSymbol *Symbol = &MSRE->getSymbol();
MCSymbolRefExpr::VariantKind VK = getVariantKind(RelocStr);
Res = MCSymbolRefExpr::create(Symbol, VK, getContext());
return Res;
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
index 70b9cca..725ea7f 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
@@ -66,4 +66,4 @@ MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection) {
OS.EmitIntValue(ABIFlagsSection.getFlags2Value(), 4); // flags2
return OS;
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
index b078cd3..bf306ee 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
@@ -186,6 +186,6 @@ public:
};
MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index bf8f7d1..8e6c9e6 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -47,7 +47,7 @@ unsigned MipsABIInfo::GetCalleeAllocdArgSizeInBytes(CallingConv::ID CC) const {
llvm_unreachable("Unhandled ABI");
}
-MipsABIInfo MipsABIInfo::computeTargetABI(Triple TT, StringRef CPU,
+MipsABIInfo MipsABIInfo::computeTargetABI(const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
if (Options.getABIName().startswith("o32"))
return MipsABIInfo::O32();
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
index d20dc90..aa965e82a 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
@@ -36,7 +36,7 @@ public:
static MipsABIInfo N32() { return MipsABIInfo(ABI::N32); }
static MipsABIInfo N64() { return MipsABIInfo(ABI::N64); }
static MipsABIInfo EABI() { return MipsABIInfo(ABI::EABI); }
- static MipsABIInfo computeTargetABI(Triple TT, StringRef CPU,
+ static MipsABIInfo computeTargetABI(const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
bool IsKnown() const { return ThisABI != ABI::Unknown; }
@@ -73,6 +73,6 @@ public:
unsigned GetEhDataReg(unsigned I) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index d823ffc..5c746b2 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -417,32 +417,27 @@ void MipsAsmBackend::processFixupValue(const MCAssembler &Asm,
// MCAsmBackend
MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT,
- StringRef CPU) {
- return new MipsAsmBackend(T, Triple(TT).getOS(),
- /*IsLittle*/true, /*Is64Bit*/false);
+ const Triple &TT, StringRef CPU) {
+ return new MipsAsmBackend(T, TT.getOS(), /*IsLittle*/ true,
+ /*Is64Bit*/ false);
}
MCAsmBackend *llvm::createMipsAsmBackendEB32(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT,
- StringRef CPU) {
- return new MipsAsmBackend(T, Triple(TT).getOS(),
- /*IsLittle*/false, /*Is64Bit*/false);
+ const Triple &TT, StringRef CPU) {
+ return new MipsAsmBackend(T, TT.getOS(), /*IsLittle*/ false,
+ /*Is64Bit*/ false);
}
MCAsmBackend *llvm::createMipsAsmBackendEL64(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT,
- StringRef CPU) {
- return new MipsAsmBackend(T, Triple(TT).getOS(),
- /*IsLittle*/true, /*Is64Bit*/true);
+ const Triple &TT, StringRef CPU) {
+ return new MipsAsmBackend(T, TT.getOS(), /*IsLittle*/ true, /*Is64Bit*/ true);
}
MCAsmBackend *llvm::createMipsAsmBackendEB64(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT,
- StringRef CPU) {
- return new MipsAsmBackend(T, Triple(TT).getOS(),
- /*IsLittle*/false, /*Is64Bit*/true);
+ const Triple &TT, StringRef CPU) {
+ return new MipsAsmBackend(T, TT.getOS(), /*IsLittle*/ false,
+ /*Is64Bit*/ true);
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index b3d5a49..fe84e40 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -87,6 +87,6 @@ public:
}; // class MipsAsmBackend
-} // namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index ff7779e..a7d5a1e 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -119,7 +119,7 @@ namespace MipsII {
FormMask = 15
};
-}
-}
+} // namespace MipsII
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 982a7f5..a45e2ad 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -51,7 +51,7 @@ struct MipsRelocationEntry {
virtual void sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs) override;
};
-}
+} // namespace
MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
bool _isN64, bool IsLittleEndian)
@@ -64,13 +64,47 @@ MipsELFObjectWriter::~MipsELFObjectWriter() {}
unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
- // determine the type of the relocation
+ // Determine the type of the relocation.
unsigned Kind = (unsigned)Fixup.getKind();
switch (Kind) {
+ case Mips::fixup_Mips_16:
+ case FK_Data_2:
+ return IsPCRel ? ELF::R_MIPS_PC16 : ELF::R_MIPS_16;
case Mips::fixup_Mips_32:
case FK_Data_4:
return IsPCRel ? ELF::R_MIPS_PC32 : ELF::R_MIPS_32;
+ }
+
+ if (IsPCRel) {
+ switch (Kind) {
+ case Mips::fixup_Mips_Branch_PCRel:
+ case Mips::fixup_Mips_PC16:
+ return ELF::R_MIPS_PC16;
+ case Mips::fixup_MICROMIPS_PC7_S1:
+ return ELF::R_MICROMIPS_PC7_S1;
+ case Mips::fixup_MICROMIPS_PC10_S1:
+ return ELF::R_MICROMIPS_PC10_S1;
+ case Mips::fixup_MICROMIPS_PC16_S1:
+ return ELF::R_MICROMIPS_PC16_S1;
+ case Mips::fixup_MIPS_PC19_S2:
+ return ELF::R_MIPS_PC19_S2;
+ case Mips::fixup_MIPS_PC18_S3:
+ return ELF::R_MIPS_PC18_S3;
+ case Mips::fixup_MIPS_PC21_S2:
+ return ELF::R_MIPS_PC21_S2;
+ case Mips::fixup_MIPS_PC26_S2:
+ return ELF::R_MIPS_PC26_S2;
+ case Mips::fixup_MIPS_PCHI16:
+ return ELF::R_MIPS_PCHI16;
+ case Mips::fixup_MIPS_PCLO16:
+ return ELF::R_MIPS_PCLO16;
+ }
+
+ llvm_unreachable("invalid PC-relative fixup kind!");
+ }
+
+ switch (Kind) {
case Mips::fixup_Mips_64:
case FK_Data_8:
return ELF::R_MIPS_64;
@@ -110,9 +144,6 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
return ELF::R_MIPS_TLS_DTPREL_HI16;
case Mips::fixup_Mips_DTPREL_LO:
return ELF::R_MIPS_TLS_DTPREL_LO16;
- case Mips::fixup_Mips_Branch_PCRel:
- case Mips::fixup_Mips_PC16:
- return ELF::R_MIPS_PC16;
case Mips::fixup_Mips_GOT_PAGE:
return ELF::R_MIPS_GOT_PAGE;
case Mips::fixup_Mips_GOT_OFST:
@@ -153,12 +184,6 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
return ELF::R_MICROMIPS_LO16;
case Mips::fixup_MICROMIPS_GOT16:
return ELF::R_MICROMIPS_GOT16;
- case Mips::fixup_MICROMIPS_PC7_S1:
- return ELF::R_MICROMIPS_PC7_S1;
- case Mips::fixup_MICROMIPS_PC10_S1:
- return ELF::R_MICROMIPS_PC10_S1;
- case Mips::fixup_MICROMIPS_PC16_S1:
- return ELF::R_MICROMIPS_PC16_S1;
case Mips::fixup_MICROMIPS_CALL16:
return ELF::R_MICROMIPS_CALL16;
case Mips::fixup_MICROMIPS_GOT_DISP:
@@ -179,19 +204,8 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
return ELF::R_MICROMIPS_TLS_TPREL_HI16;
case Mips::fixup_MICROMIPS_TLS_TPREL_LO16:
return ELF::R_MICROMIPS_TLS_TPREL_LO16;
- case Mips::fixup_MIPS_PC19_S2:
- return ELF::R_MIPS_PC19_S2;
- case Mips::fixup_MIPS_PC18_S3:
- return ELF::R_MIPS_PC18_S3;
- case Mips::fixup_MIPS_PC21_S2:
- return ELF::R_MIPS_PC21_S2;
- case Mips::fixup_MIPS_PC26_S2:
- return ELF::R_MIPS_PC26_S2;
- case Mips::fixup_MIPS_PCHI16:
- return ELF::R_MIPS_PCHI16;
- case Mips::fixup_MIPS_PCLO16:
- return ELF::R_MIPS_PCLO16;
}
+
llvm_unreachable("invalid fixup kind!");
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
index 687b800..81a0a98 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -25,6 +25,6 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg);
MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 54d8863..9bdf8235 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -43,11 +43,9 @@ using namespace llvm;
/// Select the Mips CPU for the given triple and cpu name.
/// FIXME: Merge with the copy in MipsSubtarget.cpp
-StringRef MIPS_MC::selectMipsCPU(StringRef TT, StringRef CPU) {
+StringRef MIPS_MC::selectMipsCPU(const Triple &TT, StringRef CPU) {
if (CPU.empty() || CPU == "generic") {
- Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::mips ||
- TheTriple.getArch() == Triple::mipsel)
+ if (TT.getArch() == Triple::mips || TT.getArch() == Triple::mipsel)
CPU = "mips32";
else
CPU = "mips64";
@@ -67,8 +65,8 @@ static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) {
return X;
}
-static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
+static MCSubtargetInfo *createMipsMCSubtargetInfo(const Triple &TT,
+ StringRef CPU, StringRef FS) {
CPU = MIPS_MC::selectMipsCPU(TT, CPU);
MCSubtargetInfo *X = new MCSubtargetInfo();
InitMipsMCSubtargetInfo(X, TT, CPU, FS);
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 577a8b3..20358a0 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -26,6 +26,7 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Target;
+class Triple;
class raw_ostream;
class raw_pwrite_stream;
@@ -42,26 +43,26 @@ MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
MCContext &Ctx);
MCAsmBackend *createMipsAsmBackendEB32(const Target &T,
- const MCRegisterInfo &MRI, StringRef TT,
- StringRef CPU);
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU);
MCAsmBackend *createMipsAsmBackendEL32(const Target &T,
- const MCRegisterInfo &MRI, StringRef TT,
- StringRef CPU);
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU);
MCAsmBackend *createMipsAsmBackendEB64(const Target &T,
- const MCRegisterInfo &MRI, StringRef TT,
- StringRef CPU);
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU);
MCAsmBackend *createMipsAsmBackendEL64(const Target &T,
- const MCRegisterInfo &MRI, StringRef TT,
- StringRef CPU);
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU);
MCObjectWriter *createMipsELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
bool IsLittleEndian, bool Is64Bit);
namespace MIPS_MC {
-StringRef selectMipsCPU(StringRef TT, StringRef CPU);
+StringRef selectMipsCPU(const Triple &TT, StringRef CPU);
}
-} // End llvm namespace
+} // namespace llvm
// Defines symbolic names for Mips registers. This defines a mapping from
// register name to register number.
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index aef9bd3..5378675 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -265,4 +265,4 @@ MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
return S;
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
index 7350b97..78ba76d 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
@@ -221,3 +221,22 @@ class CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<bits<6> funct> : MipsR6Inst {
let Inst{20-16} = rt;
let Inst{15-0} = offset;
}
+
+class ERET_FM_MMR6<string instr_asm> : MMR6Arch<instr_asm> {
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-16} = 0x00;
+ let Inst{15-6} = 0x3cd;
+ let Inst{5-0} = 0x3c;
+}
+
+class ERETNC_FM_MMR6<string instr_asm> : MMR6Arch<instr_asm> {
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-17} = 0x00;
+ let Inst{16-16} = 0x01;
+ let Inst{15-6} = 0x3cd;
+ let Inst{5-0} = 0x3c;
+}
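(Combining the field assignments above gives the concrete encodings — a computed check, not stated in the patch: ERET_FM_MMR6 packs 0x3cd into bits 15-6 and 0x3c into bits 5-0, i.e. (0x3cd << 6) | 0x3c = 0x0000F37C, while ERETNC_FM_MMR6 additionally sets bit 16, giving 0x0001F37C.)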
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 2259d5d..ed71c3d 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -40,6 +40,8 @@ class CLO_MMR6_ENC : POOL32A_2R_FM_MMR6<0b0100101100>;
class CLZ_MMR6_ENC : SPECIAL_2R_FM_MMR6<0b010000>;
class DIV_MMR6_ENC : ARITH_FM_MMR6<"div", 0x118>;
class DIVU_MMR6_ENC : ARITH_FM_MMR6<"divu", 0x198>;
+class ERET_MMR6_ENC : ERET_FM_MMR6<"eret">;
+class ERETNC_MMR6_ENC : ERETNC_FM_MMR6<"eretnc">;
class JIALC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b100000>;
class JIC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b101000>;
class LSA_MMR6_ENC : POOL32A_LSA_FM<0b001111>;
@@ -164,6 +166,9 @@ class CLO_CLZ_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
class CLO_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clo", GPR32Opnd>;
class CLZ_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clz", GPR32Opnd>;
+class ERET_MMR6_DESC : ER_FT<"eret">;
+class ERETNC_MMR6_DESC : ER_FT<"eretnc">;
+
class JMP_MMR6_IDX_COMPACT_DESC_BASE<string opstr, DAGOperand opnd,
RegisterOperand GPROpnd>
: MMR6Arch<opstr> {
@@ -302,6 +307,9 @@ def CLO_MMR6 : R6MMR6Rel, CLO_MMR6_ENC, CLO_MMR6_DESC, ISA_MICROMIPS32R6;
def CLZ_MMR6 : R6MMR6Rel, CLZ_MMR6_ENC, CLZ_MMR6_DESC, ISA_MICROMIPS32R6;
def DIV_MMR6 : R6MMR6Rel, DIV_MMR6_DESC, DIV_MMR6_ENC, ISA_MICROMIPS32R6;
def DIVU_MMR6 : R6MMR6Rel, DIVU_MMR6_DESC, DIVU_MMR6_ENC, ISA_MICROMIPS32R6;
+def ERET_MMR6 : R6MMR6Rel, ERET_MMR6_DESC, ERET_MMR6_ENC, ISA_MICROMIPS32R6;
+def ERETNC_MMR6 : R6MMR6Rel, ERETNC_MMR6_DESC, ERETNC_MMR6_ENC,
+ ISA_MICROMIPS32R6;
def JIALC_MMR6 : R6MMR6Rel, JIALC_MMR6_ENC, JIALC_MMR6_DESC, ISA_MICROMIPS32R6;
def JIC_MMR6 : R6MMR6Rel, JIC_MMR6_ENC, JIC_MMR6_DESC, ISA_MICROMIPS32R6;
def LSA_MMR6 : R6MMR6Rel, LSA_MMR6_ENC, LSA_MMR6_DESC, ISA_MICROMIPS32R6;
diff --git a/contrib/llvm/lib/Target/Mips/Mips.h b/contrib/llvm/lib/Target/Mips/Mips.h
index 671d7a8..604b670 100644
--- a/contrib/llvm/lib/Target/Mips/Mips.h
+++ b/contrib/llvm/lib/Target/Mips/Mips.h
@@ -31,6 +31,6 @@ namespace llvm {
FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm);
-} // end namespace llvm;
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h
index f281c92..2c33cfb 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h
+++ b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h
@@ -42,6 +42,6 @@ public:
RegScavenger *RS) const override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
index 893fc7c..f2831fd 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
@@ -62,7 +62,7 @@ namespace {
};
char Mips16HardFloat::ID = 0;
-}
+} // namespace
//
// Return types that matter for hard float are:
diff --git a/contrib/llvm/lib/Target/Mips/Mips16HardFloatInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16HardFloatInfo.cpp
index 2eb6e5d..bf82108 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16HardFloatInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16HardFloatInfo.cpp
@@ -46,5 +46,5 @@ extern FuncSignature const *findFuncSignature(const char *name) {
}
return nullptr;
}
-}
-}
+} // namespace Mips16HardFloatInfo
+} // namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/Mips16HardFloatInfo.h b/contrib/llvm/lib/Target/Mips/Mips16HardFloatInfo.h
index 7295c28..8354c33 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16HardFloatInfo.h
+++ b/contrib/llvm/lib/Target/Mips/Mips16HardFloatInfo.h
@@ -44,7 +44,7 @@ struct FuncNameSignature {
extern const FuncNameSignature PredefinedFuncs[];
extern FuncSignature const *findFuncSignature(const char *name);
-}
-}
+} // namespace Mips16HardFloatInfo
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h
index ae0e61e..ce6b3f8 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -48,6 +48,6 @@ private:
FunctionPass *createMips16ISelDag(MipsTargetMachine &TM);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
index 846e3c9..c52ef2a 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -54,7 +54,7 @@ struct Mips16IntrinsicHelperType{
return std::strcmp(Name, RHS.Name) == 0;
}
};
-}
+} // namespace
// Libcalls for which no helper is generated. Sorted by name for binary search.
static const Mips16Libcall HardFloatLibCalls[] = {
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.h b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.h
index d3b9f75..99d3cac 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.h
@@ -77,6 +77,6 @@ namespace llvm {
unsigned SltiOpc, unsigned SltiXOpc,
MachineInstr *MI, MachineBasicBlock *BB )const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
index 6540b40..1132d8a 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
@@ -123,6 +123,6 @@ private:
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
index 8a27874..83781ff 100644
--- a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -27,8 +27,6 @@ def uimm16_64 : Operand<i64> {
// Signed Operand
def simm10_64 : Operand<i64>;
-def imm64: Operand<i64>;
-
// Transformation Function - get Imm - 32.
def Subtract32 : SDNodeXForm<imm, [{
return getImm(N, (unsigned)N->getZExtValue() - 32);
diff --git a/contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.h b/contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.h
index ae3c38c..6b5d02b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.h
+++ b/contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.h
@@ -58,6 +58,6 @@ namespace llvm {
unsigned ADDiu, ORi, SLL, LUi;
InstSeq Insts;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index f84666b..1c80021 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -694,9 +694,8 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
// clean anyhow.
// FIXME: For ifunc related functions we could iterate over and look
// for a feature string that doesn't match the default one.
- StringRef TT = TM.getTargetTriple();
- StringRef CPU =
- MIPS_MC::selectMipsCPU(TM.getTargetTriple(), TM.getTargetCPU());
+ const Triple &TT = TM.getTargetTriple();
+ StringRef CPU = MIPS_MC::selectMipsCPU(TT, TM.getTargetCPU());
StringRef FS = TM.getTargetFeatureString();
const MipsTargetMachine &MTM = static_cast<const MipsTargetMachine &>(TM);
const MipsSubtarget STI(TT, CPU, FS, MTM.isLittleEndian(), MTM);
@@ -900,7 +899,8 @@ void MipsAsmPrinter::EmitFPCallStub(
// freed) and since we're at the global level we can use the default
// constructed subtarget.
std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
- TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
+ TM.getTargetTriple().str(), TM.getTargetCPU(),
+ TM.getTargetFeatureString()));
//
// .global xxxx
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
index a7f3304..3c2b843 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
@@ -145,7 +145,7 @@ public:
void EmitEndOfAsmFile(Module &M) override;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsCCState.h b/contrib/llvm/lib/Target/Mips/MipsCCState.h
index 081c393..04a9ef5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCCState.h
+++ b/contrib/llvm/lib/Target/Mips/MipsCCState.h
@@ -131,6 +131,6 @@ public:
bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; }
SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; }
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
index 5eabd58..dab9c05 100644
--- a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
@@ -49,6 +49,6 @@ protected:
const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST);
const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
index 1426d0f..83be74f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -129,6 +129,6 @@ private:
unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
index bc9a1ce..e4f3cde 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -204,7 +204,7 @@ namespace llvm {
SDL,
SDR
};
- }
+ } // namespace MipsISD
//===--------------------------------------------------------------------===//
// TargetLowering Implementation
@@ -566,6 +566,6 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
}
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index 0839147..bb23cc0 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -96,8 +96,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
void
MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- DebugLoc DL,
- const SmallVectorImpl<MachineOperand> &Cond) const {
+ DebugLoc DL, ArrayRef<MachineOperand> Cond) const {
unsigned Opc = Cond[0].getImm();
const MCInstrDesc &MCID = get(Opc);
MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID);
@@ -115,7 +114,7 @@ MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
unsigned MipsInstrInfo::InsertBranch(
MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
+ ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
index 4589535..3daff5f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -59,8 +59,7 @@ public:
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
bool
@@ -140,13 +139,13 @@ private:
SmallVectorImpl<MachineOperand> &Cond) const;
void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL,
- const SmallVectorImpl<MachineOperand>& Cond) const;
+ ArrayRef<MachineOperand> Cond) const;
};
/// Create MipsInstrInfo objects.
const MipsInstrInfo *createMips16InstrInfo(const MipsSubtarget &STI);
const MipsInstrInfo *createMipsSEInstrInfo(const MipsSubtarget &STI);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
index 58791cf..2a7949e 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -358,6 +358,8 @@ def calltarget : Operand<iPTR> {
let ParserMatchClass = MipsJumpTargetAsmOperand;
}
+def imm64: Operand<i64>;
+
def simm9 : Operand<i32>;
def simm10 : Operand<i32>;
def simm11 : Operand<i32>;
@@ -384,7 +386,15 @@ def simm20 : Operand<i32> {
def uimm20 : Operand<i32> {
}
+def MipsUImm10AsmOperand : AsmOperandClass {
+ let Name = "UImm10";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "parseImm";
+ let PredicateMethod = "isUImm<10>";
+}
+
def uimm10 : Operand<i32> {
+ let ParserMatchClass = MipsUImm10AsmOperand;
}
def simm16_64 : Operand<i64> {
@@ -1273,7 +1283,9 @@ def SYSCALL : MMRel, SYS_FT<"syscall">, SYS_FM<0xc>;
def TRAP : TrapBase<BREAK>;
def SDBBP : MMRel, SYS_FT<"sdbbp">, SDBBP_FM, ISA_MIPS32_NOT_32R6_64R6;
+let AdditionalPredicates = [NotInMicroMips] in {
def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>, INSN_MIPS3_32;
+}
def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>, ISA_MIPS32;
def EI : MMRel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2;
@@ -1672,6 +1684,29 @@ def JalTwoReg : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rs),
def JalOneReg : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs),
"jal\t$rs"> ;
+let hasDelaySlot = 1 in {
+def BneImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rt),
+ (ins imm64:$imm64, brtarget:$offset),
+ "bne\t$rt, $imm64, $offset">;
+def BeqImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rt),
+ (ins imm64:$imm64, brtarget:$offset),
+ "beq\t$rt, $imm64, $offset">;
+
+class CondBranchPseudo<string instr_asm> :
+ MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt,
+ brtarget:$offset),
+ !strconcat(instr_asm, "\t$rs, $rt, $offset")>;
+}
+
+def BLT : CondBranchPseudo<"blt">;
+def BLE : CondBranchPseudo<"ble">;
+def BGE : CondBranchPseudo<"bge">;
+def BGT : CondBranchPseudo<"bgt">;
+def BLTU : CondBranchPseudo<"bltu">;
+def BLEU : CondBranchPseudo<"bleu">;
+def BGEU : CondBranchPseudo<"bgeu">;
+def BGTU : CondBranchPseudo<"bgtu">;
+
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h
index 1ce27e4..a8bd1cd 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h
+++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h
@@ -45,6 +45,6 @@ private:
MCSymbolRefExpr::VariantKind Kind) const;
bool lowerLongBranch(const MachineInstr *MI, MCInst &OutMI) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
index b18a673..8568137 100644
--- a/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
@@ -37,7 +37,7 @@ namespace {
};
char MipsModuleDAGToDAGISel::ID = 0;
-}
+} // namespace
bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
DEBUG(errs() << "In MipsModuleDAGToDAGISel::runMachineFunction\n");
diff --git a/contrib/llvm/lib/Target/Mips/MipsOs16.cpp b/contrib/llvm/lib/Target/Mips/MipsOs16.cpp
index b6cd791..5c71272 100644
--- a/contrib/llvm/lib/Target/Mips/MipsOs16.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsOs16.cpp
@@ -43,7 +43,7 @@ namespace {
};
char MipsOs16::ID = 0;
-}
+} // namespace
// Figure out if we need floating point based on the function signature.
// We need to move variables in and/or out of floating point
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
index ec7bf31..a858f30 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -75,7 +75,7 @@ private:
const MipsSEInstrInfo &TII;
const MipsRegisterInfo &RegInfo;
};
-}
+} // namespace
ExpandPseudo::ExpandPseudo(MachineFunction &MF_)
: MF(MF_), MRI(MF.getRegInfo()),
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h
index 2fcd6bb..ee56b8b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h
@@ -39,6 +39,6 @@ public:
unsigned ehDataReg(unsigned I) const;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
index a894034..fb2f041 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -126,6 +126,6 @@ private:
FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h
index d44f8d8..623630a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h
@@ -112,6 +112,6 @@ namespace llvm {
MachineBasicBlock *emitFEXP2_D_1(MachineInstr *MI,
MachineBasicBlock *BB) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
index bebbabf..cdafe9f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
@@ -113,6 +113,6 @@ private:
MachineBasicBlock::iterator I) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h
index 061423f..feddf98 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -26,6 +26,6 @@ public:
~MipsSelectionDAGInfo();
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
index 7ea10eb..c41bb16 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -59,7 +59,7 @@ static cl::opt<bool>
void MipsSubtarget::anchor() { }
-MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
+MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, bool little,
const MipsTargetMachine &TM)
: MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(MipsDefault),
@@ -126,7 +126,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
}
/// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
-bool MipsSubtarget::enablePostMachineScheduler() const { return true; }
+bool MipsSubtarget::enablePostRAScheduler() const { return true; }
void MipsSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
CriticalPathRCs.clear();
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
index 0bfafc8..c8a2e4b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -147,7 +147,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
public:
/// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
- bool enablePostMachineScheduler() const override;
+ bool enablePostRAScheduler() const override;
void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override;
@@ -161,9 +161,8 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
- MipsSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool little,
- const MipsTargetMachine &TM);
+ MipsSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
+ bool little, const MipsTargetMachine &TM);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
@@ -293,6 +292,6 @@ public:
return &InstrItins;
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index b279184..c820668 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -44,12 +44,11 @@ extern "C" void LLVMInitializeMipsTarget() {
RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget);
}
-static std::string computeDataLayout(StringRef TT, StringRef CPU,
+static std::string computeDataLayout(const Triple &TT, StringRef CPU,
const TargetOptions &Options,
bool isLittle) {
std::string Ret = "";
- MipsABIInfo ABI =
- MipsABIInfo::computeTargetABI(Triple(TT), CPU, Options.MCOptions);
+ MipsABIInfo ABI = MipsABIInfo::computeTargetABI(TT, CPU, Options.MCOptions);
// There are both little and big endian mips.
if (isLittle)
@@ -83,7 +82,7 @@ static std::string computeDataLayout(StringRef TT, StringRef CPU,
// offset from the stack/frame pointer, using StackGrowsUp enables
// an easier handling.
// Using CodeModel::Large enables different CALL behavior.
-MipsTargetMachine::MipsTargetMachine(const Target &T, StringRef TT,
+MipsTargetMachine::MipsTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
@@ -91,7 +90,7 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, StringRef TT,
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
CPU, FS, Options, RM, CM, OL),
isLittle(isLittle), TLOF(make_unique<MipsTargetObjectFile>()),
- ABI(MipsABIInfo::computeTargetABI(Triple(TT), CPU, Options.MCOptions)),
+ ABI(MipsABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)),
Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, *this),
NoMips16Subtarget(TT, CPU, FS.empty() ? "-mips16" : FS.str() + ",-mips16",
isLittle, *this),
@@ -105,21 +104,21 @@ MipsTargetMachine::~MipsTargetMachine() {}
void MipsebTargetMachine::anchor() { }
-MipsebTargetMachine::
-MipsebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+MipsebTargetMachine::MipsebTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
void MipselTargetMachine::anchor() { }
-MipselTargetMachine::
-MipselTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+MipselTargetMachine::MipselTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
const MipsSubtarget *
MipsTargetMachine::getSubtargetImpl(const Function &F) const {
@@ -157,7 +156,8 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<MipsSubtarget>(TargetTriple, CPU, FS, isLittle, *this);
+ I = llvm::make_unique<MipsSubtarget>(TargetTriple, CPU, FS, isLittle,
+ *this);
}
return I.get();
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
index 5427d6a..976970c 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
@@ -39,8 +39,8 @@ class MipsTargetMachine : public LLVMTargetMachine {
mutable StringMap<std::unique_ptr<MipsSubtarget>> SubtargetMap;
public:
- MipsTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM,
+ MipsTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
~MipsTargetMachine() override;
@@ -73,8 +73,8 @@ public:
class MipsebTargetMachine : public MipsTargetMachine {
virtual void anchor();
public:
- MipsebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
+ MipsebTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -84,12 +84,12 @@ public:
class MipselTargetMachine : public MipsTargetMachine {
virtual void anchor();
public:
- MipselTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
+ MipselTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h b/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h
index fed0600..39cadc1 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h
@@ -248,5 +248,5 @@ public:
void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override;
void emitMipsAbiFlags();
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 02c5a21..8144f3f 100644
--- a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -49,6 +49,6 @@ public:
raw_ostream &O, const char *Modifier = nullptr);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index a72ae2e..b55664e 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -94,7 +94,7 @@ enum {
IsSurfTexQueryFlag = 0x800,
IsTexModeUnifiedFlag = 0x1000
};
-}
-}
+} // namespace NVPTXII
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index d500105..8a28b089 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -45,7 +45,7 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
}
static MCSubtargetInfo *
-createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
+createNVPTXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitNVPTXMCSubtargetInfo(X, TT, CPU, FS);
return X;
diff --git a/contrib/llvm/lib/Target/NVPTX/ManagedStringPool.h b/contrib/llvm/lib/Target/NVPTX/ManagedStringPool.h
index a2d670f..1480b61 100644
--- a/contrib/llvm/lib/Target/NVPTX/ManagedStringPool.h
+++ b/contrib/llvm/lib/Target/NVPTX/ManagedStringPool.h
@@ -43,6 +43,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
index 477b0ba..d06d61f 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
@@ -70,6 +70,7 @@ MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
FunctionPass *createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM);
+BasicBlockPass *createNVPTXLowerAllocaPass();
bool isImageOrSamplerVal(const Value *, const Module *);
@@ -132,7 +133,7 @@ enum VecType {
V2 = 2,
V4 = 4
};
-}
+} // namespace PTXLdStInstCode
/// PTXCvtMode - Conversion code enumeration
namespace PTXCvtMode {
@@ -151,7 +152,7 @@ enum CvtMode {
FTZ_FLAG = 0x10,
SAT_FLAG = 0x20
};
-}
+} // namespace PTXCvtMode
/// PTXCmpMode - Comparison mode enumeration
namespace PTXCmpMode {
@@ -179,9 +180,9 @@ enum CmpMode {
BASE_MASK = 0xFF,
FTZ_FLAG = 0x100
};
-}
-}
-} // end namespace llvm;
+} // namespace PTXCmpMode
+} // namespace NVPTX
+} // namespace llvm
// Defines symbolic names for NVPTX registers. This defines a mapping from
// register name to register number.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 298b992..1a1a8ca 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -109,7 +109,7 @@ void VisitGlobalVariableForEmission(
Visited.insert(GV);
Visiting.erase(GV);
}
-}
+} // namespace
void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
if (!EmitLineNumbers)
@@ -808,7 +808,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
// Construct a default subtarget off of the TargetMachine defaults. The
// rest of NVPTX isn't friendly to change subtargets per function and
// so the default TargetMachine will have all of the options.
- StringRef TT = TM.getTargetTriple();
+ const Triple &TT = TM.getTargetTriple();
StringRef CPU = TM.getTargetCPU();
StringRef FS = TM.getTargetFeatureString();
const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
@@ -818,7 +818,6 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
raw_svector_ostream OS1(Str1);
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
- MMI->AnalyzeModule(M);
// We need to call the parent's one explicitly.
//bool Result = AsmPrinter::doInitialization(M);
@@ -847,7 +846,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
}
// If we're not NVCL we're CUDA, go ahead and emit filenames.
- if (Triple(TM.getTargetTriple()).getOS() != Triple::NVCL)
+ if (TM.getTargetTriple().getOS() != Triple::NVCL)
recordAndEmitFilenames(M);
GlobalsEmitted = false;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index f6f7685..12d80a3 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -349,6 +349,6 @@ public:
DebugLoc prevDebugLoc;
void emitLineNumberAsDotLoc(const MachineInstr &);
};
-} // end of namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index 7d4be8e..2d5e74c 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -38,7 +38,7 @@ public:
/// \brief Clean up the name to remove symbols invalid in PTX.
std::string cleanUpName(StringRef Name);
};
-}
+} // namespace
char NVPTXAssignValidGlobalNames::ID = 0;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index cfff001..3eb7024 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -98,17 +98,16 @@ private:
/// This reordering exposes to optimizeMemoryInstruction more
/// optimization opportunities on loads and stores.
///
- /// Returns true if this function succesfully hoists an eliminable
- /// addrspacecast or V is already such an addrspacecast.
- /// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X,
- /// indices)".
- bool hoistAddrSpaceCastFrom(Value *V, int Depth = 0);
+ /// If this function successfully hoists an eliminable addrspacecast or V is
+ /// already such an addrspacecast, it returns the transformed value (which is
+ /// guaranteed to be an addrspacecast); otherwise, it returns nullptr.
+ Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0);
/// Helper function for GEPs.
- bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth);
+ Value *hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth);
/// Helper function for bitcasts.
- bool hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth);
+ Value *hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth);
};
-}
+} // namespace
char NVPTXFavorNonGenericAddrSpaces::ID = 0;
@@ -143,17 +142,19 @@ static bool isEliminableAddrSpaceCast(Value *V) {
DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC);
}
-bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(GEPOperator *GEP,
- int Depth) {
- if (!hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1))
- return false;
+Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
+ GEPOperator *GEP, int Depth) {
+ Value *NewOperand =
+ hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1);
+ if (NewOperand == nullptr)
+ return nullptr;
- // That hoistAddrSpaceCastFrom succeeds implies GEP's pointer operand is now
- // an eliminable addrspacecast.
- assert(isEliminableAddrSpaceCast(GEP->getPointerOperand()));
- Operator *Cast = cast<Operator>(GEP->getPointerOperand());
+ // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
+ assert(isEliminableAddrSpaceCast(NewOperand));
+ Operator *Cast = cast<Operator>(NewOperand);
SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
+ Value *NewASC;
if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) {
// GEP = gep (addrspacecast X), indices
// =>
@@ -163,30 +164,31 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(GEPOperator *GEP,
GEP->getSourceElementType(), Cast->getOperand(0), Indices,
"", GEPI);
NewGEP->setIsInBounds(GEP->isInBounds());
- Value *NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI);
+ NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI);
NewASC->takeName(GEP);
+ // Without RAUWing GEP, the compiler would visit GEP again and emit
+ // redundant instructions. This is exercised in test @rauw in
+ // access-non-generic.ll.
GEP->replaceAllUsesWith(NewASC);
} else {
// GEP is a constant expression.
Constant *NewGEP = ConstantExpr::getGetElementPtr(
GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
Indices, GEP->isInBounds());
- GEP->replaceAllUsesWith(
- ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType()));
+ NewASC = ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType());
}
-
- return true;
+ return NewASC;
}
-bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast(
+Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast(
BitCastOperator *BC, int Depth) {
- if (!hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1))
- return false;
+ Value *NewOperand = hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1);
+ if (NewOperand == nullptr)
+ return nullptr;
- // That hoistAddrSpaceCastFrom succeeds implies BC's source operand is now
- // an eliminable addrspacecast.
- assert(isEliminableAddrSpaceCast(BC->getOperand(0)));
- Operator *Cast = cast<Operator>(BC->getOperand(0));
+ // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
+ assert(isEliminableAddrSpaceCast(NewOperand));
+ Operator *Cast = cast<Operator>(NewOperand);
// Cast = addrspacecast Src
// BC = bitcast Cast
@@ -197,31 +199,34 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast(
Type *TypeOfNewCast =
PointerType::get(BC->getType()->getPointerElementType(),
Src->getType()->getPointerAddressSpace());
+ Value *NewBC;
if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) {
Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI);
- Value *NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI);
+ NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI);
NewBC->takeName(BC);
+ // Without RAUWing BC, the compiler would visit BC again and emit
+ // redundant instructions. This is exercised in test @rauw in
+ // access-non-generic.ll.
BC->replaceAllUsesWith(NewBC);
} else {
// BC is a constant expression.
Constant *NewCast =
ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast);
- Constant *NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType());
- BC->replaceAllUsesWith(NewBC);
+ NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType());
}
- return true;
+ return NewBC;
}
-bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V,
- int Depth) {
- // Returns true if V is already an eliminable addrspacecast.
+Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V,
+ int Depth) {
+ // Returns V if V is already an eliminable addrspacecast.
if (isEliminableAddrSpaceCast(V))
- return true;
+ return V;
// Limit the depth to prevent this recursive function from running too long.
const int MaxDepth = 20;
if (Depth >= MaxDepth)
- return false;
+ return nullptr;
// If V is a GEP or bitcast, hoist the addrspacecast if any from its pointer
// operand. This enables optimizeMemoryInstruction to shortcut addrspacecasts
@@ -232,28 +237,29 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V,
if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V))
return hoistAddrSpaceCastFromBitCast(BC, Depth);
- return false;
+ return nullptr;
}
bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI,
unsigned Idx) {
- if (hoistAddrSpaceCastFrom(MI->getOperand(Idx))) {
- // load/store (addrspacecast X) => load/store X if shortcutting the
- // addrspacecast is valid and can improve performance.
- //
- // e.g.,
- // %1 = addrspacecast float addrspace(3)* %0 to float*
- // %2 = load float* %1
- // ->
- // %2 = load float addrspace(3)* %0
- //
- // Note: the addrspacecast can also be a constant expression.
- assert(isEliminableAddrSpaceCast(MI->getOperand(Idx)));
- Operator *ASC = dyn_cast<Operator>(MI->getOperand(Idx));
- MI->setOperand(Idx, ASC->getOperand(0));
- return true;
- }
- return false;
+ Value *NewOperand = hoistAddrSpaceCastFrom(MI->getOperand(Idx));
+ if (NewOperand == nullptr)
+ return false;
+
+ // load/store (addrspacecast X) => load/store X if shortcutting the
+ // addrspacecast is valid and can improve performance.
+ //
+ // e.g.,
+ // %1 = addrspacecast float addrspace(3)* %0 to float*
+ // %2 = load float* %1
+ // ->
+ // %2 = load float addrspace(3)* %0
+ //
+ // Note: the addrspacecast can also be a constant expression.
+ assert(isEliminableAddrSpaceCast(NewOperand));
+ Operator *ASC = dyn_cast<Operator>(NewOperand);
+ MI->setOperand(Idx, ASC->getOperand(0));
+ return true;
}
bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
index 14f8bb7..488edec 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -31,6 +31,6 @@ public:
MachineBasicBlock::iterator I) const override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index fe20580..5879df3 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -95,6 +95,6 @@ private:
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
};
-}
+} // namespace
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index ed94775..276851f 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -427,7 +427,7 @@ enum NodeType : unsigned {
Suld3DV4I16Zero,
Suld3DV4I32Zero
};
-}
+} // namespace NVPTXISD
class NVPTXSubtarget;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index aa36b6b..c86f861 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -42,7 +42,7 @@ private:
Value *cleanupValue(Value *V);
void replaceWith(Instruction *From, ConstantInt *To);
};
-}
+} // namespace
char NVPTXImageOptimizer::ID = 0;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index dabc3be..76d6597 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -248,7 +248,7 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned NVPTXInstrInfo::InsertBranch(
MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
+ ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
index 9b5d491..179c068 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -66,7 +66,7 @@ public:
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(
MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const override;
+ ArrayRef<MachineOperand> Cond, DebugLoc DL) const override;
unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
return MI.getOperand(2).getImm();
}
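
Here the SmallVectorImpl<MachineOperand> condition parameter becomes ArrayRef<MachineOperand>, and the PowerPC hooks later in this diff get the same treatment. A small self-contained sketch of why ArrayRef loosens the interface; only the ADT headers are assumed, and the function names are invented:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include <vector>

    // An ArrayRef parameter binds to SmallVectors, std::vectors and
    // initializer lists alike, without copying, so callers are no longer
    // forced to build a SmallVectorImpl just to satisfy the signature.
    static int sum(llvm::ArrayRef<int> Vals) {
      int S = 0;
      for (int V : Vals)
        S += V;
      return S;
    }

    static int demo() {
      llvm::SmallVector<int, 4> SV;
      SV.push_back(1);
      SV.push_back(2);
      std::vector<int> V = {3, 4};
      return sum(SV) + sum(V) + sum({5, 6});
    }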
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
new file mode 100644
index 0000000..93d0025
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -0,0 +1,115 @@
+//===-- NVPTXLowerAlloca.cpp - Make allocas use local memory =====--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// For each alloca instruction, add a pair of address space casts: one to the
+// local address space and one back to the generic space. For example,
+//
+// %A = alloca i32
+// store i32 0, i32* %A ; emits st.u32
+//
+// will be transformed to
+//
+// %A = alloca i32
+// %Local = addrspacecast i32* %A to i32 addrspace(5)*
+// %Generic = addrspacecast i32 addrspace(5)* %Local to i32*
+// store i32 0, i32* %Generic ; emits st.local.u32
+//
+// And we will rely on NVPTXFavorNonGenericAddrSpaces to combine the last
+// two instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeNVPTXLowerAllocaPass(PassRegistry &);
+}
+
+namespace {
+class NVPTXLowerAlloca : public BasicBlockPass {
+ bool runOnBasicBlock(BasicBlock &BB) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ NVPTXLowerAlloca() : BasicBlockPass(ID) {}
+ const char *getPassName() const override {
+ return "convert address space of alloca'ed memory to local";
+ }
+};
+} // namespace
+
+char NVPTXLowerAlloca::ID = 1;
+
+INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca",
+ "Lower Alloca", false, false)
+
+// =============================================================================
+// Main function for this pass.
+// =============================================================================
+bool NVPTXLowerAlloca::runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (auto &I : BB) {
+ if (auto allocaInst = dyn_cast<AllocaInst>(&I)) {
+ Changed = true;
+ auto PTy = dyn_cast<PointerType>(allocaInst->getType());
+ auto ETy = PTy->getElementType();
+ auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL);
+ auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, "");
+ auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC);
+ auto NewASCToGeneric = new AddrSpaceCastInst(NewASCToLocal,
+ GenericAddrTy, "");
+ NewASCToLocal->insertAfter(allocaInst);
+ NewASCToGeneric->insertAfter(NewASCToLocal);
+ for (Value::use_iterator UI = allocaInst->use_begin(),
+ UE = allocaInst->use_end();
+ UI != UE; ) {
+ // Check Load, Store, GEP, and BitCast Uses on alloca and make them
+ // use the converted generic address, in order to expose non-generic
+ // addrspacecast to NVPTXFavorNonGenericAddrSpaces. For other types
+ // of instructions this is unnecessary and may introduce redundant
+ // address casts.
+ const auto &AllocaUse = *UI++;
+ auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
+ if (LI && LI->getPointerOperand() == allocaInst && !LI->isVolatile()) {
+ LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric);
+ continue;
+ }
+ auto SI = dyn_cast<StoreInst>(AllocaUse.getUser());
+ if (SI && SI->getPointerOperand() == allocaInst && !SI->isVolatile()) {
+ SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric);
+ continue;
+ }
+ auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser());
+ if (GI && GI->getPointerOperand() == allocaInst) {
+ GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric);
+ continue;
+ }
+ auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser());
+ if (BI && BI->getOperand(0) == allocaInst) {
+ BI->setOperand(0, NewASCToGeneric);
+ continue;
+ }
+ }
+ }
+ }
+ return Changed;
+}
+
+BasicBlockPass *llvm::createNVPTXLowerAllocaPass() {
+ return new NVPTXLowerAlloca();
+}
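
For context, a minimal sketch of driving the new pass through the legacy pass manager; only createNVPTXLowerAllocaPass() comes from this patch, the wrapper function is invented boilerplate:

    #include "NVPTX.h"                      // createNVPTXLowerAllocaPass()
    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"

    // Run the new alloca-lowering pass over an already-built module.
    static void lowerAllocas(llvm::Module &M) {
      llvm::legacy::PassManager PM;
      PM.add(llvm::createNVPTXLowerAllocaPass());
      PM.run(M);
    }

In the backend itself the pass is added to addIRPasses() between SROA and NVPTXFavorNonGenericAddrSpaces, as the NVPTXTargetMachine.cpp hunk below shows.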
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index 10f1135..4b9322c 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -46,6 +46,6 @@ public:
return ImageHandleList[Idx].c_str();
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 5fd69a6..ea58f77 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -39,7 +39,7 @@ public:
private:
void calculateFrameObjectOffsets(MachineFunction &Fn);
};
-}
+} // namespace
MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
return new NVPTXPrologEpilogPass();
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 6e97f9e..3ef997b 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -69,7 +69,7 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
}
return "";
}
-}
+} // namespace llvm
NVPTXRegisterInfo::NVPTXRegisterInfo() : NVPTXGenRegisterInfo(0) {}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index e83f735..bb0adc5 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -45,7 +45,7 @@ private:
bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF,
unsigned &Idx);
};
-}
+} // namespace
char NVPTXReplaceImageHandles::ID = 0;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 069d6e1..71645dc 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -43,7 +43,7 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
-NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
+NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const NVPTXTargetMachine &TM)
: NVPTXGenSubtargetInfo(TT, CPU, FS), PTXVersion(0), SmVersion(20), TM(TM),
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index e9833e5..d452045 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -52,7 +52,7 @@ public:
/// This constructor initializes the data members to match that
/// of the specified module.
///
- NVPTXSubtarget(const std::string &TT, const std::string &CPU,
+ NVPTXSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const NVPTXTargetMachine &TM);
const TargetFrameLowering *getFrameLowering() const override {
@@ -103,6 +103,6 @@ public:
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a646668..c071ee8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -54,6 +54,7 @@ void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
+void initializeNVPTXLowerAllocaPass(PassRegistry &);
}
extern "C" void LLVMInitializeNVPTXTarget() {
@@ -70,6 +71,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeNVPTXFavorNonGenericAddrSpacesPass(
*PassRegistry::getPassRegistry());
initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry());
+ initializeNVPTXLowerAllocaPass(*PassRegistry::getPassRegistry());
}
static std::string computeDataLayout(bool is64Bit) {
@@ -83,7 +85,7 @@ static std::string computeDataLayout(bool is64Bit) {
return Ret;
}
-NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
+NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
@@ -92,7 +94,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
CM, OL),
is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
- if (Triple(TT).getOS() == Triple::NVCL)
+ if (TT.getOS() == Triple::NVCL)
drvInterface = NVPTX::NVCL;
else
drvInterface = NVPTX::CUDA;
@@ -103,18 +105,20 @@ NVPTXTargetMachine::~NVPTXTargetMachine() {}
void NVPTXTargetMachine32::anchor() {}
-NVPTXTargetMachine32::NVPTXTargetMachine32(
- const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
+NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
void NVPTXTargetMachine64::anchor() {}
-NVPTXTargetMachine64::NVPTXTargetMachine64(
- const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
+NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
namespace {
@@ -164,12 +168,11 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
- addPass(createNVPTXFavorNonGenericAddrSpacesPass());
// NVPTXLowerKernelArgs emits alloca for byval parameters which can often
- // be eliminated by SROA. We do not run SROA right after NVPTXLowerKernelArgs
- // because we plan to merge NVPTXLowerKernelArgs and
- // NVPTXFavorNonGenericAddrSpaces into one pass.
+ // be eliminated by SROA.
addPass(createSROAPass());
+ addPass(createNVPTXLowerAllocaPass());
+ addPass(createNVPTXFavorNonGenericAddrSpacesPass());
// FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave
// them unused. We could remove dead code in an ad-hoc manner, but that
// requires manual work and might be error-prone.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index 2cd10e8..da7f62b 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -34,9 +34,10 @@ class NVPTXTargetMachine : public LLVMTargetMachine {
ManagedStringPool ManagedStrPool;
public:
- NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM,
- CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
+ NVPTXTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OP,
+ bool is64bit);
~NVPTXTargetMachine() override;
const NVPTXSubtarget *getSubtargetImpl(const Function &) const override {
@@ -67,7 +68,7 @@ public:
class NVPTXTargetMachine32 : public NVPTXTargetMachine {
virtual void anchor();
public:
- NVPTXTargetMachine32(const Target &T, StringRef TT, StringRef CPU,
+ NVPTXTargetMachine32(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
@@ -76,7 +77,7 @@ public:
class NVPTXTargetMachine64 : public NVPTXTargetMachine {
virtual void anchor();
public:
- NVPTXTargetMachine64(const Target &T, StringRef TT, StringRef CPU,
+ NVPTXTargetMachine64(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index 7e2ce73..4d937c6 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -91,6 +91,6 @@ void dumpInstRec(Value *v, std::set<Instruction *> *visited);
void dumpInstRec(Value *v);
void dumpParent(Value *v);
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 5e375b7..1c20430 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -75,7 +75,7 @@ private:
bool handleFunction(Function *ReflectFunction);
void setVarMap();
};
-}
+} // namespace
ModulePass *llvm::createNVVMReflectPass() {
return new NVVMReflect();
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 1736d03..a699a55 100644
--- a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -1184,6 +1184,13 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
Inst = TmpInst;
break;
}
+ case PPC::MFTB: {
+ if (STI.getFeatureBits()[PPC::FeatureMFTB]) {
+ assert(Inst.getNumOperands() == 2 && "Expecting two operands");
+ Inst.setOpcode(PPC::MFSPR);
+ }
+ break;
+ }
}
}
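
The new case keys off the FeatureMFTB subtarget feature (defined in PPC.td later in this diff) and simply swaps the opcode, since MFTB and MFSPR take the same two operands, as the assert checks. A condensed, self-contained sketch of that check with the parser stripped away; the function and parameter names are invented, and the real code indexes the feature bits with the tablegen-generated PPC::FeatureMFTB enum:

    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCSubtargetInfo.h"

    // If the subtarget implements mftb via mfspr, rewrite the opcode in
    // place; the operand list is left untouched.
    static void canonicalizeMFTB(llvm::MCInst &Inst,
                                 const llvm::MCSubtargetInfo &STI,
                                 unsigned FeatureMFTBBit, unsigned MFSPROpc) {
      if (STI.getFeatureBits()[FeatureMFTBBit])
        Inst.setOpcode(MFSPROpc);
    }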
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 72742dc..b6dd595 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -230,11 +230,11 @@ namespace {
MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU) {
- if (Triple(TT).isOSDarwin())
+ const Triple &TT, StringRef CPU) {
+ if (TT.isOSDarwin())
return new DarwinPPCAsmBackend(T);
- uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
- bool IsLittleEndian = Triple(TT).getArch() == Triple::ppc64le;
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
+ bool IsLittleEndian = TT.getArch() == Triple::ppc64le;
return new ELFPPCAsmBackend(T, IsLittleEndian, OSABI);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 992be5b..36119d5 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -31,7 +31,7 @@ namespace {
bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
};
-}
+} // namespace
PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
: MCELFObjectTargetWriter(Is64Bit, OSABI,
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index ae43e59d..ad614f2 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -50,7 +50,7 @@ enum Fixups {
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
-}
-}
+} // namespace PPC
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 9537924..b729156 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -309,7 +309,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
// Return the thread-pointer register's encoding.
Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_nofixup));
- Triple TT(STI.getTargetTriple());
+ const Triple &TT = STI.getTargetTriple();
bool isPPC64 = TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le;
return CTX.getRegisterInfo()->getEncodingValue(isPPC64 ? PPC::X13 : PPC::R2);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 1e8e804..489905b 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -63,8 +63,8 @@ static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) {
return X;
}
-static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
+static MCSubtargetInfo *createPPCMCSubtargetInfo(const Triple &TT,
+ StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitPPCMCSubtargetInfo(X, TT, CPU, FS);
return X;
@@ -219,7 +219,7 @@ public:
llvm_unreachable("Unknown pseudo-op: .localentry");
}
};
-}
+} // namespace
static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
@@ -230,7 +230,7 @@ static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
static MCTargetStreamer *
createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
- Triple TT(STI.getTargetTriple());
+ const Triple &TT = STI.getTargetTriple();
if (TT.getObjectFormat() == Triple::ELF)
return new PPCTargetELFStreamer(S);
return new PPCTargetMachOStreamer(S);
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 5f2117c..18818a1 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -29,6 +29,7 @@ class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
+class Triple;
class StringRef;
class raw_pwrite_stream;
class raw_ostream;
@@ -42,7 +43,7 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);
MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+ const Triple &TT, StringRef CPU);
/// Construct an PPC ELF object writer.
MCObjectWriter *createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
@@ -80,7 +81,7 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
return false;
}
-} // End llvm namespace
+} // namespace llvm
// Generated files will use "namespace PPC". To avoid symbol clash,
// undefine PPC here. PPC may be predefined on some hosts.
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index 9d72896..9b5491f 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -51,7 +51,7 @@ public:
FixedValue);
}
};
-}
+} // namespace
/// computes the log2 of the size of the relocation,
/// used for relocation_info::r_length.
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 6075631..ff9b059 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -62,7 +62,7 @@ namespace PPC {
/// Assume the condition register is set by MI(a,b), return the predicate if
/// we modify the instructions such that condition register is set by MI(b,a).
Predicate getSwappedPredicate(Predicate Opcode);
-}
-}
+} // namespace PPC
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
index ae8d8b4..49f77b5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -98,6 +98,6 @@ namespace llvm {
};
} // end namespace PPCII
-} // end namespace llvm;
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
index 1a02bcc..641b237 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -135,9 +135,9 @@ def FeatureInvariantFunctionDescriptors :
"Assume function descriptors are invariant">;
def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
"Enable Hardware Transactional Memory instructions">;
+def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
+ "Implement mftb using the mfspr instruction">;
-def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
- "Treat mftb as deprecated">;
def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
"Treat vector data stream cache control instructions as deprecated">;
@@ -165,7 +165,7 @@ def ProcessorFeatures {
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */,
FeatureBPERMD, FeatureExtDiv,
- DeprecatedMFTB, DeprecatedDST];
+ FeatureMFTB, DeprecatedDST];
list<SubtargetFeature> Power8SpecificFeatures =
[DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic];
@@ -247,61 +247,75 @@ include "PPCInstrInfo.td"
// PowerPC processors supported.
//
-def : Processor<"generic", G3Itineraries, [Directive32]>;
+def : Processor<"generic", G3Itineraries, [Directive32, FeatureMFTB]>;
def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
FeatureICBT, FeatureBookE,
- FeatureMSYNC, DeprecatedMFTB]>;
+ FeatureMSYNC, FeatureMFTB]>;
def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
FeatureICBT, FeatureBookE,
- FeatureMSYNC, DeprecatedMFTB]>;
+ FeatureMSYNC, FeatureMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
-def : Processor<"602", G3Itineraries, [Directive602]>;
+def : Processor<"602", G3Itineraries, [Directive602,
+ FeatureMFTB]>;
def : Processor<"603", G3Itineraries, [Directive603,
- FeatureFRES, FeatureFRSQRTE]>;
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
def : Processor<"603e", G3Itineraries, [Directive603,
- FeatureFRES, FeatureFRSQRTE]>;
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
def : Processor<"603ev", G3Itineraries, [Directive603,
- FeatureFRES, FeatureFRSQRTE]>;
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
def : Processor<"604", G3Itineraries, [Directive604,
- FeatureFRES, FeatureFRSQRTE]>;
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
def : Processor<"604e", G3Itineraries, [Directive604,
- FeatureFRES, FeatureFRSQRTE]>;
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
def : Processor<"620", G3Itineraries, [Directive620,
- FeatureFRES, FeatureFRSQRTE]>;
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
def : Processor<"750", G4Itineraries, [Directive750,
- FeatureFRES, FeatureFRSQRTE]>;
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
def : Processor<"g3", G3Itineraries, [Directive750,
- FeatureFRES, FeatureFRSQRTE]>;
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec,
- FeatureFRES, FeatureFRSQRTE]>;
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec,
- FeatureFRES, FeatureFRSQRTE]>;
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
- FeatureFRES, FeatureFRSQRTE]>;
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
- FeatureFRES, FeatureFRSQRTE]>;
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
def : ProcessorModel<"970", G5Model,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt,
FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX,
- Feature64Bit /*, Feature64BitRegs */]>;
+ Feature64Bit /*, Feature64BitRegs */,
+ FeatureMFTB]>;
def : ProcessorModel<"g5", G5Model,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
FeatureFRES, FeatureFRSQRTE,
Feature64Bit /*, Feature64BitRegs */,
- DeprecatedMFTB, DeprecatedDST]>;
+ FeatureMFTB, DeprecatedDST]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc, FeatureMFOCRF,
FeatureSTFIWX, FeatureICBT, FeatureBookE,
- FeatureISEL, DeprecatedMFTB]>;
+ FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"e5500", PPCE5500Model,
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
FeatureSTFIWX, FeatureICBT, FeatureBookE,
- FeatureISEL, DeprecatedMFTB]>;
+ FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"a2", PPCA2Model,
[DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
@@ -309,7 +323,7 @@ def : ProcessorModel<"a2", PPCA2Model,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit
- /*, Feature64BitRegs */, DeprecatedMFTB]>;
+ /*, Feature64BitRegs */, FeatureMFTB]>;
def : ProcessorModel<"a2q", PPCA2Model,
[DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
@@ -317,7 +331,7 @@ def : ProcessorModel<"a2q", PPCA2Model,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit
- /*, Feature64BitRegs */, FeatureQPX, DeprecatedMFTB]>;
+ /*, Feature64BitRegs */, FeatureQPX, FeatureMFTB]>;
def : ProcessorModel<"pwr3", G5Model,
[DirectivePwr3, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
@@ -325,41 +339,42 @@ def : ProcessorModel<"pwr3", G5Model,
def : ProcessorModel<"pwr4", G5Model,
[DirectivePwr4, FeatureAltivec, FeatureMFOCRF,
FeatureFSqrt, FeatureFRES, FeatureFRSQRTE,
- FeatureSTFIWX, Feature64Bit]>;
+ FeatureSTFIWX, Feature64Bit, FeatureMFTB]>;
def : ProcessorModel<"pwr5", G5Model,
[DirectivePwr5, FeatureAltivec, FeatureMFOCRF,
FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES,
FeatureSTFIWX, Feature64Bit,
- DeprecatedMFTB, DeprecatedDST]>;
+ FeatureMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr5x", G5Model,
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES,
FeatureSTFIWX, FeatureFPRND, Feature64Bit,
- DeprecatedMFTB, DeprecatedDST]>;
+ FeatureMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr6", G5Model,
[DirectivePwr6, FeatureAltivec,
FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
FeatureFPRND, Feature64Bit /*, Feature64BitRegs */,
- DeprecatedMFTB, DeprecatedDST]>;
+ FeatureMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr6x", G5Model,
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
FeatureFPRND, Feature64Bit,
- DeprecatedMFTB, DeprecatedDST]>;
+ FeatureMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
-def : Processor<"ppc", G3Itineraries, [Directive32]>;
+def : Processor<"ppc", G3Itineraries, [Directive32, FeatureMFTB]>;
def : ProcessorModel<"ppc64", G5Model,
[Directive64, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureFRES,
FeatureFRSQRTE, FeatureSTFIWX,
- Feature64Bit /*, Feature64BitRegs */]>;
+ Feature64Bit /*, Feature64BitRegs */,
+ FeatureMFTB]>;
def : ProcessorModel<"ppc64le", P8Model, ProcessorFeatures.Power8FeatureList>;
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index b42b0f9..87a5236 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -440,7 +440,7 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
bool isPPC64 = Subtarget->isPPC64();
- bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin();
+ bool isDarwin = TM.getTargetTriple().isOSDarwin();
const Module *M = MF->getFunction()->getParent();
PICLevel::Level PL = M->getPICLevel();
@@ -1276,7 +1276,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
// freed) and since we're at the global level we can use the default
// constructed subtarget.
std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
- TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
+ TM.getTargetTriple().str(), TM.getTargetCPU(),
+ TM.getTargetFeatureString()));
auto EmitToStreamer = [&STI] (MCStreamer &S, const MCInst &Inst) {
S.EmitInstruction(Inst, *STI);
};
@@ -1510,7 +1511,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
static AsmPrinter *
createPPCAsmPrinterPass(TargetMachine &tm,
std::unique_ptr<MCStreamer> &&Streamer) {
- if (Triple(tm.getTargetTriple()).isMacOSX())
+ if (tm.getTargetTriple().isMacOSX())
return new PPCDarwinAsmPrinter(tm, std::move(Streamer));
return new PPCLinuxAsmPrinter(tm, std::move(Streamer));
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
index 940d55a..2b6030a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -51,7 +51,7 @@ namespace {
}
};
char PPCBSel::ID = 0;
-}
+} // namespace
INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector",
false, false)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index 69afd68..4161317 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -417,8 +417,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
bool MadeChange = false;
- Triple TT = Triple(L->getHeader()->getParent()->getParent()->
- getTargetTriple());
+ const Triple TT =
+ Triple(L->getHeader()->getParent()->getParent()->getTargetTriple());
if (!TT.isArch32Bit() && !TT.isArch64Bit())
return MadeChange; // Unknown arch. type.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h
index eb904a8..550cac6 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h
@@ -29,7 +29,7 @@ inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
return false;
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
index fc89753..9cd9c2f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -191,7 +191,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
"PowerPC Early-Return Creation", false, false)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index a561d5b..82ff530 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -2347,4 +2347,4 @@ namespace llvm {
return new PPCFastISel(FuncInfo, LibInfo);
return nullptr;
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index 28d074e..b232863 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -93,6 +93,6 @@ public:
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index afc1f36..5f9f9f2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -234,7 +234,7 @@ private:
SDNode *transferMemOperands(SDNode *N, SDNode *Result);
};
-}
+} // namespace
/// InsertVRSaveCode - Once the entire function has been instruction selected,
/// all virtual registers are created and all machine instructions are built,
@@ -1301,12 +1301,9 @@ class BitPermutationSelector {
// Now, remove all groups with this underlying value and rotation
// factor.
- for (auto I = BitGroups.begin(); I != BitGroups.end();) {
- if (I->V == VRI.V && I->RLAmt == VRI.RLAmt)
- I = BitGroups.erase(I);
- else
- ++I;
- }
+ eraseMatchingBitGroups([VRI](const BitGroup &BG) {
+ return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
+ });
}
}
@@ -1337,12 +1334,9 @@ class BitPermutationSelector {
}
// Now, remove all groups with this underlying value and rotation factor.
- for (auto I = BitGroups.begin(); I != BitGroups.end();) {
- if (I->V == VRI.V && I->RLAmt == VRI.RLAmt)
- I = BitGroups.erase(I);
- else
- ++I;
- }
+ eraseMatchingBitGroups([VRI](const BitGroup &BG) {
+ return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
+ });
}
if (InstCnt) *InstCnt += BitGroups.size();
@@ -1544,7 +1538,7 @@ class BitPermutationSelector {
// Repl32 true, but are trivially convertable to Repl32 false. Such a
// group is trivially convertable if it overlaps only with the lower 32
// bits, and the group has not been coalesced.
- auto MatchingBG = [VRI](BitGroup &BG) {
+ auto MatchingBG = [VRI](const BitGroup &BG) {
if (VRI.V != BG.V)
return false;
@@ -1675,12 +1669,7 @@ class BitPermutationSelector {
// Now, remove all groups with this underlying value and rotation
// factor.
- for (auto I = BitGroups.begin(); I != BitGroups.end();) {
- if (MatchingBG(*I))
- I = BitGroups.erase(I);
- else
- ++I;
- }
+ eraseMatchingBitGroups(MatchingBG);
}
}
@@ -1740,12 +1729,10 @@ class BitPermutationSelector {
// Now, remove all groups with this underlying value and rotation factor.
if (Res)
- for (auto I = BitGroups.begin(); I != BitGroups.end();) {
- if (I->V == VRI.V && I->RLAmt == VRI.RLAmt && I->Repl32 == VRI.Repl32)
- I = BitGroups.erase(I);
- else
- ++I;
- }
+ eraseMatchingBitGroups([VRI](const BitGroup &BG) {
+ return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
+ BG.Repl32 == VRI.Repl32;
+ });
}
// Because 64-bit rotates are more flexible than inserts, we might have a
@@ -1846,6 +1833,11 @@ class BitPermutationSelector {
return nullptr;
}
+ void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
+ BitGroups.erase(std::remove_if(BitGroups.begin(), BitGroups.end(), F),
+ BitGroups.end());
+ }
+
SmallVector<ValueBit, 64> Bits;
bool HasZeros;
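
The three hand-rolled erase loops replaced above all collapse into the new eraseMatchingBitGroups() helper, which is the standard erase/remove_if idiom behind a function_ref. A plain-C++ illustration of that idiom (not LLVM code; the names are invented):

    #include <algorithm>
    #include <vector>

    // Drop every element matching the predicate in one pass, then erase the
    // tail that std::remove_if compacted out of the way.
    static void eraseEvens(std::vector<int> &Groups) {
      Groups.erase(std::remove_if(Groups.begin(), Groups.end(),
                                  [](int G) { return G % 2 == 0; }),
                   Groups.end());
    }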
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2600ee5..1cdfb41 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3765,7 +3765,7 @@ struct TailCallArgumentInfo {
TailCallArgumentInfo() : FrameIdx(0) {}
};
-}
+} // namespace
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 7fd3f9c..c33d605 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -353,7 +353,7 @@ namespace llvm {
/// the last operand.
TOC_ENTRY
};
- }
+ } // namespace PPCISD
/// Define some predicates that are used for node matching.
namespace PPC {
@@ -405,7 +405,7 @@ namespace llvm {
/// If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int isQVALIGNIShuffleMask(SDNode *N);
- }
+ } // namespace PPC
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &Subtarget;
@@ -871,6 +871,6 @@ namespace llvm {
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
-}
+} // namespace llvm
#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index e27bf7f5..9ff604b 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1142,7 +1142,9 @@ def:Pat<(vpkudum_unary_shuffle v16i8:$vA, undef),
def:Pat<(vpkudum_swapped_shuffle v16i8:$vA, v16i8:$vB),
(VPKUDUM $vB, $vA)>;
-
+def VGBBD : VX2_Int_Ty2<1292, "vgbbd", int_ppc_altivec_vgbbd, v16i8, v16i8>;
+def VBPERMQ : VX1_Int_Ty2<1356, "vbpermq", int_ppc_altivec_vbpermq,
+ v2i64, v16i8>;
} // end HasP8Altivec
// Crypto instructions (from builtins)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
index cf71b1c..ec94fa5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -38,6 +38,6 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
return MIB.addFrameIndex(FI).addImm(Offset);
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index b4bb50c..d3bb7a6 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -548,7 +548,7 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ ArrayRef<MachineOperand> Cond,
DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
@@ -593,7 +593,7 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// Select analysis.
bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
if (!Subtarget.hasISEL())
@@ -634,8 +634,7 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc dl,
- unsigned DestReg,
- const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned DestReg, ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg) const {
assert(Cond.size() == 2 &&
"PPC branch conditions have two components!");
@@ -1213,9 +1212,8 @@ bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
return !isPredicated(MI);
}
-bool PPCInstrInfo::PredicateInstruction(
- MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const {
+bool PPCInstrInfo::PredicateInstruction(MachineInstr *MI,
+ ArrayRef<MachineOperand> Pred) const {
unsigned OpC = MI->getOpcode();
if (OpC == PPC::BLR || OpC == PPC::BLR8) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
@@ -1306,9 +1304,8 @@ bool PPCInstrInfo::PredicateInstruction(
return false;
}
-bool PPCInstrInfo::SubsumesPredicate(
- const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const {
+bool PPCInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const {
assert(Pred1.size() == 2 && "Invalid PPC first predicate");
assert(Pred2.size() == 2 && "Invalid PPC second predicate");
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 7fd076a..39bf454 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -106,7 +106,7 @@ public:
UseNode, UseIdx);
}
- bool hasLowDefLatency(const InstrItineraryData *ItinData,
+ bool hasLowDefLatency(const TargetSchedModel &SchedModel,
const MachineInstr *DefMI,
unsigned DefIdx) const override {
// Machine LICM should hoist all instructions in low-register-pressure
@@ -141,18 +141,14 @@ public:
bool AllowModify) const override;
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
// Select analysis.
- bool canInsertSelect(const MachineBasicBlock&,
- const SmallVectorImpl<MachineOperand> &Cond,
- unsigned, unsigned, int&, int&, int&) const override;
- void insertSelect(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DstReg,
- const SmallVectorImpl<MachineOperand> &Cond,
+ bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
+ unsigned, unsigned, int &, int &, int &) const override;
+ void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc DL, unsigned DstReg, ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg) const override;
void copyPhysReg(MachineBasicBlock &MBB,
@@ -211,10 +207,10 @@ public:
bool isUnpredicatedTerminator(const MachineInstr *MI) const override;
bool PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const override;
+ ArrayRef<MachineOperand> Pred) const override;
- bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const override;
+ bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const override;
bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const override;
@@ -241,6 +237,6 @@ public:
void getNoopForMachoTarget(MCInst &NopInst) const override;
};
-}
+} // namespace llvm
#endif
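The ArrayRef<MachineOperand> signatures above are drop-in replacements for the old const SmallVectorImpl<MachineOperand>& parameters because ArrayRef is a non-owning view that binds implicitly to a SmallVector (and to plain arrays). A minimal sketch, not part of the patch, with a hypothetical countImm helper standing in for the real hooks:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

// Hypothetical helper: counts non-negative entries in any contiguous range.
static unsigned countImm(llvm::ArrayRef<int> Ops) {
  unsigned N = 0;
  for (int Op : Ops)
    if (Op >= 0)
      ++N;
  return N;
}

void demo() {
  llvm::SmallVector<int, 4> Cond = {1, -2, 3};
  int Fixed[2] = {4, 5};
  countImm(Cond);  // SmallVector converts implicitly to ArrayRef; no copy
  countImm(Fixed); // so does a C array, which SmallVectorImpl& never could
}

Existing call sites that already pass a SmallVector therefore keep compiling unchanged; only the callee signatures move.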
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index c5a044c..b50124d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2225,7 +2225,7 @@ def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT),
"mtspr $SPR, $RT", IIC_SprMTSPR>;
def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR),
- "mftb $RT, $SPR", IIC_SprMFTB>, Deprecated<DeprecatedMFTB>;
+ "mftb $RT, $SPR", IIC_SprMFTB>;
// A pseudo-instruction used to implement the read of the 64-bit cycle counter
// on a 32-bit target.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
index b4e1c09..e783b5e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
@@ -88,7 +88,7 @@ namespace {
const TargetTransformInfo *TTI;
const DataLayout *DL;
};
-}
+} // namespace
char PPCLoopDataPrefetch::ID = 0;
INITIALIZE_PASS_BEGIN(PPCLoopDataPrefetch, "ppc-loop-data-prefetch",
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index b6e7799..1891b63 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -87,7 +87,7 @@ namespace {
LoopInfo *LI;
ScalarEvolution *SE;
};
-}
+} // namespace
char PPCLoopPreIncPrep::ID = 0;
static const char *name = "Prepare loop for pre-inc. addressing modes";
@@ -113,7 +113,7 @@ namespace {
protected:
ScalarEvolution *SE;
};
-}
+} // namespace
static bool IsPtrInBounds(Value *BasePtr) {
Value *StrippedBasePtr = BasePtr;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 05cb6e1..c44d5d7 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -40,7 +40,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
Mangler *Mang = AP.Mang;
const DataLayout *DL = TM.getDataLayout();
MCContext &Ctx = AP.OutContext;
- bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin();
+ bool isDarwin = TM.getTargetTriple().isOSDarwin();
SmallString<128> Name;
StringRef Suffix;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index 2c1378d..d2eaeb4 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -26,6 +26,6 @@ public:
~PPCSelectionDAGInfo();
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index f313b0a..cf603fe 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -47,7 +47,7 @@ PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
-PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
+PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const PPCTargetMachine &TM)
: PPCGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
IsPPC64(TargetTriple.getArch() == Triple::ppc64 ||
@@ -91,7 +91,7 @@ void PPCSubtarget::initializeEnvironment() {
IsPPC4xx = false;
IsPPC6xx = false;
IsE500 = false;
- DeprecatedMFTB = false;
+ FeatureMFTB = false;
DeprecatedDST = false;
HasLazyResolverStubs = false;
HasICBT = false;
@@ -175,7 +175,7 @@ bool PPCSubtarget::enableMachineScheduler() const {
}
// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
-bool PPCSubtarget::enablePostMachineScheduler() const { return true; }
+bool PPCSubtarget::enablePostRAScheduler() const { return true; }
PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const {
return TargetSubtargetInfo::ANTIDEP_ALL;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 8d95508..ea17e1c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -58,7 +58,7 @@ namespace PPC {
DIR_PWR8,
DIR_64
};
-}
+} // namespace PPC
class GlobalValue;
class TargetMachine;
@@ -110,7 +110,7 @@ protected:
bool IsE500;
bool IsPPC4xx;
bool IsPPC6xx;
- bool DeprecatedMFTB;
+ bool FeatureMFTB;
bool DeprecatedDST;
bool HasLazyResolverStubs;
bool IsLittleEndian;
@@ -135,8 +135,8 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
- PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, const PPCTargetMachine &TM);
+ PPCSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
+ const PPCTargetMachine &TM);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
@@ -237,7 +237,7 @@ public:
bool isPPC4xx() const { return IsPPC4xx; }
bool isPPC6xx() const { return IsPPC6xx; }
bool isE500() const { return IsE500; }
- bool isDeprecatedMFTB() const { return DeprecatedMFTB; }
+ bool isFeatureMFTB() const { return FeatureMFTB; }
bool isDeprecatedDST() const { return DeprecatedDST; }
bool hasICBT() const { return HasICBT; }
bool hasInvariantFunctionDescriptors() const {
@@ -274,7 +274,7 @@ public:
// Scheduling customization.
bool enableMachineScheduler() const override;
// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
- bool enablePostMachineScheduler() const override;
+ bool enablePostRAScheduler() const override;
AntiDepBreakMode getAntiDepBreakMode() const override;
void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
@@ -286,6 +286,6 @@ public:
bool enableSubRegLiveness() const override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 2dc0d82..7a9db0f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -156,7 +156,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE,
"PowerPC TLS Dynamic Call Fixup", false, false)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index bf165c9..61b963f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -145,7 +145,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
INITIALIZE_PASS(PPCTOCRegDeps, DEBUG_TYPE,
"PowerPC TOC Register Dependencies", false, false)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 50d4395..074bc87 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -98,13 +98,12 @@ static std::string getDataLayoutString(const Triple &T) {
return Ret;
}
-static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, StringRef TT) {
+static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
+ const Triple &TT) {
std::string FullFS = FS;
- Triple TargetTriple(TT);
// Make sure 64-bit features are available when CPUname is generic
- if (TargetTriple.getArch() == Triple::ppc64 ||
- TargetTriple.getArch() == Triple::ppc64le) {
+ if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) {
if (!FullFS.empty())
FullFS = "+64bit," + FullFS;
else
@@ -165,14 +164,15 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
// with what are (currently) non-function specific overrides as it goes into the
// LLVMTargetMachine constructor and then using the stored value in the
// Subtarget constructor below it.
-PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
+PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, getDataLayoutString(Triple(TT)), TT, CPU,
+ : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
computeFSAdditions(FS, OL, TT), Options, RM, CM, OL),
- TLOF(createTLOF(Triple(getTargetTriple()))),
- TargetABI(computeTargetABI(Triple(TT), Options)) {
+ TLOF(createTLOF(getTargetTriple())),
+ TargetABI(computeTargetABI(TT, Options)) {
initAsmInfo();
}
@@ -180,23 +180,21 @@ PPCTargetMachine::~PPCTargetMachine() {}
void PPC32TargetMachine::anchor() { }
-PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
+PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
-}
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
void PPC64TargetMachine::anchor() { }
-PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
-}
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
const PPCSubtarget *
PPCTargetMachine::getSubtargetImpl(const Function &F) const {
@@ -264,9 +262,8 @@ void PPCPassConfig::addIRPasses() {
// For the BG/Q (or if explicitly requested), add explicit data prefetch
// intrinsics.
- bool UsePrefetching =
- Triple(TM->getTargetTriple()).getVendor() == Triple::BGQ &&
- getOptLevel() != CodeGenOpt::None;
+ bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
+ getOptLevel() != CodeGenOpt::None;
if (EnablePrefetch.getNumOccurrences() > 0)
UsePrefetching = EnablePrefetch;
if (UsePrefetching)
@@ -320,7 +317,7 @@ void PPCPassConfig::addMachineSSAOptimization() {
TargetPassConfig::addMachineSSAOptimization();
// For little endian, remove where possible the vector swap instructions
// introduced at code generation to normalize vector element order.
- if (Triple(TM->getTargetTriple()).getArch() == Triple::ppc64le &&
+ if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
!DisableVSXSwapRemoval)
addPass(createPPCVSXSwapRemovalPass());
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index 7a49058..5c0f7e6 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -32,8 +32,8 @@ private:
mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
public:
- PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM,
+ PPCTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
~PPCTargetMachine() override;
@@ -50,7 +50,7 @@ public:
}
bool isELFv2ABI() const { return TargetABI == PPC_ABI_ELFv2; }
bool isPPC64() const {
- Triple TT(getTargetTriple());
+ const Triple &TT = getTargetTriple();
return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
};
};
@@ -60,8 +60,8 @@ public:
class PPC32TargetMachine : public PPCTargetMachine {
virtual void anchor();
public:
- PPC32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
+ PPC32TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -71,8 +71,8 @@ public:
class PPC64TargetMachine : public PPCTargetMachine {
virtual void anchor();
public:
- PPC64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
+ PPC64TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
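These constructors now take const Triple &TT rather than StringRef TT, so the triple string is parsed once and then queried by reference instead of being re-wrapped in Triple(...) at every call site, as the PPCTargetMachine.cpp hunks above show. A small illustrative sketch under that assumption (isPPC64Target is a hypothetical stand-in for the isPPC64() query):

#include "llvm/ADT/Triple.h"

// Hypothetical helper mirroring the isPPC64() predicate above.
static bool isPPC64Target(const llvm::Triple &TT) {
  return TT.getArch() == llvm::Triple::ppc64 ||
         TT.getArch() == llvm::Triple::ppc64le;
}

void demo() {
  llvm::Triple TT("powerpc64le-unknown-linux-gnu"); // parsed exactly once
  bool Is64 = isPPC64Target(TT);                    // later queries reuse it
  (void)Is64;
}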
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
index dbe7617..a5c4c23 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -22,6 +22,6 @@ public:
virtual void emitAbiVersion(int AbiVersion) = 0;
virtual void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) = 0;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
index 5e3ae2a..537db65 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -165,7 +165,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
"PowerPC VSX Copy Legalization", false, false)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index f352fa6..a029ddf 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -317,7 +317,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
"PowerPC VSX FMA Mutation", false, false)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index e238669..939293a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -809,7 +809,7 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
DEBUG(dbgs() << "\n");
}
-} // end default namespace
+} // namespace
INITIALIZE_PASS_BEGIN(PPCVSXSwapRemoval, DEBUG_TYPE,
"PowerPC VSX Swap Removal", false, false)
diff --git a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 6b3b51a..4a33f7f 100644
--- a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -76,7 +76,9 @@ class SparcAsmParser : public MCTargetAsmParser {
bool matchSparcAsmModifiers(const MCExpr *&EVal, SMLoc &EndLoc);
bool parseDirectiveWord(unsigned Size, SMLoc L);
- bool is64Bit() const { return STI.getTargetTriple().startswith("sparcv9"); }
+ bool is64Bit() const {
+ return STI.getTargetTriple().getArchName().startswith("sparcv9");
+ }
void expandSET(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
@@ -945,6 +947,8 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
return false;
}
+// Determine if an expression contains a reference to the symbol
+// "_GLOBAL_OFFSET_TABLE_".
static bool hasGOTReference(const MCExpr *Expr) {
switch (Expr->getKind()) {
case MCExpr::Target:
@@ -996,6 +1000,13 @@ bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
bool isPIC = getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_;
+ // Ugly: if a sparc assembly expression says "%hi(...)" but the
+ // expression within contains _GLOBAL_OFFSET_TABLE_, it REALLY means
+ // %pc22. Same with %lo -> %pc10. Worse, if it doesn't contain that,
+ // the meaning depends on whether the assembler was invoked with
+ // -KPIC or not: if so, it really means %got22/%got10; if not, it
+ // actually means what it said! Sigh, historical mistakes...
+
switch(VK) {
default: break;
case SparcMCExpr::VK_Sparc_LO:
diff --git a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 3e56b9e..59f011a 100644
--- a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -41,7 +41,7 @@ public:
raw_ostream &VStream,
raw_ostream &CStream) const override;
};
-}
+} // namespace
namespace llvm {
extern Target TheSparcTarget, TheSparcV9Target, TheSparcelTarget;
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 9388527..d1d7aaa 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -297,10 +297,8 @@ namespace {
} // end anonymous namespace
-
MCAsmBackend *llvm::createSparcAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT,
- StringRef CPU) {
- return new ELFSparcAsmBackend(T, Triple(TT).getOS());
+ const Triple &TT, StringRef CPU) {
+ return new ELFSparcAsmBackend(T, TT.getOS());
}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
index 4f07ae2..800a5f2 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
@@ -31,8 +31,12 @@ namespace {
protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
+
+ bool needsRelocateWithSymbol(const MCSymbol &Sym,
+ unsigned Type) const override;
+
};
-}
+} // namespace
unsigned SparcELFObjectWriter::GetRelocType(const MCValue &Target,
const MCFixup &Fixup,
@@ -105,6 +109,27 @@ unsigned SparcELFObjectWriter::GetRelocType(const MCValue &Target,
return ELF::R_SPARC_NONE;
}
+bool SparcELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
+ unsigned Type) const {
+ switch (Type) {
+ default:
+ return false;
+
+ // All relocations that use a GOT need a symbol, not an offset, as
+ // the offset of the symbol within the section is irrelevant to
+ // where the GOT entry is. Don't need to list all the TLS entries,
+ // as they're all marked as requiring a symbol anyways.
+ case ELF::R_SPARC_GOT10:
+ case ELF::R_SPARC_GOT13:
+ case ELF::R_SPARC_GOT22:
+ case ELF::R_SPARC_GOTDATA_HIX22:
+ case ELF::R_SPARC_GOTDATA_LOX10:
+ case ELF::R_SPARC_GOTDATA_OP_HIX22:
+ case ELF::R_SPARC_GOTDATA_OP_LOX10:
+ return true;
+ }
+}
+
MCObjectWriter *llvm::createSparcELFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit,
bool IsLittleEndian,
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
index 8d79396..34c58da 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
@@ -91,7 +91,7 @@ namespace llvm {
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
- }
-}
+ } // namespace Sparc
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index d34c879..91d2eee 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -63,12 +63,11 @@ static MCRegisterInfo *createSparcMCRegisterInfo(StringRef TT) {
return X;
}
-static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
+static MCSubtargetInfo *
+createSparcMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
- Triple TheTriple(TT);
if (CPU.empty())
- CPU = (TheTriple.getArch() == Triple::sparcv9) ? "v9" : "v8";
+ CPU = (TT.getArch() == Triple::sparcv9) ? "v9" : "v8";
InitSparcMCSubtargetInfo(X, TT, CPU, FS);
return X;
}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index 28e2119..8f62de4 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -25,6 +25,7 @@ class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
+class Triple;
class StringRef;
class raw_pwrite_stream;
class raw_ostream;
@@ -37,10 +38,10 @@ MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createSparcAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+ const Triple &TT, StringRef CPU);
MCObjectWriter *createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
bool IsLIttleEndian, uint8_t OSABI);
-} // End llvm namespace
+} // namespace llvm
// Defines symbolic names for Sparc registers. This defines a mapping from
// register name to register number.
diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.h b/contrib/llvm/lib/Target/Sparc/Sparc.h
index 96378d5..133af86 100644
--- a/contrib/llvm/lib/Target/Sparc/Sparc.h
+++ b/contrib/llvm/lib/Target/Sparc/Sparc.h
@@ -33,7 +33,7 @@ namespace llvm {
void LowerSparcMachineInstrToMCInst(const MachineInstr *MI,
MCInst &OutMI,
AsmPrinter &AP);
-} // end namespace llvm;
+} // namespace llvm
namespace llvm {
// Enums corresponding to Sparc condition codes, both icc's and fcc's. These
@@ -74,7 +74,7 @@ namespace llvm {
FCC_ULE = 14+16, // Unordered or Less or Equal
FCC_O = 15+16 // Ordered
};
- }
+ } // namespace SPCC
inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) {
switch (CC) {
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
index bb3b788..3d73bbd 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
@@ -55,6 +55,6 @@ private:
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
index b6bc3d2..a4b9c79 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -49,7 +49,7 @@ namespace llvm {
TLS_LD,
TLS_CALL
};
- }
+ } // namespace SPISD
class SparcTargetLowering : public TargetLowering {
const SparcSubtarget *Subtarget;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index 4b70f16..f87cee4 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -229,7 +229,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned
SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ ArrayRef<MachineOperand> Cond,
DebugLoc DL) const {
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
index 6e08418..b59dd89 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
@@ -73,8 +73,7 @@ public:
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
void copyPhysReg(MachineBasicBlock &MBB,
@@ -97,6 +96,6 @@ public:
unsigned getGlobalBaseReg(MachineFunction *MF) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
index 1047442..0471443 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -51,6 +51,6 @@ namespace llvm {
void setLeafProc(bool rhs) { IsLeafProc = rhs; }
bool isLeafProc() const { return IsLeafProc; }
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h
index 6818291..2ceae82 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -26,6 +26,6 @@ public:
~SparcSelectionDAGInfo() override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index ce1105f..479b25d 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -49,7 +49,7 @@ SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
-SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
+SparcSubtarget::SparcSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, TargetMachine &TM,
bool is64Bit)
: SparcGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit),
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
index e6cf460..983b119 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -43,7 +43,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
SparcFrameLowering FrameLowering;
public:
- SparcSubtarget(const std::string &TT, const std::string &CPU,
+ SparcSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, TargetMachine &TM, bool is64bit);
const SparcInstrInfo *getInstrInfo() const override { return &InstrInfo; }
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index d43cd9e..725d7f0 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -54,13 +54,13 @@ static std::string computeDataLayout(const Triple &T, bool is64Bit) {
/// SparcTargetMachine ctor - Create an ILP32 architecture model
///
-SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
+SparcTargetMachine::SparcTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit)
- : LLVMTargetMachine(T, computeDataLayout(Triple(TT), is64bit), TT, CPU, FS,
- Options, RM, CM, OL),
+ : LLVMTargetMachine(T, computeDataLayout(TT, is64bit), TT, CPU, FS, Options,
+ RM, CM, OL),
TLOF(make_unique<SparcELFTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this, is64bit) {
initAsmInfo();
@@ -106,19 +106,16 @@ void SparcPassConfig::addPreEmitPass(){
void SparcV8TargetMachine::anchor() { }
-SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
- StringRef TT, StringRef CPU,
- StringRef FS,
+SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM,
- CodeModel::Model CM,
+ Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
-}
+ : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
void SparcV9TargetMachine::anchor() { }
-SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, StringRef TT,
+SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
@@ -127,7 +124,7 @@ SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, StringRef TT,
void SparcelTargetMachine::anchor() {}
-SparcelTargetMachine::SparcelTargetMachine(const Target &T, StringRef TT,
+SparcelTargetMachine::SparcelTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
index fd05b8c..903c2d1 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
@@ -24,10 +24,10 @@ class SparcTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
SparcSubtarget Subtarget;
public:
- SparcTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool is64bit);
+ SparcTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL,
+ bool is64bit);
~SparcTargetMachine() override;
const SparcSubtarget *getSubtargetImpl(const Function &) const override {
@@ -46,9 +46,8 @@ public:
class SparcV8TargetMachine : public SparcTargetMachine {
virtual void anchor();
public:
- SparcV8TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
+ SparcV8TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -58,7 +57,7 @@ public:
class SparcV9TargetMachine : public SparcTargetMachine {
virtual void anchor();
public:
- SparcV9TargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ SparcV9TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
@@ -68,7 +67,7 @@ class SparcelTargetMachine : public SparcTargetMachine {
virtual void anchor();
public:
- SparcelTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ SparcelTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 0e8a680..57eebe1 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -111,7 +111,7 @@ bool SystemZMCAsmBackend::writeNopData(uint64_t Count,
MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU) {
- uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+ const Triple &TT, StringRef CPU) {
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
return new SystemZMCAsmBackend(OSABI);
}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 92681cf..8188210 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -154,9 +154,8 @@ static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) {
return X;
}
-static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT,
- StringRef CPU,
- StringRef FS) {
+static MCSubtargetInfo *
+createSystemZMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitSystemZMCSubtargetInfo(X, TT, CPU, FS);
return X;
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index 36ea750..0db48fe 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -23,6 +23,7 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Target;
+class Triple;
class raw_pwrite_stream;
class raw_ostream;
@@ -84,7 +85,7 @@ MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+ const Triple &TT, StringRef CPU);
MCObjectWriter *createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI);
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 6399293..0eb3d65 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1113,8 +1113,8 @@ bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
if (V1 == V2 && End1 == End2)
return false;
- return !AA->alias(AliasAnalysis::Location(V1, End1, Load->getAAInfo()),
- AliasAnalysis::Location(V2, End2, Store->getAAInfo()));
+ return !AA->alias(MemoryLocation(V1, End1, Load->getAAInfo()),
+ MemoryLocation(V2, End2, Store->getAAInfo()));
}
bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const {
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 91e12c2..7584579 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -3292,7 +3292,7 @@ struct Permute {
unsigned Operand;
unsigned char Bytes[SystemZ::VectorBytes];
};
-}
+} // namespace
static const Permute PermuteForms[] = {
// VMRHG
@@ -3574,7 +3574,7 @@ struct GeneralShuffle {
// The type of the shuffle result.
EVT VT;
};
-}
+} // namespace
// Add an extra undefined element to the shuffle.
void GeneralShuffle::addUndef() {
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 4346850..5d4a34f 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -362,7 +362,7 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
unsigned
SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ ArrayRef<MachineOperand> Cond,
DebugLoc DL) const {
// In this function we output 32-bit branches, which should always
// have enough range. They can be shortened and relaxed by later code
@@ -530,8 +530,7 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
}
bool SystemZInstrInfo::
-PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const {
+PredicateInstruction(MachineInstr *MI, ArrayRef<MachineOperand> Pred) const {
assert(Pred.size() == 2 && "Invalid condition");
unsigned CCValid = Pred[0].getImm();
unsigned CCMask = Pred[1].getImm();
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index e47f2ee..31c9db2 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -149,8 +149,7 @@ public:
bool AllowModify) const override;
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
unsigned &SrcReg2, int &Mask, int &Value) const override;
@@ -167,8 +166,7 @@ public:
unsigned NumCyclesF, unsigned ExtraPredCyclesF,
const BranchProbability &Probability) const override;
bool PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const
- override;
+ ArrayRef<MachineOperand> Pred) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
DebugLoc DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index 05aede3..eb5e5c0 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -32,8 +32,7 @@ SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
return *this;
}
-SystemZSubtarget::SystemZSubtarget(const std::string &TT,
- const std::string &CPU,
+SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const TargetMachine &TM)
: SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
@@ -41,9 +40,9 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT,
HasPopulationCount(false), HasFastSerialization(false),
HasInterlockedAccess1(false), HasMiscellaneousExtensions(false),
HasTransactionalExecution(false), HasProcessorAssist(false),
- HasVector(false),
- TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
- TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() {}
+ HasVector(false), TargetTriple(TT),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
+ TSInfo(*TM.getDataLayout()), FrameLowering() {}
// Return true if GV binds locally under reloc model RM.
static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) {
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index 9a1f593..f7eaf01c 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -56,7 +56,7 @@ private:
SystemZSubtarget &initializeSubtargetDependencies(StringRef CPU,
StringRef FS);
public:
- SystemZSubtarget(const std::string &TT, const std::string &CPU,
+ SystemZSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM);
const TargetFrameLowering *getFrameLowering() const override {
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index a34cdaf..00cbbd1 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -43,9 +43,8 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) {
return VectorABI;
}
-static std::string computeDataLayout(StringRef TT, StringRef CPU,
+static std::string computeDataLayout(const Triple &TT, StringRef CPU,
StringRef FS) {
- const Triple Triple(TT);
bool VectorABI = UsesVectorABI(CPU, FS);
std::string Ret = "";
@@ -53,7 +52,7 @@ static std::string computeDataLayout(StringRef TT, StringRef CPU,
Ret += "E";
// Data mangling.
- Ret += DataLayout::getManglingComponent(Triple);
+ Ret += DataLayout::getManglingComponent(TT);
// Make sure that global data has at least 16 bits of alignment by
// default, so that we can refer to it using LARL. We don't have any
@@ -79,13 +78,13 @@ static std::string computeDataLayout(StringRef TT, StringRef CPU,
return Ret;
}
-SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT,
+SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS),
- TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
+ RM, CM, OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
index 5ded07c..0a81e1f 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -27,7 +27,7 @@ class SystemZTargetMachine : public LLVMTargetMachine {
SystemZSubtarget Subtarget;
public:
- SystemZTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
index d498bb1..19b5e2a 100644
--- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -44,8 +44,8 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx,
const TargetMachine &TM) {
Ctx = &ctx;
DL = TM.getDataLayout();
- InitMCObjectFileInfo(TM.getTargetTriple(),
- TM.getRelocationModel(), TM.getCodeModel(), *Ctx);
+ InitMCObjectFileInfo(TM.getTargetTriple(), TM.getRelocationModel(),
+ TM.getCodeModel(), *Ctx);
}
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp
index 2824250..0b05303 100644
--- a/contrib/llvm/lib/Target/TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/TargetMachine.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
//
TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
- StringRef TT, StringRef CPU, StringRef FS,
+ const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options)
: TheTarget(T), DL(DataLayoutString), TargetTriple(TT), TargetCPU(CPU),
TargetFS(FS), CodeGenInfo(nullptr), AsmInfo(nullptr), MRI(nullptr),
@@ -70,7 +70,6 @@ void TargetMachine::resetTargetOptions(const Function &F) const {
RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
- RESET_OPTION(DisableTailCalls, "disable-tail-calls");
}
/// getRelocationModel - Returns the code generation relocation model. The
diff --git a/contrib/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm/lib/Target/TargetMachineC.cpp
index 623b3e8..7199235 100644
--- a/contrib/llvm/lib/Target/TargetMachineC.cpp
+++ b/contrib/llvm/lib/Target/TargetMachineC.cpp
@@ -156,7 +156,7 @@ LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T) {
}
char* LLVMGetTargetMachineTriple(LLVMTargetMachineRef T) {
- std::string StringRep = unwrap(T)->getTargetTriple();
+ std::string StringRep = unwrap(T)->getTargetTriple().str();
return strdup(StringRep.c_str());
}
diff --git a/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp b/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp
index b2bb59e..87df7af 100644
--- a/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp
+++ b/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp
@@ -40,7 +40,7 @@ bool TargetSubtargetInfo::enableRALocalReassignment(
return true;
}
-bool TargetSubtargetInfo::enablePostMachineScheduler() const {
+bool TargetSubtargetInfo::enablePostRAScheduler() const {
return getSchedModel().PostRAScheduler;
}
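For reference, the renamed hook keeps the same shape: the base implementation consults the scheduling model, as the hunk above shows, while a subtarget such as PPC overrides it to force the post-RA scheduler on. A rough sketch with hypothetical names, not taken from the tree:

// Hypothetical, simplified mirror of the override pattern used above.
struct SchedModel { bool PostRAScheduler = false; };

struct SubtargetBase {
  virtual ~SubtargetBase() = default;
  const SchedModel &getSchedModel() const { return Model; }
  // Default: follow the per-CPU scheduling-model bit.
  virtual bool enablePostRAScheduler() const {
    return getSchedModel().PostRAScheduler;
  }
  SchedModel Model;
};

struct PPCLikeSubtarget : SubtargetBase {
  // Target override: always run the post-RA scheduler.
  bool enablePostRAScheduler() const override { return true; }
};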
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index 9eee4a0..6ba897b 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -1080,4 +1080,4 @@ CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
return new X86AsmInstrumentation(STI);
}
-} // End llvm namespace
+} // namespace llvm
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
index 19ebcc4..341fc81 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
@@ -61,6 +61,6 @@ protected:
unsigned InitialFrameReg;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index e896571..418f043 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -42,15 +42,16 @@ namespace {
static const char OpPrecedence[] = {
0, // IC_OR
- 1, // IC_AND
- 2, // IC_LSHIFT
- 2, // IC_RSHIFT
- 3, // IC_PLUS
- 3, // IC_MINUS
- 4, // IC_MULTIPLY
- 4, // IC_DIVIDE
- 5, // IC_RPAREN
- 6, // IC_LPAREN
+ 1, // IC_XOR
+ 2, // IC_AND
+ 3, // IC_LSHIFT
+ 3, // IC_RSHIFT
+ 4, // IC_PLUS
+ 4, // IC_MINUS
+ 5, // IC_MULTIPLY
+ 5, // IC_DIVIDE
+ 6, // IC_RPAREN
+ 7, // IC_LPAREN
0, // IC_IMM
0 // IC_REGISTER
};
@@ -70,6 +71,7 @@ private:
enum InfixCalculatorTok {
IC_OR = 0,
+ IC_XOR,
IC_AND,
IC_LSHIFT,
IC_RSHIFT,
@@ -204,6 +206,12 @@ private:
Val = Op1.second | Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
+ case IC_XOR:
+ assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
+ "Xor operation with an immediate and a register!");
+ Val = Op1.second ^ Op2.second;
+ OperandStack.push_back(std::make_pair(IC_IMM, Val));
+ break;
case IC_AND:
assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
"And operation with an immediate and a register!");
@@ -232,6 +240,7 @@ private:
enum IntelExprState {
IES_OR,
+ IES_XOR,
IES_AND,
IES_LSHIFT,
IES_RSHIFT,
@@ -297,6 +306,21 @@ private:
}
PrevState = CurrState;
}
+ void onXor() {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_INTEGER:
+ case IES_RPAREN:
+ case IES_REGISTER:
+ State = IES_XOR;
+ IC.pushOperator(IC_XOR);
+ break;
+ }
+ PrevState = CurrState;
+ }
void onAnd() {
IntelExprState CurrState = State;
switch (State) {
@@ -473,6 +497,7 @@ private:
case IES_MINUS:
case IES_NOT:
case IES_OR:
+ case IES_XOR:
case IES_AND:
case IES_LSHIFT:
case IES_RSHIFT:
@@ -496,7 +521,7 @@ private:
PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
- PrevState == IES_NOT) &&
+ PrevState == IES_NOT || PrevState == IES_XOR) &&
CurrState == IES_MINUS) {
// Unary minus. No need to pop the minus operand because it was never
// pushed.
@@ -506,7 +531,7 @@ private:
PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
- PrevState == IES_NOT) &&
+ PrevState == IES_NOT || PrevState == IES_XOR) &&
CurrState == IES_NOT) {
// Unary not. No need to pop the not operand because it was never
// pushed.
@@ -593,6 +618,7 @@ private:
case IES_MINUS:
case IES_NOT:
case IES_OR:
+ case IES_XOR:
case IES_AND:
case IES_LSHIFT:
case IES_RSHIFT:
@@ -605,7 +631,7 @@ private:
PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
- PrevState == IES_NOT) &&
+ PrevState == IES_NOT || PrevState == IES_XOR) &&
(CurrState == IES_MINUS || CurrState == IES_NOT)) {
State = IES_ERROR;
break;
@@ -1217,6 +1243,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
case AsmToken::Star: SM.onStar(); break;
case AsmToken::Slash: SM.onDivide(); break;
case AsmToken::Pipe: SM.onOr(); break;
+ case AsmToken::Caret: SM.onXor(); break;
case AsmToken::Amp: SM.onAnd(); break;
case AsmToken::LessLess:
SM.onLShift(); break;
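The new IC_XOR level sits between IC_OR and IC_AND, so in Intel-syntax constant expressions '^' binds tighter than '|' but looser than '&', matching ordinary C-style precedence. A self-contained check of that grouping, purely illustrative and not part of the parser:

#include <cassert>

int main() {
  // Grouping implied by the table above for: 1 or 6 xor 3 and 5
  int Layered = 1 | (6 ^ (3 & 5)); // = 1 | (6 ^ 1) = 7
  // What a single-precedence evaluator would have produced instead.
  int Flat = ((1 | 6) ^ 3) & 5;    // = (7 ^ 3) & 5 = 4
  assert(Layered == 7 && Flat == 4);
  return 0;
}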
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 6e99c37..5b53fbe 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -69,7 +69,7 @@ namespace X86 {
extern Target TheX86_32Target, TheX86_64Target;
-}
+} // namespace llvm
static bool translateInstruction(MCInst &target,
InternalInstruction &source,
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 62b6b73..ac484f3 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -140,6 +140,6 @@ public:
private:
bool HasCustomInstComment;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 6e371da..2bee518 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -159,6 +159,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 1ac656d..2d85f84 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -426,7 +426,7 @@ namespace CU {
UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};
-} // end CU namespace
+} // namespace CU
class DarwinX86AsmBackend : public X86AsmBackend {
const MCRegisterInfo &MRI;
@@ -790,10 +790,8 @@ public:
MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT,
+ const Triple &TheTriple,
StringRef CPU) {
- Triple TheTriple(TT);
-
if (TheTriple.isOSBinFormatMachO())
return new DarwinX86_32AsmBackend(T, MRI, CPU);
@@ -806,10 +804,8 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT,
+ const Triple &TheTriple,
StringRef CPU) {
- Triple TheTriple(TT);
-
if (TheTriple.isOSBinFormatMachO()) {
MachO::CPUSubTypeX86 CS =
StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName())
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 85b0006..69e9c7b 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -41,7 +41,7 @@ namespace X86 {
/// AddrNumOperands - Total number of operands in a memory reference.
AddrNumOperands = 5
};
-} // end namespace X86;
+} // namespace X86
/// X86II - This namespace holds all of the target specific flags that
/// instruction info tracks.
@@ -271,7 +271,7 @@ namespace X86II {
/// register DI/EDI/ESI.
RawFrmDst = 9,
- /// RawFrmSrc - This form is for instructions that use the the source index
+ /// RawFrmSrc - This form is for instructions that use the source index
/// register SI/ESI/ERI with a possible segment override, and also the
/// destination index register DI/ESI/RDI.
RawFrmDstSrc = 10,
@@ -762,8 +762,8 @@ namespace X86II {
return (reg == X86::SPL || reg == X86::BPL ||
reg == X86::SIL || reg == X86::DIL);
}
-}
+} // namespace X86II
-} // end namespace llvm;
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index a33468d..512afeb 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -28,7 +28,7 @@ namespace {
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
};
-}
+} // namespace
X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
uint16_t EMachine)
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
index 2943dd3..7c09e5d 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
@@ -32,7 +32,8 @@ public:
StringRef SymName; SymI->getName(SymName);
uint64_t SymAddr; SymI->getAddress(SymAddr);
uint64_t SymSize = SymI->getSize();
- int64_t Addend; getELFRelocationAddend(Rel, Addend);
+ auto *Obj = cast<ELFObjectFileBase>(Rel.getObjectFile());
+ int64_t Addend = *Obj->getRelocationAddend(Rel.getRawDataRefImpl());
MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
index 4899900..a523a32 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
@@ -28,7 +28,7 @@ enum Fixups {
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
-}
-}
+} // namespace X86
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index cc98e55..431010d 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -42,12 +42,11 @@ using namespace llvm;
#define GET_SUBTARGETINFO_MC_DESC
#include "X86GenSubtargetInfo.inc"
-std::string X86_MC::ParseX86Triple(StringRef TT) {
- Triple TheTriple(TT);
+std::string X86_MC::ParseX86Triple(const Triple &TT) {
std::string FS;
- if (TheTriple.getArch() == Triple::x86_64)
+ if (TT.getArch() == Triple::x86_64)
FS = "+64bit-mode,-32bit-mode,-16bit-mode";
- else if (TheTriple.getEnvironment() != Triple::CODE16)
+ else if (TT.getEnvironment() != Triple::CODE16)
FS = "-64bit-mode,+32bit-mode,-16bit-mode";
else
FS = "-64bit-mode,-32bit-mode,+16bit-mode";
@@ -55,7 +54,7 @@ std::string X86_MC::ParseX86Triple(StringRef TT) {
return FS;
}
-unsigned X86_MC::getDwarfRegFlavour(Triple TT, bool isEH) {
+unsigned X86_MC::getDwarfRegFlavour(const Triple &TT, bool isEH) {
if (TT.getArch() == Triple::x86_64)
return DWARFFlavour::X86_64;
@@ -75,8 +74,8 @@ void X86_MC::InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI) {
}
}
-MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
+MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT,
+ StringRef CPU, StringRef FS) {
std::string ArchFS = X86_MC::ParseX86Triple(TT);
if (!FS.empty()) {
if (!ArchFS.empty())
@@ -219,15 +218,14 @@ static MCInstPrinter *createX86MCInstPrinter(const Triple &T,
return nullptr;
}
-static MCRelocationInfo *createX86MCRelocationInfo(StringRef TT,
+static MCRelocationInfo *createX86MCRelocationInfo(const Triple &TheTriple,
MCContext &Ctx) {
- Triple TheTriple(TT);
if (TheTriple.isOSBinFormatMachO() && TheTriple.getArch() == Triple::x86_64)
return createX86_64MachORelocationInfo(Ctx);
else if (TheTriple.isOSBinFormatELF())
return createX86_64ELFRelocationInfo(Ctx);
// Default to the stock relocation info.
- return llvm::createMCRelocationInfo(TT, Ctx);
+ return llvm::createMCRelocationInfo(TheTriple, Ctx);
}
static MCInstrAnalysis *createX86MCInstrAnalysis(const MCInstrInfo *Info) {
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index dcdae1d..020803b 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -52,26 +52,26 @@ namespace N86 {
}
namespace X86_MC {
- std::string ParseX86Triple(StringRef TT);
+std::string ParseX86Triple(const Triple &TT);
- unsigned getDwarfRegFlavour(Triple TT, bool isEH);
+unsigned getDwarfRegFlavour(const Triple &TT, bool isEH);
- void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
+void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
- /// Create a X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc.
- /// do not need to go through TargetRegistry.
- MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS);
-}
+/// Create a X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc.
+/// do not need to go through TargetRegistry.
+MCSubtargetInfo *createX86MCSubtargetInfo(const Triple &TT, StringRef CPU,
+ StringRef FS);
+} // namespace X86_MC
MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+ const Triple &TT, StringRef CPU);
MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+ const Triple &TT, StringRef CPU);
/// Construct an X86 Windows COFF machine code streamer which will generate
/// PE/COFF format object files.
@@ -98,7 +98,7 @@ MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx);
/// Construct X86-64 ELF relocation info.
MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx);
-} // End llvm namespace
+} // namespace llvm
// Defines symbolic names for X86 registers. This defines a mapping from
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 95acc07..773fbf4 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -69,7 +69,7 @@ public:
FixedValue);
}
};
-}
+} // namespace
static bool isFixupKindRIPRel(unsigned Kind) {
return Kind == X86::reloc_riprel_4byte ||
@@ -205,7 +205,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
if (Symbol->isTemporary() && Value) {
const MCSection &Sec = Symbol->getSection();
if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
- Asm.addLocalUsedInReloc(*Symbol);
+ Symbol->setUsedInReloc();
}
RelSymbol = Asm.getAtom(*Symbol);
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index bd1bc99..7d262cd 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -31,7 +31,7 @@ namespace {
bool IsCrossSection,
const MCAsmBackend &MAB) const override;
};
-}
+} // namespace
X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit)
: MCWinCOFFObjectTargetWriter(Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index 92f42b6..dc6dd66 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -46,7 +46,7 @@ void X86WinCOFFStreamer::FinishImpl() {
MCWinCOFFStreamer::FinishImpl();
}
-}
+} // namespace
MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
raw_pwrite_stream &OS,
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index ef3318b..1e7d942 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -431,4 +431,4 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
for (unsigned i = 1; i < NumElts; i++)
Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
}
-} // llvm namespace
+} // namespace llvm
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index 14b6943..0139297 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -100,6 +100,6 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a scalar float move instruction as a shuffle mask.
void DecodeScalarMoveMask(MVT VT, bool IsLoad,
SmallVectorImpl<int> &ShuffleMask);
-} // llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
index 8403ae6..80f4579 100644
--- a/contrib/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -80,6 +80,6 @@ FunctionPass *createX86WinEHStatePass();
/// must run after prologue/epilogue insertion and before lowering
/// the MachineInstr to MC.
FunctionPass *createX86ExpandPseudoPass();
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 64fc6d0..2051401 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -511,7 +511,7 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
- Triple TT(TM.getTargetTriple());
+ const Triple &TT = TM.getTargetTriple();
if (TT.isOSBinFormatMachO())
OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
@@ -585,7 +585,7 @@ void X86AsmPrinter::GenerateExportDirective(const MCSymbol *Sym, bool IsData) {
SmallString<128> Directive;
raw_svector_ostream OS(Directive);
StringRef Name = Sym->getName();
- Triple TT(TM.getTargetTriple());
+ const Triple &TT = TM.getTargetTriple();
if (TT.isKnownWindowsMSVCEnvironment())
OS << " /EXPORT:";
@@ -610,7 +610,7 @@ void X86AsmPrinter::GenerateExportDirective(const MCSymbol *Sym, bool IsData) {
}
void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
- Triple TT(TM.getTargetTriple());
+ const Triple &TT = TM.getTargetTriple();
if (TT.isOSBinFormatMachO()) {
// All darwin targets use mach-o.
@@ -674,6 +674,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
SM.serializeToStackMapSection();
+ FM.serializeToFaultMapSection();
// Funny Darwin hack: This flag tells the linker that no global symbols
// contain code that falls through to other global symbols (e.g. the obvious
@@ -726,8 +727,10 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
}
- if (TT.isOSBinFormatELF())
+ if (TT.isOSBinFormatELF()) {
SM.serializeToStackMapSection();
+ FM.serializeToFaultMapSection();
+ }
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
index 3beeb17..acba211 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -12,6 +12,7 @@
#include "X86Subtarget.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/FaultMaps.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/Target/TargetMachine.h"
@@ -27,6 +28,7 @@ class MCSymbol;
class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget;
StackMaps SM;
+ FaultMaps FM;
void GenerateExportDirective(const MCSymbol *Sym, bool IsData);
@@ -83,13 +85,15 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void LowerSTACKMAP(const MachineInstr &MI);
void LowerPATCHPOINT(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerSTATEPOINT(const MachineInstr &MI, X86MCInstLower &MCIL);
+ void LowerFAULTING_LOAD_OP(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerTlsAddr(X86MCInstLower &MCInstLowering, const MachineInstr &MI);
public:
explicit X86AsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)), SM(*this), SMShadowTracker(TM) {}
+ : AsmPrinter(TM, std::move(Streamer)), SM(*this), FM(*this),
+ SMShadowTracker(TM) {}
const char *getPassName() const override {
return "X86 Assembly / Object Emitter";
diff --git a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
index 4412125..6d6831b 100644
--- a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -99,7 +99,7 @@ private:
};
char X86CallFrameOptimization::ID = 0;
-}
+} // namespace
FunctionPass *llvm::createX86CallFrameOptimization() {
return new X86CallFrameOptimization();
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.h b/contrib/llvm/lib/Target/X86/X86CallingConv.h
index 0eb2494..a377eb6 100644
--- a/contrib/llvm/lib/Target/X86/X86CallingConv.h
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.h
@@ -42,7 +42,7 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
return false;
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 1b00997..6a5a28e 100644
--- a/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -84,19 +84,9 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
int StackAdj = StackAdjust.getImm();
if (StackAdj) {
- bool Is64Bit = STI->is64Bit();
- // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
- const bool Uses64BitFramePtr =
- STI->isTarget64BitLP64() || STI->isTargetNaCl64();
- // Check if we should use LEA for SP.
- bool UseLEAForSP = STI->useLeaForSP() &&
- X86FL->canUseLEAForSPInEpilogue(*MBB.getParent());
- unsigned StackPtr = TRI->getStackRegister();
// Check for possible merge with preceding ADD instruction.
- StackAdj += X86FrameLowering::mergeSPUpdates(MBB, MBBI, StackPtr, true);
- X86FrameLowering::emitSPUpdate(MBB, MBBI, StackPtr, StackAdj, Is64Bit,
- Uses64BitFramePtr, UseLEAForSP, *TII,
- *TRI);
+ StackAdj += X86FL->mergeSPUpdates(MBB, MBBI, true);
+ X86FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true);
}
// Jump to label or value in register.
diff --git a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
index b39c5ab..8305a04 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -44,7 +44,7 @@ class FixupLEAPass : public MachineFunctionPass {
/// \brief Given a machine register, look for the instruction
/// which writes it in the current basic block. If found,
/// try to replace it with an equivalent LEA instruction.
- /// If replacement succeeds, then also process the the newly created
+ /// If replacement succeeds, then also process the newly created
/// instruction.
void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI);
@@ -91,7 +91,7 @@ private:
const X86InstrInfo *TII; // Machine instruction info.
};
char FixupLEAPass::ID = 0;
-}
+} // namespace
MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 3b0bd03..6f1d8e5 100644
--- a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -279,7 +279,7 @@ namespace {
void setKillFlags(MachineBasicBlock &MBB) const;
};
char FPS::ID = 0;
-}
+} // namespace
FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
@@ -544,7 +544,7 @@ namespace {
return V < TE.from;
}
};
-}
+} // namespace
#ifndef NDEBUG
static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index db58d9c..85c5b64 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -37,6 +37,20 @@ using namespace llvm;
// FIXME: completely move here.
extern cl::opt<bool> ForceStackAlign;
+X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
+ unsigned StackAlignOverride)
+ : TargetFrameLowering(StackGrowsDown, StackAlignOverride,
+ STI.is64Bit() ? -8 : -4),
+ STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
+ // Cache a bunch of frame-related predicates for this subtarget.
+ SlotSize = TRI->getSlotSize();
+ Is64Bit = STI.is64Bit();
+ IsLP64 = STI.isTarget64BitLP64();
+ // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
+ Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
+ StackPtr = TRI->getStackRegister();
+}
+
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects() &&
!MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
@@ -48,11 +62,9 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
/// Use a more nuanced condition.
bool
X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
- const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>
- (MF.getSubtarget().getRegisterInfo());
return hasReservedCallFrame(MF) ||
- (hasFP(MF) && !TRI->needsStackRealignment(MF))
- || TRI->hasBasePointer(MF);
+ (hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
+ TRI->hasBasePointer(MF);
}
// needsFrameIndexResolution - Do we need to perform FI resolution for
@@ -74,10 +86,9 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineModuleInfo &MMI = MF.getMMI();
- const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
- RegInfo->needsStackRealignment(MF) ||
+ TRI->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
@@ -137,7 +148,7 @@ static unsigned getLEArOpcode(unsigned IsLP64) {
/// to this register without worry about clobbering it.
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- const TargetRegisterInfo &TRI,
+ const TargetRegisterInfo *TRI,
bool Is64Bit) {
const MachineFunction *MF = MBB.getParent();
const Function *F = MF->getFunction();
@@ -176,7 +187,7 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
Uses.insert(*AI);
}
@@ -203,23 +214,36 @@ static bool isEAXLiveIn(MachineFunction &MF) {
return false;
}
+/// Check whether or not the terminators of \p MBB need to read EFLAGS.
+static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) {
+ for (const MachineInstr &MI : MBB.terminators()) {
+ bool BreakNext = false;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg != X86::EFLAGS)
+ continue;
+
+ // This terminator needs an eflag that is not defined
+ // by a previous terminator.
+ if (!MO.isDef())
+ return true;
+ BreakNext = true;
+ }
+ if (BreakNext)
+ break;
+ }
+ return false;
+}
+
/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, int64_t NumBytes,
- bool Is64BitTarget, bool Is64BitStackPtr,
- bool UseLEA, const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI) {
+ int64_t NumBytes, bool InEpilogue) const {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
- unsigned Opc;
- if (UseLEA)
- Opc = getLEArOpcode(Is64BitStackPtr);
- else
- Opc = isSub
- ? getSUBriOpcode(Is64BitStackPtr, Offset)
- : getADDriOpcode(Is64BitStackPtr, Offset);
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
@@ -231,17 +255,17 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
unsigned Reg = 0;
if (isSub && !isEAXLiveIn(*MBB.getParent()))
- Reg = (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX);
+ Reg = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
else
- Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
+ Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
if (Reg) {
- Opc = Is64BitTarget ? X86::MOV64ri : X86::MOV32ri;
+ unsigned Opc = Is64Bit ? X86::MOV64ri : X86::MOV32ri;
BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg)
.addImm(Offset);
Opc = isSub
- ? getSUBrrOpcode(Is64BitTarget)
- : getADDrrOpcode(Is64BitTarget);
+ ? getSUBrrOpcode(Is64Bit)
+ : getADDrrOpcode(Is64Bit);
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addReg(Reg);
@@ -252,15 +276,15 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
}
uint64_t ThisVal = std::min(Offset, Chunk);
- if (ThisVal == (Is64BitTarget ? 8 : 4)) {
+ if (ThisVal == (Is64Bit ? 8 : 4)) {
// Use push / pop instead.
unsigned Reg = isSub
- ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX)
- : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
+ ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
+ : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
if (Reg) {
- Opc = isSub
- ? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r)
- : (Is64BitTarget ? X86::POP64r : X86::POP32r);
+ unsigned Opc = isSub
+ ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
+ : (Is64Bit ? X86::POP64r : X86::POP32r);
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
.addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
if (isSub)
@@ -270,25 +294,59 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
}
}
- MachineInstr *MI = nullptr;
-
- if (UseLEA) {
- MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
- StackPtr, false, isSub ? -ThisVal : ThisVal);
- } else {
- MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(ThisVal);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
- }
-
+ MachineInstrBuilder MI = BuildStackAdjustment(
+ MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue);
if (isSub)
- MI->setFlag(MachineInstr::FrameSetup);
+ MI.setMIFlag(MachineInstr::FrameSetup);
Offset -= ThisVal;
}
}
+MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ int64_t Offset, bool InEpilogue) const {
+ assert(Offset != 0 && "zero offset stack adjustment requested");
+
+ // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
+ // is tricky.
+ bool UseLEA;
+ if (!InEpilogue) {
+ UseLEA = STI.useLeaForSP();
+ } else {
+ // If we can use LEA for SP but we shouldn't, check that none
+ // of the terminators uses the eflags. Otherwise we will insert
+ // an ADD that will redefine the eflags and break the condition.
+ // Alternatively, we could move the ADD, but this may not be possible
+ // and is an optimization anyway.
+ UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
+ if (UseLEA && !STI.useLeaForSP())
+ UseLEA = terminatorsNeedFlagsAsInput(MBB);
+ // If that assert breaks, that means we do not do the right thing
+ // in canUseAsEpilogue.
+ assert((UseLEA || !terminatorsNeedFlagsAsInput(MBB)) &&
+ "We shouldn't have allowed this insertion point");
+ }
+
+ MachineInstrBuilder MI;
+ if (UseLEA) {
+ MI = addRegOffset(BuildMI(MBB, MBBI, DL,
+ TII.get(getLEArOpcode(Uses64BitFramePtr)),
+ StackPtr),
+ StackPtr, false, Offset);
+ } else {
+ bool IsSub = Offset < 0;
+ uint64_t AbsOffset = IsSub ? -Offset : Offset;
+ unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
+ : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
+ MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(AbsOffset);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ }
+ return MI;
+}
+
/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
@@ -315,8 +373,7 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr,
- bool doMergeWithPrevious) {
+ bool doMergeWithPrevious) const {
if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
(!doMergeWithPrevious && MBBI == MBB.end()))
return 0;
@@ -345,6 +402,15 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
return Offset;
}
+void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ MCCFIInstruction CFIInst) const {
+ MachineFunction &MF = *MBB.getParent();
+ unsigned CFIIndex = MF.getMMI().addFrameInst(CFIInst);
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+}
+
void
X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -353,7 +419,6 @@ X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
@@ -366,11 +431,8 @@ X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
unsigned Reg = I->getReg();
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- unsigned CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg,
- Offset));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
}
}
@@ -394,10 +456,7 @@ static bool usesTheStack(const MachineFunction &MF) {
void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- DebugLoc DL) {
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
- bool Is64Bit = STI.is64Bit();
+ DebugLoc DL) const {
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
unsigned CallOp;
@@ -463,13 +522,10 @@ static unsigned calculateSetFPREG(uint64_t SPAdjust) {
// info, we need to know the ABI stack alignment as well in case we
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
-static uint64_t calculateMaxStackAlign(const MachineFunction &MF) {
+uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
- unsigned SlotSize = RegInfo->getSlotSize();
- unsigned StackAlign = STI.getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = getStackAlignment();
if (ForceStackAlign) {
if (MFI->hasCalls())
MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
@@ -479,6 +535,22 @@ static uint64_t calculateMaxStackAlign(const MachineFunction &MF) {
return MaxAlign;
}
+void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL,
+ uint64_t MaxAlign) const {
+ uint64_t Val = -MaxAlign;
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Val)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+}
+
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -565,40 +637,32 @@ static uint64_t calculateMaxStackAlign(const MachineFunction &MF) {
void X86FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
+ assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
+ "MF used frame lowering for wrong subtarget");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
bool HasFP = hasFP(MF);
- bool Is64Bit = STI.is64Bit();
- // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
- const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
- bool IsWin64 = STI.isCallingConvWin64(Fn->getCallingConv());
- // Not necessarily synonymous with IsWin64.
- bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
- bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
+ bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv());
+ bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ bool NeedsWinCFI = IsWin64Prologue && Fn->needsUnwindTableEntry();
bool NeedsDwarfCFI =
- !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
- bool UseLEA = STI.useLeaForSP();
- unsigned SlotSize = RegInfo->getSlotSize();
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ !IsWin64Prologue && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
+ unsigned FramePtr = TRI->getFrameRegister(MF);
const unsigned MachineFramePtr =
STI.isTarget64BitILP32()
? getX86SubSuperRegister(FramePtr, MVT::i64, false)
: FramePtr;
- unsigned StackPtr = RegInfo->getStackRegister();
- unsigned BasePtr = RegInfo->getBaseRegister();
+ unsigned BasePtr = TRI->getBaseRegister();
DebugLoc DL;
// Add RETADDR move area to callee saved frame size.
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta && IsWinEH)
+ if (TailCallReturnAddrDelta && IsWin64Prologue)
report_fatal_error("Can't handle guaranteed tail call under win64 yet");
if (TailCallReturnAddrDelta < 0)
@@ -621,10 +685,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// stack pointer (we fit in the Red Zone). We also check that we don't
// push and pop from the stack.
if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) &&
- !RegInfo->needsStackRealignment(MF) &&
+ !TRI->needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
- !IsWin64 && // Win64 has no Red Zone
+ !IsWin64CC && // Win64 has no Red Zone
!usesTheStack(MF) && // Don't push and pop.
!MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
@@ -637,14 +701,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// applies to tail call optimized functions where the callee argument stack
// size is bigger than the callers.
if (TailCallReturnAddrDelta < 0) {
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL,
- TII.get(getSUBriOpcode(Uses64BitFramePtr, -TailCallReturnAddrDelta)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(-TailCallReturnAddrDelta)
+ BuildStackAdjustment(MBB, MBBI, DL, TailCallReturnAddrDelta,
+ /*InEpilogue=*/false)
.setMIFlag(MachineInstr::FrameSetup);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
}
// Mapping for machine moves:
@@ -674,7 +733,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
// Callee-saved registers are pushed on stack before the stack is realigned.
- if (RegInfo->needsStackRealignment(MF) && !IsWinEH)
+ if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
NumBytes = RoundUpToAlignment(NumBytes, MaxAlign);
// Get the offset of the stack slot for the EBP register, which is
@@ -691,27 +750,22 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Mark the place where EBP/RBP was saved.
// Define the current CFA rule to use the provided offset.
assert(StackSize);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
// Change the rule for the FramePtr to be an "offset" rule.
- unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
- CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(nullptr,
- DwarfFramePtr, 2 * stackGrowth));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+ BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createOffset(
+ nullptr, DwarfFramePtr, 2 * stackGrowth));
}
- if (NeedsWinEH) {
+ if (NeedsWinCFI) {
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
.addImm(FramePtr)
.setMIFlag(MachineInstr::FrameSetup);
}
- if (!IsWinEH) {
+ if (!IsWin64Prologue) {
// Update EBP with the new base value.
BuildMI(MBB, MBBI, DL,
TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
@@ -723,11 +777,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (NeedsDwarfCFI) {
// Mark effective beginning of when frame pointer becomes valid.
// Define the current CFA to use the EBP/RBP register.
- unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
}
// Mark the FramePtr as live-in in every block.
@@ -752,14 +804,12 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Mark callee-saved push instruction.
// Define the current CFA rule to use the provided offset.
assert(StackSize);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
StackOffset += stackGrowth;
}
- if (NeedsWinEH) {
+ if (NeedsWinCFI) {
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
MachineInstr::FrameSetup);
}
@@ -768,24 +818,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Realign stack after we pushed callee-saved registers (so that we'll be
// able to calculate their offsets from the frame pointer).
// Don't do this for Win64, it needs to realign the stack after the prologue.
- if (!IsWinEH && RegInfo->needsStackRealignment(MF)) {
+ if (!IsWin64Prologue && TRI->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
- uint64_t Val = -MaxAlign;
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(Val)
- .setMIFlag(MachineInstr::FrameSetup);
-
- // The EFLAGS implicit def is dead.
- MI->getOperand(3).setIsDead();
+ BuildStackAlignAND(MBB, MBBI, DL, MaxAlign);
}
// If there is an SUB32ri of ESP immediately before this instruction, merge
// the two. This can be the case when tail call elimination is enabled and
// the callee has more arguments then the caller.
- NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ NumBytes -= mergeSPUpdates(MBB, MBBI, true);
// Adjust stack pointer: ESP -= numbytes.
@@ -798,7 +839,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
uint64_t AlignedNumBytes = NumBytes;
- if (IsWinEH && RegInfo->needsStackRealignment(MF))
+ if (IsWin64Prologue && TRI->needsStackRealignment(MF))
AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign);
if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
// Check whether EAX is livein for this function.
@@ -859,17 +900,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
MBB.insert(MBBI, MI);
}
} else if (NumBytes) {
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, Uses64BitFramePtr,
- UseLEA, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, /*InEpilogue=*/false);
}
- if (NeedsWinEH && NumBytes)
+ if (NeedsWinCFI && NumBytes)
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
.addImm(NumBytes)
.setMIFlag(MachineInstr::FrameSetup);
int SEHFrameOffset = 0;
- if (IsWinEH && HasFP) {
+ if (IsWin64Prologue && HasFP) {
SEHFrameOffset = calculateSetFPREG(NumBytes);
if (SEHFrameOffset)
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
@@ -877,7 +917,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
else
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr);
- if (NeedsWinEH)
+ if (NeedsWinCFI)
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
.addImm(FramePtr)
.addImm(SEHFrameOffset)
@@ -888,7 +928,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
const MachineInstr *FrameInstr = &*MBBI;
++MBBI;
- if (NeedsWinEH) {
+ if (NeedsWinCFI) {
int FI;
if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
if (X86::FR64RegClass.contains(Reg)) {
@@ -904,32 +944,23 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
}
}
- if (NeedsWinEH)
+ if (NeedsWinCFI)
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
.setMIFlag(MachineInstr::FrameSetup);
// Realign stack after we spilled callee-saved registers (so that we'll be
// able to calculate their offsets from the frame pointer).
// Win64 requires aligning the stack after the prologue.
- if (IsWinEH && RegInfo->needsStackRealignment(MF)) {
+ if (IsWin64Prologue && TRI->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
- uint64_t Val = -MaxAlign;
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(Val)
- .setMIFlag(MachineInstr::FrameSetup);
-
- // The EFLAGS implicit def is dead.
- MI->getOperand(3).setIsDead();
+ BuildStackAlignAND(MBB, MBBI, DL, MaxAlign);
}
// If we need a base pointer, set it up here. It's whatever the value
// of the stack pointer is at this point. Any variable size objects
// will be allocated after this, so we can still use the base pointer
// to reference locals.
- if (RegInfo->hasBasePointer(MF)) {
+ if (TRI->hasBasePointer(MF)) {
// Update the base pointer with the current stack pointer.
unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
@@ -950,12 +981,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (!HasFP && NumBytes) {
// Define the current CFA rule to use the provided offset.
assert(StackSize);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr,
- -StackSize + stackGrowth));
-
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
+ nullptr, -StackSize + stackGrowth));
}
// Emit DWARF info specifying the offsets of the callee-saved registers.
@@ -975,65 +1002,24 @@ bool X86FrameLowering::canUseLEAForSPInEpilogue(
return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
}
-/// Check whether or not the terminators of \p MBB needs to read EFLAGS.
-static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) {
- for (const MachineInstr &MI : MBB.terminators()) {
- bool BreakNext = false;
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg != X86::EFLAGS)
- continue;
-
- // This terminator needs an eflag that is not defined
- // by a previous terminator.
- if (!MO.isDef())
- return true;
- BreakNext = true;
- }
- if (BreakNext)
- break;
- }
- return false;
-}
-
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
DebugLoc DL;
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
- bool Is64Bit = STI.is64Bit();
// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
- const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
const bool Is64BitILP32 = STI.isTarget64BitILP32();
- unsigned SlotSize = RegInfo->getSlotSize();
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned FramePtr = TRI->getFrameRegister(MF);
unsigned MachineFramePtr =
Is64BitILP32 ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
: FramePtr;
- unsigned StackPtr = RegInfo->getStackRegister();
-
- bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
- bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry();
- bool UseLEAForSP = canUseLEAForSPInEpilogue(MF);
- // If we can use LEA for SP but we shouldn't, check that none
- // of the terminators uses the eflags. Otherwise we will insert
- // a ADD that will redefine the eflags and break the condition.
- // Alternatively, we could move the ADD, but this may not be possible
- // and is an optimization anyway.
- if (UseLEAForSP && !MF.getSubtarget<X86Subtarget>().useLeaForSP())
- UseLEAForSP = terminatorsNeedFlagsAsInput(MBB);
- // If that assert breaks, that means we do not do the right thing
- // in canUseAsEpilogue.
- assert((UseLEAForSP || !terminatorsNeedFlagsAsInput(MBB)) &&
- "We shouldn't have allowed this insertion point");
+
+ bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ bool NeedsWinCFI =
+ IsWin64Prologue && MF.getFunction()->needsUnwindTableEntry();
// Get the number of bytes to allocate from the FrameInfo.
uint64_t StackSize = MFI->getStackSize();
@@ -1048,7 +1034,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Callee-saved registers were pushed on stack before the stack was
// realigned.
- if (RegInfo->needsStackRealignment(MF) && !IsWinEH)
+ if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
NumBytes = RoundUpToAlignment(FrameSize, MaxAlign);
// Pop EBP.
@@ -1083,11 +1069,12 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// If dynamic alloca is used, then reset esp to point to the last callee-saved
// slot before popping them off! Same applies for the case, when stack was
// realigned.
- if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
- if (RegInfo->needsStackRealignment(MF))
+ if (TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
+ if (TRI->needsStackRealignment(MF))
MBBI = FirstCSPop;
unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
- uint64_t LEAAmount = IsWinEH ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
+ uint64_t LEAAmount =
+ IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
// There are only two legal forms of epilogue:
// - add SEHAllocationSize, %rsp
@@ -1109,8 +1096,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr,
- UseLEAForSP, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, NumBytes, /*InEpilogue=*/true);
--MBBI;
}
@@ -1120,7 +1106,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// into the epilogue. To cope with that, we insert an epilogue marker here,
// then replace it with a 'nop' if it ends up immediately after a CALL in the
// final emitted code.
- if (NeedsWinEH)
+ if (NeedsWinCFI)
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
// Add the return addr area delta back since we are not tail calling.
@@ -1130,16 +1116,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
MBBI = MBB.getFirstTerminator();
// Check for possible merge with preceding ADD instruction.
- Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr,
- UseLEAForSP, TII, *RegInfo);
+ Offset += mergeSPUpdates(MBB, MBBI, true);
+ emitSPUpdate(MBB, MBBI, Offset, /*InEpilogue=*/true);
}
}
int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int FI) const {
- const X86RegisterInfo *RegInfo =
- MF.getSubtarget<X86Subtarget>().getRegisterInfo();
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Offset will hold the offset from the stack pointer at function entry to the
// object.
@@ -1149,12 +1132,11 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t StackSize = MFI->getStackSize();
- unsigned SlotSize = RegInfo->getSlotSize();
bool HasFP = hasFP(MF);
- bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
int64_t FPDelta = 0;
- if (IsWinEH) {
+ if (IsWin64Prologue) {
assert(!MFI->hasCalls() || (StackSize % 16) == 8);
// Calculate required stack adjustment.
@@ -1178,7 +1160,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
}
- if (RegInfo->hasBasePointer(MF)) {
+ if (TRI->hasBasePointer(MF)) {
assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
if (FI < 0) {
// Skip the saved EBP.
@@ -1187,7 +1169,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
return Offset + StackSize;
}
- } else if (RegInfo->needsStackRealignment(MF)) {
+ } else if (TRI->needsStackRealignment(MF)) {
if (FI < 0) {
// Skip the saved EBP.
return Offset + SlotSize + FPDelta;
@@ -1214,17 +1196,15 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const {
- const X86RegisterInfo *RegInfo =
- MF.getSubtarget<X86Subtarget>().getRegisterInfo();
// We can't calculate offset from frame pointer if the stack is realigned,
// so enforce usage of stack/base pointer. The base pointer is used when we
// have dynamic allocas in addition to dynamic realignment.
- if (RegInfo->hasBasePointer(MF))
- FrameReg = RegInfo->getBaseRegister();
- else if (RegInfo->needsStackRealignment(MF))
- FrameReg = RegInfo->getStackRegister();
+ if (TRI->hasBasePointer(MF))
+ FrameReg = TRI->getBaseRegister();
+ else if (TRI->needsStackRealignment(MF))
+ FrameReg = TRI->getStackRegister();
else
- FrameReg = RegInfo->getFrameRegister(MF);
+ FrameReg = TRI->getFrameRegister(MF);
return getFrameIndexOffset(MF, FI);
}
@@ -1235,8 +1215,6 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F
const uint64_t StackSize = MFI->getStackSize();
{
#ifndef NDEBUG
- const X86RegisterInfo *RegInfo =
- MF.getSubtarget<X86Subtarget>().getRegisterInfo();
// Note: LLVM arranges the stack as:
// Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP)
// > "Stack Slots" (<--SP)
@@ -1248,7 +1226,7 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F
// frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
// AND FixedObjects IFF needsStackRealignment or hasVarSizedObject.
- assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
+ assert(!TRI->hasBasePointer(MF) && "we don't handle this case");
// We don't handle tail calls, and shouldn't be seeing them
// either.
@@ -1293,11 +1271,9 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F
int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
int FI,
unsigned &FrameReg) const {
- const X86RegisterInfo *RegInfo =
- MF.getSubtarget<X86Subtarget>().getRegisterInfo();
- assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
+ assert(!TRI->hasBasePointer(MF) && "we don't handle this case");
- FrameReg = RegInfo->getStackRegister();
+ FrameReg = TRI->getStackRegister();
return getFrameIndexOffsetFromSP(MF, FI);
}
@@ -1305,9 +1281,6 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
MachineFunction &MF, const TargetRegisterInfo *TRI,
std::vector<CalleeSavedInfo> &CSI) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
- const X86RegisterInfo *RegInfo =
- MF.getSubtarget<X86Subtarget>().getRegisterInfo();
- unsigned SlotSize = RegInfo->getSlotSize();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
unsigned CalleeSavedFrameSize = 0;
@@ -1321,7 +1294,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
// Since emitPrologue and emitEpilogue will handle spilling and restoring of
// the frame register, we can delete it from CSI list and not have to worry
// about avoiding it later.
- unsigned FPReg = RegInfo->getFrameRegister(MF);
+ unsigned FPReg = TRI->getFrameRegister(MF);
for (unsigned i = 0; i < CSI.size(); ++i) {
if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
CSI.erase(CSI.begin() + i);
@@ -1352,7 +1325,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
continue;
- const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
// ensure alignment
SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment();
// spill into slot
@@ -1372,10 +1345,6 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBB.findDebugLoc(MI);
- MachineFunction &MF = *MBB.getParent();
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
-
// Push GPRs. It increases frame size.
unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
for (unsigned i = CSI.size(); i != 0; --i) {
@@ -1419,10 +1388,6 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL = MBB.findDebugLoc(MI);
- MachineFunction &MF = *MBB.getParent();
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
-
// Reload XMMs from stack frame.
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
@@ -1451,9 +1416,6 @@ void
X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
- const X86RegisterInfo *RegInfo =
- MF.getSubtarget<X86Subtarget>().getRegisterInfo();
- unsigned SlotSize = RegInfo->getSlotSize();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@@ -1473,8 +1435,8 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
// Spill the BasePtr if it's used.
- if (RegInfo->hasBasePointer(MF))
- MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
+ if (TRI->hasBasePointer(MF))
+ MF.getRegInfo().setPhysRegUsed(TRI->getBaseRegister());
}
static bool
@@ -1532,11 +1494,7 @@ static const uint64_t kSplitStackAvailable = 256;
void X86FrameLowering::adjustForSegmentedStacks(
MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
uint64_t StackSize;
- bool Is64Bit = STI.is64Bit();
- const bool IsLP64 = STI.isTarget64BitLP64();
unsigned TlsReg, TlsOffset;
DebugLoc DL;
@@ -1782,12 +1740,7 @@ void X86FrameLowering::adjustForSegmentedStacks(
/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(
MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const unsigned SlotSize = STI.getRegisterInfo()->getSlotSize();
- const bool Is64Bit = STI.is64Bit();
- const bool IsLP64 = STI.isTarget64BitLP64();
DebugLoc DL;
// HiPE-specific values
const unsigned HipeLeafWords = 24;
@@ -1915,14 +1868,9 @@ void X86FrameLowering::adjustForHiPEPrologue(
void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
- const X86RegisterInfo &RegInfo = *STI.getRegisterInfo();
- unsigned StackPtr = RegInfo.getStackRegister();
bool reserveCallFrame = hasReservedCallFrame(MF);
unsigned Opcode = I->getOpcode();
bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
- bool IsLP64 = STI.isTarget64BitLP64();
DebugLoc DL = I->getDebugLoc();
uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
@@ -1941,54 +1889,29 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
unsigned StackAlign = getStackAlignment();
Amount = RoundUpToAlignment(Amount, StackAlign);
- MachineInstr *New = nullptr;
-
// Factor out the amount that gets handled inside the sequence
// (Pushes of argument for frame setup, callee pops for frame destroy)
Amount -= InternalAmt;
if (Amount) {
- if (Opcode == TII.getCallFrameSetupOpcode()) {
- New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), StackPtr)
- .addReg(StackPtr).addImm(Amount);
- } else {
- assert(Opcode == TII.getCallFrameDestroyOpcode());
-
- unsigned Opc = getADDriOpcode(IsLP64, Amount);
- New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(Amount);
- }
+ // Add Amount to SP to destroy a frame, and subtract to setup.
+ int Offset = isDestroy ? Amount : -Amount;
+ BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
}
-
- if (New) {
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
-
- // Replace the pseudo instruction with a new instruction.
- MBB.insert(I, New);
- }
-
return;
}
- if (Opcode == TII.getCallFrameDestroyOpcode() && InternalAmt) {
+ if (isDestroy && InternalAmt) {
// If we are performing frame pointer elimination and if the callee pops
// something off the stack pointer, add it back. We do this until we have
// more advanced stack pointer tracking ability.
- unsigned Opc = getSUBriOpcode(IsLP64, InternalAmt);
- MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(InternalAmt);
-
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
-
// We are not tracking the stack pointer adjustment by the callee, so make
// sure we restore the stack pointer immediately after the call, there may
// be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
MachineBasicBlock::iterator B = MBB.begin();
while (I != B && !std::prev(I)->isCall())
--I;
- MBB.insert(I, New);
+ BuildStackAdjustment(MBB, I, DL, -InternalAmt, /*InEpilogue=*/false);
}
}
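BuildStackAdjustment now centralizes a choice that several call sites previously duplicated: LEA can adjust the stack pointer without touching EFLAGS, while ADD/SUB clobber the flags, which matters when an epilogue terminator still reads them. A simplified standalone sketch of that policy (stand-in types only; it omits the canUseLEAForSPInEpilogue/Win64 check):

#include <cstdint>

enum class SPAdjustKind { Lea, Add, Sub };

// Prefer LEA when the subtarget asks for it, or when flags must survive an
// epilogue terminator; otherwise pick ADD/SUB from the sign of the offset.
SPAdjustKind chooseSPAdjust(bool PreferLeaForSP, bool InEpilogue,
                            bool TerminatorReadsFlags, int64_t Offset) {
  bool UseLEA = PreferLeaForSP || (InEpilogue && TerminatorReadsFlags);
  if (UseLEA)
    return SPAdjustKind::Lea;            // leaves EFLAGS untouched
  return Offset < 0 ? SPAdjustKind::Sub  // sub AbsOffset, %rsp
                    : SPAdjustKind::Add; // add Offset, %rsp
}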
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
index 5d03b4d..2858e86 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
@@ -18,16 +18,40 @@
namespace llvm {
+class MachineInstrBuilder;
+class MCCFIInstruction;
+class X86Subtarget;
+class X86RegisterInfo;
+
class X86FrameLowering : public TargetFrameLowering {
public:
- explicit X86FrameLowering(StackDirection D, unsigned StackAl, int LAO)
- : TargetFrameLowering(StackGrowsDown, StackAl, LAO) {}
+ X86FrameLowering(const X86Subtarget &STI, unsigned StackAlignOverride);
+
+ // Cached subtarget predicates.
+
+ const X86Subtarget &STI;
+ const TargetInstrInfo &TII;
+ const X86RegisterInfo *TRI;
+
+ unsigned SlotSize;
+
+ /// Is64Bit implies that x86_64 instructions are available.
+ bool Is64Bit;
+
+ bool IsLP64;
+
+ /// True if the 64-bit frame or stack pointer should be used. True for most
+ /// 64-bit targets with the exception of x32. If this is false, 32-bit
+ /// instruction operands should be used to manipulate StackPtr and FramePtr.
+ bool Uses64BitFramePtr;
+
+ unsigned StackPtr;
/// Emit a call to the target's stack probe function. This is required for all
/// large stack allocations on Windows. The caller is required to materialize
/// the number of bytes to probe in RAX/EAX.
- static void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc DL);
+ void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL) const;
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -83,18 +107,13 @@ public:
/// it is an ADD/SUB/LEA instruction it is deleted argument and the
/// stack adjustment is returned as a positive value for ADD/LEA and
/// a negative for SUB.
- static int mergeSPUpdates(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, bool doMergeWithPrevious);
+ int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ bool doMergeWithPrevious) const;
/// Emit a series of instructions to increment / decrement the stack
/// pointer by a constant value.
- static void emitSPUpdate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI, unsigned StackPtr,
- int64_t NumBytes, bool Is64BitTarget,
- bool Is64BitStackPtr, bool UseLEA,
- const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI);
+ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ int64_t NumBytes, bool InEpilogue) const;
/// Check that LEA can be used on SP in an epilogue sequence for \p MF.
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const;
@@ -115,8 +134,25 @@ private:
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
uint64_t Amount) const;
+
+ uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
+
+ /// Wraps up getting a CFI index and building a MachineInstr for it.
+ void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, MCCFIInstruction CFIInst) const;
+
+ /// Aligns the stack pointer by ANDing it with -MaxAlign.
+ void BuildStackAlignAND(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ uint64_t MaxAlign) const;
+
+ /// Adjusts the stack pointer using LEA, SUB, or ADD.
+ MachineInstrBuilder BuildStackAdjustment(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, int64_t Offset,
+ bool InEpilogue) const;
};
-} // End llvm namespace
+} // namespace llvm
#endif
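The header now captures the subtarget facts once in the constructor, so the former static helpers become const member functions with short parameter lists. A standalone sketch of that pattern with mock types (not the LLVM classes):

struct MockSubtarget {
  bool is64Bit() const { return true; }
  bool isTarget64BitLP64() const { return true; }
};

class MockFrameLowering {
  const MockSubtarget &STI;
  bool Is64Bit;            // cached once, instead of being re-queried
  bool Uses64BitFramePtr;  // and re-passed by every caller
public:
  explicit MockFrameLowering(const MockSubtarget &S)
      : STI(S), Is64Bit(S.is64Bit()),
        Uses64BitFramePtr(S.isTarget64BitLP64()) {}
  // Formerly: static emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, ...).
  void emitSPUpdate(long long NumBytes, bool InEpilogue) const {
    (void)NumBytes; (void)InEpilogue; // elided: build ADD/SUB/LEA from cached state
  }
};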
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index de59109..f6785e1 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -138,7 +138,7 @@ namespace {
}
#endif
};
-}
+} // namespace
namespace {
//===--------------------------------------------------------------------===//
@@ -310,7 +310,7 @@ namespace {
return true;
}
};
-}
+} // namespace
bool
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index e3ec288..ce1ca20 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -915,6 +915,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
+
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
// As there is no 64-bit GPR available, we need build a special custom
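The new Custom action for ISD::SINT_TO_FP on v2i32 (used with v2f64 results) widens the input to v4i32 with undef upper lanes and emits CVTDQ2PD, which reads only the low two lanes. The same operation at the intrinsic level, shown with SSE2 for illustration:

#include <emmintrin.h>

// cvtdq2pd converts lanes 0 and 1 of the integer vector to doubles, so the
// upper two lanes of the widened v4i32 input are irrelevant.
__m128d sitofp_low_two_lanes(__m128i V) {
  return _mm_cvtepi32_pd(V);
}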
@@ -2233,7 +2235,9 @@ static bool IsCCallConvention(CallingConv::ID CC) {
}
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
- if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
+ auto Attr =
+ CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
+ if (!CI->isTailCall() || Attr.getValueAsString() == "true")
return false;
CallSite CS(CI);
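Tail-call suppression is now read from the per-function "disable-tail-calls" string attribute rather than the global TargetOptions::DisableTailCalls flag, letting the setting vary per function. The per-function query the new code performs, as a small helper (sketch; assumes the LLVM IR headers):

#include "llvm/IR/Function.h"

// True when the front end marked this function with "disable-tail-calls"="true".
static bool tailCallsDisabled(const llvm::Function &F) {
  return F.getFnAttribute("disable-tail-calls").getValueAsString() == "true";
}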
@@ -2762,8 +2766,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
StructReturnType SR = callIsStructReturn(Outs);
bool IsSibcall = false;
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
+ auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
- if (MF.getTarget().Options.DisableTailCalls)
+ if (Attr.getValueAsString() == "true")
isTailCall = false;
if (Subtarget->isPICStyleGOT() &&
@@ -5441,7 +5446,7 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
///
/// Otherwise, the first horizontal binop dag node takes as input the lower
/// 128-bit of V0 and the lower 128-bit of V1, and the second horizontal binop
-/// dag node takes the the upper 128-bit of V0 and the upper 128-bit of V1.
+/// dag node takes the upper 128-bit of V0 and the upper 128-bit of V1.
/// Example:
/// HADD V0_LO, V1_LO
/// HADD V0_HI, V1_HI
@@ -6353,7 +6358,7 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
///
/// This helper function produces an 8-bit shuffle immediate corresponding to
/// the ubiquitous shuffle encoding scheme used in x86 instructions for
-/// shuffling 8 lanes.
+/// shuffling 8 lanes.
static SDValue get1bitLaneShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
SelectionDAG &DAG) {
assert(Mask.size() <= 8 &&
@@ -9380,6 +9385,30 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
DAG.getConstant(PermMask, DL, MVT::i8));
}
+/// \brief Handle lowering 4-lane 128-bit shuffles.
+static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> WidenedMask,
+ SelectionDAG &DAG) {
+
+ assert(WidenedMask.size() == 4 && "Unexpected mask size for 128bit shuffle!");
+  // Form a 128-bit permutation: convert the 64-bit shuffle mask selection
+  // values into the 128-bit selection bits defined by a vshuf64x2
+  // instruction's immediate control byte.
+ unsigned PermMask = 0, Imm = 0;
+
+ for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
+ if(WidenedMask[i] == SM_SentinelZero)
+ return SDValue();
+
+    // Use the first element in place of an undef mask.
+ Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
+ PermMask |= (Imm % 4) << (i * 2);
+ }
+
+ return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
+ DAG.getConstant(PermMask, DL, MVT::i8));
+}
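
As a standalone illustration (not part of the patch), this sketch mirrors the immediate computation in the new lowerV4X128VectorShuffle: each of the four 128-bit lanes contributes two bits to the vshuf64x2 control byte. The name kUndef is a hypothetical stand-in for SM_SentinelUndef.

    #include <cstdio>

    static const int kUndef = -1; // stand-in for SM_SentinelUndef

    static unsigned buildPermMask(const int WidenedMask[4]) {
      unsigned PermMask = 0;
      for (int i = 0; i < 4; ++i) {
        // An undef lane may pick any source; reuse element 0.
        unsigned Imm = (WidenedMask[i] == kUndef) ? 0u : (unsigned)WidenedMask[i];
        PermMask |= (Imm % 4) << (i * 2);
      }
      return PermMask;
    }

    int main() {
      int Mask[4] = {0, 1, 4, 5}; // lanes 0-1 of V1, then lanes 0-1 of V2
      std::printf("imm = 0x%x\n", buildPermMask(Mask)); // imm = 0x44
      return 0;
    }
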
+
/// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then
/// shuffling each lane.
///
@@ -10173,6 +10202,10 @@ static SDValue lowerV8X64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+ SmallVector<int, 4> WidenedMask;
+ if (canWidenShuffleElements(Mask, WidenedMask))
+ if(SDValue Op = lowerV4X128VectorShuffle(DL, VT, V1, V2, WidenedMask, DAG))
+ return Op;
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
@@ -11023,9 +11056,8 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
if (auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2))) {
if (Idx2->getZExtValue() == 0) {
SDValue Ops[] = { SubVec2, SubVec };
- SDValue LD = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false);
- if (LD.getNode())
- return LD;
+ if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+ return Ld;
}
}
}
@@ -11617,15 +11649,21 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
- MVT SrcVT = Op.getOperand(0).getSimpleValueType();
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
if (SrcVT.isVector()) {
+ if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
+ return DAG.getNode(X86ISD::CVTDQ2PD, dl, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
+ DAG.getUNDEF(SrcVT)));
+ }
if (SrcVT.getVectorElementType() == MVT::i1) {
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
- DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT,
- Op.getOperand(0)));
+ DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
}
return SDValue();
}
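
A small intrinsics sketch (not part of the patch, assuming an SSE2 target) showing why the new v2i32 lowering can concatenate the source with undef: cvtdq2pd reads only the two low 32-bit elements of its 128-bit input, so the contents of the upper half do not matter.

    #include <emmintrin.h>
    #include <cstdio>

    int main() {
      // Elements are listed high to low; the top two are the "undef" half.
      __m128i Src = _mm_set_epi32(0, 0, 7, -3);
      __m128d Dst = _mm_cvtepi32_pd(Src); // converts elements 0 and 1 only
      double Out[2];
      _mm_storeu_pd(Out, Dst);
      std::printf("%f %f\n", Out[0], Out[1]); // -3.000000 7.000000
      return 0;
    }
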
@@ -13018,11 +13056,11 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
RecipOp = "vec-sqrtf";
else
return SDValue();
-
+
TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
if (!Recips.isEnabled(RecipOp))
return SDValue();
-
+
RefinementSteps = Recips.getRefinementSteps(RecipOp);
UseOneConstNR = false;
return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
@@ -13035,7 +13073,7 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
unsigned &RefinementSteps) const {
EVT VT = Op.getValueType();
const char *RecipOp;
-
+
// SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
// TODO: Add support for AVX512 (v16f32).
// It is likely not profitable to do this for f64 because a double-precision
@@ -13050,7 +13088,7 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
RecipOp = "vec-divf";
else
return SDValue();
-
+
TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
if (!Recips.isEnabled(RecipOp))
return SDValue();
@@ -13236,13 +13274,13 @@ static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(-1, dl, VT));
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
- case ISD::SETNE:
- // (x != y) -> ~(x ^ y)
+ case ISD::SETEQ:
+ // (x == y) -> ~(x ^ y)
return DAG.getNode(ISD::XOR, dl, VT,
DAG.getNode(ISD::XOR, dl, VT, Op0, Op1),
DAG.getConstant(-1, dl, VT));
- case ISD::SETEQ:
- // (x == y) -> (x ^ y)
+ case ISD::SETNE:
+ // (x != y) -> (x ^ y)
return DAG.getNode(ISD::XOR, dl, VT, Op0, Op1);
case ISD::SETUGT:
case ISD::SETGT:
@@ -15107,7 +15145,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
unsigned Round = cast<ConstantSDNode>(RoundingMode)->getZExtValue();
- if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION)
+ if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION)
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(), Src, RoundingMode),
Mask, PassThru, Subtarget, DAG);
@@ -15687,14 +15725,49 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMergeValues(Results, DL);
}
+static SDValue LowerEXCEPTIONINFO(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDLoc dl(Op);
+ SDValue FnOp = Op.getOperand(2);
+ SDValue FPOp = Op.getOperand(3);
+
+ // Compute the symbol for the parent EH registration. We know it'll get
+ // emitted later.
+ auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(FnOp)->getGlobal());
+ MCSymbol *ParentFrameSym =
+ MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
+ GlobalValue::getRealLinkageName(Fn->getName()));
+ StringRef Name = ParentFrameSym->getName();
+ assert(Name.data()[Name.size()] == '\0' && "not null terminated");
+
+ // Create a TargetExternalSymbol for the label to avoid any target lowering
+ // that would make this PC relative.
+ MVT PtrVT = Op.getSimpleValueType();
+ SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT);
+ SDValue OffsetVal =
+ DAG.getNode(ISD::FRAME_ALLOC_RECOVER, dl, PtrVT, OffsetSym);
+
+ // Add the offset to the FP.
+ SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, FPOp, OffsetVal);
+
+ // Load the second field of the struct, which is 4 bytes in. See
+ // WinEHStatePass for more info.
+ Add = DAG.getNode(ISD::ADD, dl, PtrVT, Add, DAG.getConstant(4, dl, PtrVT));
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Add, MachinePointerInfo(),
+ false, false, false, 0);
+}
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo);
- if (!IntrData)
+ if (!IntrData) {
+ if (IntNo == Intrinsic::x86_seh_exceptioninfo)
+ return LowerEXCEPTIONINFO(Op, Subtarget, DAG);
return SDValue();
+ }
SDLoc dl(Op);
switch(IntrData->Type) {
@@ -16464,6 +16537,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
+ SDValue AhiBlo = Ahi;
+ SDValue AloBhi = Bhi;
// Bit cast to 32-bit vectors for MULUDQ
EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
(VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32;
@@ -16473,11 +16548,15 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
Bhi = DAG.getBitcast(MulVT, Bhi);
SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);
- SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
- SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
-
- AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG);
- AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);
+  // After shifting constant values right, the result may be all-zero.
+ if (!ISD::isBuildVectorAllZeros(Ahi.getNode())) {
+ AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
+ AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);
+ }
+ if (!ISD::isBuildVectorAllZeros(Bhi.getNode())) {
+ AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
+ AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG);
+ }
SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
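
A scalar model (not part of the patch) of the multiply lowering above: the 64x64-bit product is assembled from 32-bit halves, and the new all-zero checks let a cross term be skipped when the corresponding high half is known to be zero.

    #include <cstdint>
    #include <cstdio>

    static uint64_t mul64(uint64_t A, uint64_t B) {
      uint64_t Alo = A & 0xffffffffu, Ahi = A >> 32;
      uint64_t Blo = B & 0xffffffffu, Bhi = B >> 32;
      uint64_t AloBlo = Alo * Blo;         // PMULUDQ
      uint64_t AloBhi = (Alo * Bhi) << 32; // skipped when Bhi is all zero
      uint64_t AhiBlo = (Ahi * Blo) << 32; // skipped when Ahi is all zero
      return AloBlo + AloBhi + AhiBlo;     // the Ahi*Bhi term wraps past 2^64
    }

    int main() {
      uint64_t A = 0x123456789ULL, B = 0xabcdefULL;
      std::printf("%d\n", mul64(A, B) == A * B); // prints 1
      return 0;
    }
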
@@ -16992,36 +17071,111 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
}
}
- if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
- // Turn 'a' into a mask suitable for VSELECT: a = a << 5;
- Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, dl, VT));
-
- SDValue VSelM = DAG.getConstant(0x80, dl, VT);
- SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
- OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
-
- // r = VSELECT(r, shl(r, 4), a);
- SDValue M = DAG.getNode(ISD::SHL, dl, VT, R, DAG.getConstant(4, dl, VT));
- R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
-
- // a += a
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
- OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
- OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
+ if (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget->hasInt256())) {
+ MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
+ unsigned ShiftOpcode = Op->getOpcode();
- // r = VSELECT(r, shl(r, 2), a);
- M = DAG.getNode(ISD::SHL, dl, VT, R, DAG.getConstant(2, dl, VT));
- R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
+ auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
+ // On SSE41 targets we make use of the fact that VSELECT lowers
+ // to PBLENDVB which selects bytes based just on the sign bit.
+ if (Subtarget->hasSSE41()) {
+ V0 = DAG.getBitcast(VT, V0);
+ V1 = DAG.getBitcast(VT, V1);
+ Sel = DAG.getBitcast(VT, Sel);
+ return DAG.getBitcast(SelVT,
+ DAG.getNode(ISD::VSELECT, dl, VT, Sel, V0, V1));
+ }
+ // On pre-SSE41 targets we test for the sign bit by comparing to
+ // zero - a negative value will set all bits of the lanes to true
+ // and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
+ SDValue Z = getZeroVector(SelVT, Subtarget, DAG, dl);
+ SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel);
+ return DAG.getNode(ISD::VSELECT, dl, SelVT, C, V0, V1);
+ };
- // a += a
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
- OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
- OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
+ // Turn 'a' into a mask suitable for VSELECT: a = a << 5;
+ // We can safely do this using i16 shifts as we're only interested in
+ // the 3 lower bits of each byte.
+ Amt = DAG.getBitcast(ExtVT, Amt);
+ Amt = DAG.getNode(ISD::SHL, dl, ExtVT, Amt, DAG.getConstant(5, dl, ExtVT));
+ Amt = DAG.getBitcast(VT, Amt);
+
+ if (Op->getOpcode() == ISD::SHL || Op->getOpcode() == ISD::SRL) {
+ // r = VSELECT(r, shift(r, 4), a);
+ SDValue M =
+ DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT));
+ R = SignBitSelect(VT, Amt, M, R);
+
+ // a += a
+ Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
+
+ // r = VSELECT(r, shift(r, 2), a);
+ M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT));
+ R = SignBitSelect(VT, Amt, M, R);
+
+ // a += a
+ Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
+
+ // return VSELECT(r, shift(r, 1), a);
+ M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT));
+ R = SignBitSelect(VT, Amt, M, R);
+ return R;
+ }
- // return VSELECT(r, r+r, a);
- R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel,
- DAG.getNode(ISD::ADD, dl, VT, R, R), R);
- return R;
+ if (Op->getOpcode() == ISD::SRA) {
+ // For SRA we need to unpack each byte to the higher byte of a i16 vector
+ // so we can correctly sign extend. We don't care what happens to the
+ // lower byte.
+ SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), Amt);
+ SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), Amt);
+ SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), R);
+ SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), R);
+ ALo = DAG.getBitcast(ExtVT, ALo);
+ AHi = DAG.getBitcast(ExtVT, AHi);
+ RLo = DAG.getBitcast(ExtVT, RLo);
+ RHi = DAG.getBitcast(ExtVT, RHi);
+
+ // r = VSELECT(r, shift(r, 4), a);
+ SDValue MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
+ DAG.getConstant(4, dl, ExtVT));
+ SDValue MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
+ DAG.getConstant(4, dl, ExtVT));
+ RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
+ RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
+
+ // a += a
+ ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo);
+ AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
+
+ // r = VSELECT(r, shift(r, 2), a);
+ MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
+ DAG.getConstant(2, dl, ExtVT));
+ MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
+ DAG.getConstant(2, dl, ExtVT));
+ RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
+ RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
+
+ // a += a
+ ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo);
+ AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
+
+ // r = VSELECT(r, shift(r, 1), a);
+ MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
+ DAG.getConstant(1, dl, ExtVT));
+ MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
+ DAG.getConstant(1, dl, ExtVT));
+ RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
+ RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
+
+      // Logical shift the result back to the lower byte, leaving a zero upper
+      // byte, meaning that we can safely pack with PACKUSWB.
+ RLo =
+ DAG.getNode(ISD::SRL, dl, ExtVT, RLo, DAG.getConstant(8, dl, ExtVT));
+ RHi =
+ DAG.getNode(ISD::SRL, dl, ExtVT, RHi, DAG.getConstant(8, dl, ExtVT));
+ return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
+ }
}
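
A scalar model (not part of the patch) of the byte-shift lowering above: shifting the amount left by 5 puts its three significant bits in the sign position one at a time (via a += a), and a sign-bit select conditionally applies shifts of 4, 2, and 1, mirroring what PBLENDVB does per byte.

    #include <cstdint>
    #include <cstdio>
    #include <initializer_list>

    static uint8_t shlByte(uint8_t R, uint8_t Amt) {
      uint8_t A = (uint8_t)(Amt << 5);      // only the low 3 bits of Amt matter
      for (int Step : {4, 2, 1}) {
        if (A & 0x80)                       // sign-bit select
          R = (uint8_t)(R << Step);
        A = (uint8_t)(A + A);               // a += a: expose the next bit
      }
      return R;
    }

    int main() {
      std::printf("%d\n", shlByte(3, 6));   // 3 << 6 = 192
      return 0;
    }
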
// It's worth extending once and using the v8i32 shifts for 16-bit types, but
@@ -17055,6 +17209,67 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
}
+ if (VT == MVT::v8i16) {
+ unsigned ShiftOpcode = Op->getOpcode();
+
+ auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) {
+ // On SSE41 targets we make use of the fact that VSELECT lowers
+ // to PBLENDVB which selects bytes based just on the sign bit.
+ if (Subtarget->hasSSE41()) {
+ MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2);
+ V0 = DAG.getBitcast(ExtVT, V0);
+ V1 = DAG.getBitcast(ExtVT, V1);
+ Sel = DAG.getBitcast(ExtVT, Sel);
+ return DAG.getBitcast(
+ VT, DAG.getNode(ISD::VSELECT, dl, ExtVT, Sel, V0, V1));
+ }
+ // On pre-SSE41 targets we splat the sign bit - a negative value will
+ // set all bits of the lanes to true and VSELECT uses that in
+ // its OR(AND(V0,C),AND(V1,~C)) lowering.
+ SDValue C =
+ DAG.getNode(ISD::SRA, dl, VT, Sel, DAG.getConstant(15, dl, VT));
+ return DAG.getNode(ISD::VSELECT, dl, VT, C, V0, V1);
+ };
+
+ // Turn 'a' into a mask suitable for VSELECT: a = a << 12;
+ if (Subtarget->hasSSE41()) {
+ // On SSE41 targets we need to replicate the shift mask in both
+ // bytes for PBLENDVB.
+ Amt = DAG.getNode(
+ ISD::OR, dl, VT,
+ DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(4, dl, VT)),
+ DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT)));
+ } else {
+ Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT));
+ }
+
+ // r = VSELECT(r, shift(r, 8), a);
+ SDValue M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(8, dl, VT));
+ R = SignBitSelect(Amt, M, R);
+
+ // a += a
+ Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
+
+ // r = VSELECT(r, shift(r, 4), a);
+ M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT));
+ R = SignBitSelect(Amt, M, R);
+
+ // a += a
+ Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
+
+ // r = VSELECT(r, shift(r, 2), a);
+ M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT));
+ R = SignBitSelect(Amt, M, R);
+
+ // a += a
+ Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
+
+ // return VSELECT(r, shift(r, 1), a);
+ M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT));
+ R = SignBitSelect(Amt, M, R);
+ return R;
+ }
+
// Decompose 256-bit shifts into smaller 128-bit shifts.
if (VT.is256BitVector()) {
unsigned NumElems = VT.getVectorNumElements();
@@ -18290,6 +18505,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VINSERT: return "X86ISD::VINSERT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
+ case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
@@ -18404,6 +18620,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND";
case X86ISD::ADDS: return "X86ISD::ADDS";
case X86ISD::SUBS: return "X86ISD::SUBS";
+ case X86ISD::AVG: return "X86ISD::AVG";
+ case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
+ case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
}
return nullptr;
}
@@ -19464,7 +19683,8 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
assert(!Subtarget->isTargetMachO());
- X86FrameLowering::emitStackProbeCall(*BB->getParent(), *BB, MI, DL);
+ Subtarget->getFrameLowering()->emitStackProbeCall(*BB->getParent(), *BB, MI,
+ DL);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -24019,7 +24239,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SVT = VT.getScalarType();
- EVT InVT = N0->getValueType(0);
+ EVT InVT = N0.getValueType();
EVT InSVT = InVT.getScalarType();
SDLoc DL(N);
@@ -24037,7 +24257,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
}
if (!DCI.isBeforeLegalizeOps()) {
- if (N0.getValueType() == MVT::i1) {
+ if (InVT == MVT::i1) {
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue AllOnes =
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
@@ -24048,7 +24268,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
if (VT.isVector()) {
auto ExtendToVec128 = [&DAG](SDLoc DL, SDValue N) {
- EVT InVT = N->getValueType(0);
+ EVT InVT = N.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
128 / InVT.getScalarSizeInBits());
SmallVector<SDValue, 8> Opnds(128 / InVT.getSizeInBits(),
@@ -24470,18 +24690,19 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
// First try to optimize away the conversion entirely when it's
// conditionally from a constant. Vectors only.
- SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
- if (Res != SDValue())
+ if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
return Res;
// Now move on to more general possibilities.
SDValue Op0 = N->getOperand(0);
EVT InVT = Op0->getValueType(0);
- // SINT_TO_FP(v4i8) -> SINT_TO_FP(SEXT(v4i8 to v4i32))
- if (InVT == MVT::v8i8 || InVT == MVT::v4i8) {
+ // SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
+ // SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
+ if (InVT == MVT::v8i8 || InVT == MVT::v4i8 ||
+ InVT == MVT::v8i16 || InVT == MVT::v4i16) {
SDLoc dl(N);
- MVT DstVT = InVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32;
+ MVT DstVT = MVT::getVectorVT(MVT::i32, InVT.getVectorNumElements());
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
}
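
A scalar check (not part of the patch) of the identity the widened combine relies on: sign-extending a narrow signed integer to 32 bits before converting to floating point yields the same value as converting it directly.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t Narrow = -100;
      float Direct  = (float)Narrow;           // SINT_TO_FP(vXi8)
      float Widened = (float)(int32_t)Narrow;  // SINT_TO_FP(SEXT(... to vXi32))
      std::printf("%d\n", Direct == Widened);  // prints 1
      return 0;
    }
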
@@ -24490,7 +24711,7 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
// a 32-bit target where SSE doesn't support i64->FP operations.
if (Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
- EVT VT = Ld->getValueType(0);
+ EVT LdVT = Ld->getValueType(0);
// This transformation is not supported if the result type is f16
if (N->getValueType(0) == MVT::f16)
@@ -24498,9 +24719,9 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
- !Subtarget->is64Bit() && VT == MVT::i64) {
+ !Subtarget->is64Bit() && LdVT == MVT::i64) {
SDValue FILDChain = Subtarget->getTargetLowering()->BuildFILD(
- SDValue(N, 0), Ld->getValueType(0), Ld->getChain(), Op0, DAG);
+ SDValue(N, 0), LdVT, Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
return FILDChain;
}
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index b5d062f..9c98333 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -218,7 +218,8 @@ namespace llvm {
// Integer add/sub with signed saturation.
ADDS,
SUBS,
-
+    // Unsigned integer average.
+    AVG,
/// Integer horizontal add.
HADD,
@@ -293,6 +294,9 @@ namespace llvm {
// Vector FP round.
VFPROUND,
+ // Vector signed integer to double.
+ CVTDQ2PD,
+
// 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
@@ -417,6 +421,10 @@ namespace llvm {
COMPRESS,
EXPAND,
+    // Convert signed/unsigned integer to scalar floating-point value
+    // with rounding mode.
+ SINT_TO_FP_RND,
+ UINT_TO_FP_RND,
// Save xmm argument registers to the stack, according to %al. An operator
// is needed so that this can be expanded with control flow.
VASTART_SAVE_XMM_REGS,
@@ -508,7 +516,7 @@ namespace llvm {
// have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
// thought as target memory ops!
};
- }
+ } // namespace X86ISD
/// Define some predicates that are used for node matching.
namespace X86 {
@@ -575,7 +583,7 @@ namespace llvm {
TO_ZERO = 3,
CUR_DIRECTION = 4
};
- }
+ } // namespace X86
//===--------------------------------------------------------------------===//
// X86 Implementation of the TargetLowering interface
@@ -1112,6 +1120,6 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
}
-}
+} // namespace llvm
#endif // X86ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
index c1d0aef..de6a835 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1058,118 +1058,87 @@ def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
(VPERMILPDZri VR512:$src1, imm:$imm)>;
// -- VPERM2I - 3 source operands form --
-multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
- PatFrag mem_frag, X86MemOperand x86memop,
- SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
+multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, X86VectorVTInfo _> {
let Constraints = "$src1 = $dst" in {
- def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
- EVEX_4V;
-
- def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst {${mask}}|"
- "$dst {${mask}}, $src2, $src3}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode RC:$src1, RC:$src2,
- RC:$src3),
- RC:$src1)))]>,
- EVEX_4V, EVEX_K;
-
- let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
- def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst {${mask}} {z} |",
- "$dst {${mask}} {z}, $src2, $src3}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode RC:$src1, RC:$src2,
- RC:$src3),
- (OpVT (bitconvert
- (v16i32 immAllZerosV))))))]>,
- EVEX_4V, EVEX_KZ;
+ defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
+ AVX5128IBase;
- def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, x86memop:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src1, RC:$src2,
- (mem_frag addr:$src3))))]>, EVEX_4V;
+ let mayLoad = 1 in
+ defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2,
+ (_.VT (bitconvert (_.LdFrag addr:$src3)))))>,
+ EVEX_4V, AVX5128IBase;
+ }
+}
+multiclass avx512_perm_3src_mb<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, X86VectorVTInfo _> {
+ let mayLoad = 1, Constraints = "$src1 = $dst" in
+ defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
+ !strconcat("$src2, ${src3}", _.BroadcastStr ),
+ (_.VT (OpNode _.RC:$src1,
+ _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
+ AVX5128IBase, EVEX_4V, EVEX_B;
+}
- def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst {${mask}}|"
- "$dst {${mask}}, $src2, $src3}"),
- [(set RC:$dst,
- (OpVT (vselect KRC:$mask,
- (OpNode RC:$src1, RC:$src2,
- (mem_frag addr:$src3)),
- RC:$src1)))]>,
- EVEX_4V, EVEX_K;
-
- let AddedComplexity = 10 in // Prefer over the rrkz variant
- def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst {${mask}} {z}|"
- "$dst {${mask}} {z}, $src2, $src3}"),
- [(set RC:$dst,
- (OpVT (vselect KRC:$mask,
- (OpNode RC:$src1, RC:$src2,
- (mem_frag addr:$src3)),
- (OpVT (bitconvert
- (v16i32 immAllZerosV))))))]>,
- EVEX_4V, EVEX_KZ;
+multiclass avx512_perm_3src_sizes<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, AVX512VLVectorVTInfo VTInfo> {
+ let Predicates = [HasAVX512] in
+ defm NAME: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info512>,
+ avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
+ let Predicates = [HasVLX] in {
+ defm NAME#128: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ defm NAME#256: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ EVEX_V256;
}
}
-defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, loadv16i32,
- i512mem, X86VPermiv3, v16i32, VK16WM>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, loadv8i64,
- i512mem, X86VPermiv3, v8i64, VK8WM>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, loadv16f32,
- i512mem, X86VPermiv3, v16f32, VK16WM>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, loadv8f64,
- i512mem, X86VPermiv3, v8f64, VK8WM>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC,
- PatFrag mem_frag, X86MemOperand x86memop,
- SDNode OpNode, ValueType OpVT, RegisterClass KRC,
- ValueType MaskVT, RegisterClass MRC> :
- avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode,
- OpVT, KRC> {
- def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
- VR512:$idx, VR512:$src1, VR512:$src2, -1)),
- (!cast<Instruction>(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>;
-
- def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
- VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
- (!cast<Instruction>(NAME#rrk) VR512:$src1,
- (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>;
-}
-
-defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, loadv16i32, i512mem,
- X86VPermv3, v16i32, VK16WM, v16i1, GR16>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, loadv8i64, i512mem,
- X86VPermv3, v8i64, VK8WM, v8i1, GR8>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, loadv16f32, i512mem,
- X86VPermv3, v16f32, VK16WM, v16i1, GR16>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, loadv8f64, i512mem,
- X86VPermv3, v8f64, VK8WM, v8i1, GR8>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+multiclass avx512_perm_3src_sizes_w<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, AVX512VLVectorVTInfo VTInfo> {
+ let Predicates = [HasBWI] in
+ defm NAME: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info512>,
+ avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info512>,
+ EVEX_V512;
+ let Predicates = [HasBWI, HasVLX] in {
+ defm NAME#128: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ defm NAME#256: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ EVEX_V256;
+ }
+}
+defm VPERMI2D : avx512_perm_3src_sizes<0x76, "vpermi2d", X86VPermiv3,
+ avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMI2Q : avx512_perm_3src_sizes<0x76, "vpermi2q", X86VPermiv3,
+ avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMI2PS : avx512_perm_3src_sizes<0x77, "vpermi2ps", X86VPermiv3,
+ avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMI2PD : avx512_perm_3src_sizes<0x77, "vpermi2pd", X86VPermiv3,
+ avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPERMT2D : avx512_perm_3src_sizes<0x7E, "vpermt2d", X86VPermv3,
+ avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMT2Q : avx512_perm_3src_sizes<0x7E, "vpermt2q", X86VPermv3,
+ avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMT2PS : avx512_perm_3src_sizes<0x7F, "vpermt2ps", X86VPermv3,
+ avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMT2PD : avx512_perm_3src_sizes<0x7F, "vpermt2pd", X86VPermv3,
+ avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPERMT2W : avx512_perm_3src_sizes_w<0x7D, "vpermt2w", X86VPermv3,
+ avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
+defm VPERMI2W : avx512_perm_3src_sizes_w<0x75, "vpermi2w", X86VPermiv3,
+ avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
@@ -2044,11 +2013,11 @@ defm : avx512_binop_pat<xor, KXORWrr>;
def : Pat<(xor (xor VK16:$src1, VK16:$src2), (v16i1 immAllOnesV)),
(KXNORWrr VK16:$src1, VK16:$src2)>;
def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
- (KXNORBrr VK8:$src1, VK8:$src2)>;
+ (KXNORBrr VK8:$src1, VK8:$src2)>, Requires<[HasDQI]>;
def : Pat<(xor (xor VK32:$src1, VK32:$src2), (v32i1 immAllOnesV)),
- (KXNORDrr VK32:$src1, VK32:$src2)>;
+ (KXNORDrr VK32:$src1, VK32:$src2)>, Requires<[HasBWI]>;
def : Pat<(xor (xor VK64:$src1, VK64:$src2), (v64i1 immAllOnesV)),
- (KXNORQrr VK64:$src1, VK64:$src2)>;
+ (KXNORQrr VK64:$src1, VK64:$src2)>, Requires<[HasBWI]>;
let Predicates = [NoDQI] in
def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
@@ -3157,7 +3126,8 @@ defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul,
SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
-
+defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
SDNode OpNode, bit IsCommutable = 0> {
@@ -3278,30 +3248,6 @@ defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", X86umin,
defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", X86umin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1),
- (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
- (VPMAXSDZrr VR512:$src1, VR512:$src2)>;
-def : Pat <(v16i32 (int_x86_avx512_mask_pmaxu_d_512 (v16i32 VR512:$src1),
- (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
- (VPMAXUDZrr VR512:$src1, VR512:$src2)>;
-def : Pat <(v8i64 (int_x86_avx512_mask_pmaxs_q_512 (v8i64 VR512:$src1),
- (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPMAXSQZrr VR512:$src1, VR512:$src2)>;
-def : Pat <(v8i64 (int_x86_avx512_mask_pmaxu_q_512 (v8i64 VR512:$src1),
- (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPMAXUQZrr VR512:$src1, VR512:$src2)>;
-def : Pat <(v16i32 (int_x86_avx512_mask_pmins_d_512 (v16i32 VR512:$src1),
- (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
- (VPMINSDZrr VR512:$src1, VR512:$src2)>;
-def : Pat <(v16i32 (int_x86_avx512_mask_pminu_d_512 (v16i32 VR512:$src1),
- (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
- (VPMINUDZrr VR512:$src1, VR512:$src2)>;
-def : Pat <(v8i64 (int_x86_avx512_mask_pmins_q_512 (v8i64 VR512:$src1),
- (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPMINSQZrr VR512:$src1, VR512:$src2)>;
-def : Pat <(v8i64 (int_x86_avx512_mask_pminu_q_512 (v8i64 VR512:$src1),
- (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPMINUQZrr VR512:$src1, VR512:$src2)>;
//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//
@@ -4191,29 +4137,72 @@ defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X,
// AVX-512 Scalar convert from sign integer to float/double
//===----------------------------------------------------------------------===//
-multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- X86MemOperand x86memop, string asm> {
-let hasSideEffects = 0 in {
- def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
+multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
+ X86VectorVTInfo DstVT, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm> {
+ let hasSideEffects = 0 in {
+ def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
+ (ins DstVT.FRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
EVEX_4V;
- let mayLoad = 1 in
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins DstRC:$src1, x86memop:$src),
+ let mayLoad = 1 in
+ def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
+ (ins DstVT.FRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
EVEX_4V;
-} // hasSideEffects = 0
+ } // hasSideEffects = 0
+ let isCodeGenOnly = 1 in {
+ def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
+ (ins DstVT.RC:$src1, SrcRC:$src2),
+ !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set DstVT.RC:$dst,
+ (OpNode (DstVT.VT DstVT.RC:$src1),
+ SrcRC:$src2,
+ (i32 FROUND_CURRENT)))]>, EVEX_4V;
+
+ def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
+ (ins DstVT.RC:$src1, x86memop:$src2),
+ !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set DstVT.RC:$dst,
+ (OpNode (DstVT.VT DstVT.RC:$src1),
+ (ld_frag addr:$src2),
+ (i32 FROUND_CURRENT)))]>, EVEX_4V;
+  } // isCodeGenOnly = 1
+}
+
+multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
+ X86VectorVTInfo DstVT, string asm> {
+ def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
+ (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
+ !strconcat(asm,
+ "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
+ [(set DstVT.RC:$dst,
+ (OpNode (DstVT.VT DstVT.RC:$src1),
+ SrcRC:$src2,
+ (i32 imm:$rc)))]>, EVEX_4V, EVEX_B, EVEX_RC;
+}
+
+multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
+ X86VectorVTInfo DstVT, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm> {
+ defm NAME : avx512_vcvtsi_round<opc, OpNode, SrcRC, DstVT, asm>,
+ avx512_vcvtsi<opc, OpNode, SrcRC, DstVT, x86memop, ld_frag, asm>,
+ VEX_LIG;
}
let Predicates = [HasAVX512] in {
-defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
- XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">,
- XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
-defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">,
- XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">,
- XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
+ v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
+ XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
+ v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
+ XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
+ v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
+ XD, EVEX_CD8<32, CD8VT1>;
+defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
+ v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
+ XD, VEX_W, EVEX_CD8<64, CD8VT1>;
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
@@ -4233,14 +4222,18 @@ def : Pat<(f64 (sint_to_fp GR32:$src)),
def : Pat<(f64 (sint_to_fp GR64:$src)),
(VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
-defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">,
- XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">,
- XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
-defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">,
+defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR32,
+ v4f32x_info, i32mem, loadi32,
+ "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64,
+ v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
+ XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86SuintToFpRnd, GR32, v2f64x_info,
+ i32mem, loadi32, "cvtusi2sd{l}">,
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">,
- XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64,
+ v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
+ XD, VEX_W, EVEX_CD8<64, CD8VT1>;
def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
(VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
@@ -4321,18 +4314,9 @@ let isCodeGenOnly = 1 in {
int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
- defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
- int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}",
- SSE_CVT_Scalar, 0>, XS, EVEX_4V;
- defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
- int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}",
- SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
SSE_CVT_Scalar, 0>, XD, EVEX_4V;
- defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
- int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}",
- SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
} // isCodeGenOnly = 1
// Convert float/double to signed/unsigned int 32/64 with truncation
diff --git a/contrib/llvm/lib/Target/X86/X86InstrBuilder.h b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
index 2056056..eb4dc48 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
@@ -179,6 +179,6 @@ addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
.addConstantPoolIndex(CPI, 0, OpFlags).addReg(0);
}
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index dfe58ef..16ae77d 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -72,6 +72,9 @@ def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
//def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
+def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD",
+ SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
+ SDTCisVT<1, v4i32>]>>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
@@ -184,6 +187,7 @@ def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp>;
def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp>;
def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
+def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
@@ -350,6 +354,12 @@ def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
[SDTCisSameAs<0, 3>,
SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
+def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
+ SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>;
+
+def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>;
+def X86SuintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>;
+
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index 6b7a929..4aa0ae6 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3456,11 +3456,11 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
return !isPredicated(MI);
}
-bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
+bool X86InstrInfo::AnalyzeBranchImpl(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallVectorImpl<MachineInstr *> &CondBranches, bool AllowModify) const {
+
// Start from the bottom of the block and work up, examining the
// terminator instructions.
MachineBasicBlock::iterator I = MBB.end();
@@ -3558,6 +3558,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
FBB = TBB;
TBB = I->getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ CondBranches.push_back(I);
continue;
}
@@ -3595,11 +3596,90 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Update the MachineOperand.
Cond[0].setImm(BranchCode);
+ CondBranches.push_back(I);
}
return false;
}
+bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ SmallVector<MachineInstr *, 4> CondBranches;
+ return AnalyzeBranchImpl(MBB, TBB, FBB, Cond, CondBranches, AllowModify);
+}
+
+bool X86InstrInfo::AnalyzeBranchPredicate(MachineBasicBlock &MBB,
+ MachineBranchPredicate &MBP,
+ bool AllowModify) const {
+ using namespace std::placeholders;
+
+ SmallVector<MachineOperand, 4> Cond;
+ SmallVector<MachineInstr *, 4> CondBranches;
+ if (AnalyzeBranchImpl(MBB, MBP.TrueDest, MBP.FalseDest, Cond, CondBranches,
+ AllowModify))
+ return true;
+
+ if (Cond.size() != 1)
+ return true;
+
+ assert(MBP.TrueDest && "expected!");
+
+ if (!MBP.FalseDest)
+ MBP.FalseDest = MBB.getNextNode();
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+ MachineInstr *ConditionDef = nullptr;
+ bool SingleUseCondition = true;
+
+ for (auto I = std::next(MBB.rbegin()), E = MBB.rend(); I != E; ++I) {
+ if (I->modifiesRegister(X86::EFLAGS, TRI)) {
+ ConditionDef = &*I;
+ break;
+ }
+
+ if (I->readsRegister(X86::EFLAGS, TRI))
+ SingleUseCondition = false;
+ }
+
+ if (!ConditionDef)
+ return true;
+
+ if (SingleUseCondition) {
+ for (auto *Succ : MBB.successors())
+ if (Succ->isLiveIn(X86::EFLAGS))
+ SingleUseCondition = false;
+ }
+
+ MBP.ConditionDef = ConditionDef;
+ MBP.SingleUseCondition = SingleUseCondition;
+
+ // Currently we only recognize the simple pattern:
+ //
+ // test %reg, %reg
+ // je %label
+ //
+ const unsigned TestOpcode =
+ Subtarget.is64Bit() ? X86::TEST64rr : X86::TEST32rr;
+
+ if (ConditionDef->getOpcode() == TestOpcode &&
+ ConditionDef->getNumOperands() == 3 &&
+ ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) &&
+ (Cond[0].getImm() == X86::COND_NE || Cond[0].getImm() == X86::COND_E)) {
+ MBP.LHS = ConditionDef->getOperand(0);
+ MBP.RHS = MachineOperand::CreateImm(0);
+ MBP.Predicate = Cond[0].getImm() == X86::COND_NE
+ ? MachineBranchPredicate::PRED_NE
+ : MachineBranchPredicate::PRED_EQ;
+ return false;
+ }
+
+ return true;
+}
+
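
A tiny source-level counterpart (not part of the patch) of the pattern AnalyzeBranchPredicate recognizes: a comparison against zero is typically lowered to "test %reg, %reg" followed by a conditional jump, which is the TEST64rr/TEST32rr plus COND_E/COND_NE shape matched above.

    #include <cstdio>

    static void branchOnZero(long X) {
      if (X == 0)            // usually becomes: test %rdi, %rdi ; je <label>
        std::puts("zero");
    }

    int main() { branchOnZero(0); return 0; }
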
unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
@@ -3622,8 +3702,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
@@ -3671,7 +3750,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
bool X86InstrInfo::
canInsertSelect(const MachineBasicBlock &MBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
// Not all subtargets have cmov instructions.
@@ -3708,8 +3787,7 @@ canInsertSelect(const MachineBasicBlock &MBB,
void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DstReg,
- const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned DstReg, ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
assert(Cond.size() == 1 && "Invalid Cond array");
@@ -3967,6 +4045,36 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
}
}
+bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr *MemOp, unsigned &BaseReg,
+ unsigned &Offset,
+ const TargetRegisterInfo *TRI) const {
+ const MCInstrDesc &Desc = MemOp->getDesc();
+ int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags, MemOp->getOpcode());
+ if (MemRefBegin < 0)
+ return false;
+
+ MemRefBegin += X86II::getOperandBias(Desc);
+
+ BaseReg = MemOp->getOperand(MemRefBegin + X86::AddrBaseReg).getReg();
+ if (MemOp->getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1)
+ return false;
+
+ if (MemOp->getOperand(MemRefBegin + X86::AddrIndexReg).getReg() !=
+ X86::NoRegister)
+ return false;
+
+ const MachineOperand &DispMO = MemOp->getOperand(MemRefBegin + X86::AddrDisp);
+
+ // Displacement can be symbolic
+ if (!DispMO.isImm())
+ return false;
+
+ Offset = DispMO.getImm();
+
+ return (MemOp->getOperand(MemRefBegin + X86::AddrIndexReg).getReg() ==
+ X86::NoRegister);
+}
+
static unsigned getStoreRegOpcode(unsigned SrcReg,
const TargetRegisterClass *RC,
bool isStackAligned,
@@ -6219,13 +6327,217 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
}
bool X86InstrInfo::
-hasHighOperandLatency(const InstrItineraryData *ItinData,
+hasHighOperandLatency(const TargetSchedModel &SchedModel,
const MachineRegisterInfo *MRI,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI, unsigned UseIdx) const {
return isHighLatencyDef(DefMI->getOpcode());
}
+/// If the input instruction is part of a chain of dependent ops that are
+/// suitable for reassociation, return the earlier instruction in the sequence
+/// that defines its first operand, otherwise return nullptr.
+/// If the instruction's operands must be commuted to be considered a
+/// reassociation candidate, Commuted will be set to true.
+static MachineInstr *isReassocCandidate(const MachineInstr &Inst,
+ unsigned AssocOpcode,
+ bool checkPrevOneUse,
+ bool &Commuted) {
+ if (Inst.getOpcode() != AssocOpcode)
+ return nullptr;
+
+ MachineOperand Op1 = Inst.getOperand(1);
+ MachineOperand Op2 = Inst.getOperand(2);
+
+ const MachineBasicBlock *MBB = Inst.getParent();
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ // We need virtual register definitions.
+ MachineInstr *MI1 = nullptr;
+ MachineInstr *MI2 = nullptr;
+ if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg()))
+ MI1 = MRI.getUniqueVRegDef(Op1.getReg());
+ if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
+ MI2 = MRI.getUniqueVRegDef(Op2.getReg());
+
+ // And they need to be in the trace (otherwise, they won't have a depth).
+ if (!MI1 || !MI2 || MI1->getParent() != MBB || MI2->getParent() != MBB)
+ return nullptr;
+
+ Commuted = false;
+ if (MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode) {
+ std::swap(MI1, MI2);
+ Commuted = true;
+ }
+
+ // Avoid reassociating operands when it won't provide any benefit. If both
+ // operands are produced by instructions of this type, we may already
+ // have the optimal sequence.
+ if (MI2->getOpcode() == AssocOpcode)
+ return nullptr;
+
+ // The instruction must only be used by the other instruction that we
+ // reassociate with.
+ if (checkPrevOneUse && !MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
+ return nullptr;
+
+ // We must match a simple chain of dependent ops.
+ // TODO: This check is not necessary for the earliest instruction in the
+ // sequence. Instead of a sequence of 3 dependent instructions with the same
+ // opcode, we only need to find a sequence of 2 dependent instructions with
+ // the same opcode plus 1 other instruction that adds to the height of the
+ // trace.
+ if (MI1->getOpcode() != AssocOpcode)
+ return nullptr;
+
+ return MI1;
+}
+
+/// Select a pattern based on how the operands of each associative operation
+/// need to be commuted.
+static MachineCombinerPattern::MC_PATTERN getPattern(bool CommutePrev,
+ bool CommuteRoot) {
+ if (CommutePrev) {
+ if (CommuteRoot)
+ return MachineCombinerPattern::MC_REASSOC_XA_YB;
+ return MachineCombinerPattern::MC_REASSOC_XA_BY;
+ } else {
+ if (CommuteRoot)
+ return MachineCombinerPattern::MC_REASSOC_AX_YB;
+ return MachineCombinerPattern::MC_REASSOC_AX_BY;
+ }
+}
+
+bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
+ if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
+ return false;
+
+ // TODO: There are many more associative instruction types to match:
+ // 1. Other forms of scalar FP add (non-AVX)
+ // 2. Other data types (double, integer, vectors)
+ // 3. Other math / logic operations (mul, and, or)
+ unsigned AssocOpcode = X86::VADDSSrr;
+
+ // TODO: There is nothing x86-specific here except the instruction type.
+ // This logic could be hoisted into the machine combiner pass itself.
+ bool CommuteRoot;
+ if (MachineInstr *Prev = isReassocCandidate(Root, AssocOpcode, true,
+ CommuteRoot)) {
+ bool CommutePrev;
+ if (isReassocCandidate(*Prev, AssocOpcode, false, CommutePrev)) {
+ // We found a sequence of instructions that may be suitable for a
+ // reassociation of operands to increase ILP.
+ Patterns.push_back(getPattern(CommutePrev, CommuteRoot));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// Attempt the following reassociation to reduce critical path length:
+/// B = A op X (Prev)
+/// C = B op Y (Root)
+/// ===>
+/// B = X op Y
+/// C = A op B
+static void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
+ MachineCombinerPattern::MC_PATTERN Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
+ MachineFunction *MF = Root.getParent()->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);
+
+ // This array encodes the operand index for each parameter because the
+ // operands may be commuted. Each row corresponds to a pattern value,
+ // and each column specifies the index of A, B, X, Y.
+ unsigned OpIdx[4][4] = {
+ { 1, 1, 2, 2 },
+ { 1, 2, 2, 1 },
+ { 2, 1, 1, 2 },
+ { 2, 2, 1, 1 }
+ };
+
+ MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]);
+ MachineOperand &OpB = Root.getOperand(OpIdx[Pattern][1]);
+ MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]);
+ MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]);
+ MachineOperand &OpC = Root.getOperand(0);
+
+ unsigned RegA = OpA.getReg();
+ unsigned RegB = OpB.getReg();
+ unsigned RegX = OpX.getReg();
+ unsigned RegY = OpY.getReg();
+ unsigned RegC = OpC.getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(RegA))
+ MRI.constrainRegClass(RegA, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegB))
+ MRI.constrainRegClass(RegB, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegX))
+ MRI.constrainRegClass(RegX, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegY))
+ MRI.constrainRegClass(RegY, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegC))
+ MRI.constrainRegClass(RegC, RC);
+
+ // Create a new virtual register for the result of (X op Y) instead of
+ // recycling RegB because the MachineCombiner's computation of the critical
+ // path requires a new register definition rather than an existing one.
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+
+ unsigned Opcode = Root.getOpcode();
+ bool KillA = OpA.isKill();
+ bool KillX = OpX.isKill();
+ bool KillY = OpY.isKill();
+
+ // Create new instructions for insertion.
+ MachineInstrBuilder MIB1 =
+ BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(RegY, getKillRegState(KillY));
+ InsInstrs.push_back(MIB1);
+
+ MachineInstrBuilder MIB2 =
+ BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
+ .addReg(RegA, getKillRegState(KillA))
+ .addReg(NewVR, getKillRegState(true));
+ InsInstrs.push_back(MIB2);
+
+ // Record old instructions for deletion.
+ DelInstrs.push_back(&Prev);
+ DelInstrs.push_back(&Root);
+}
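
A scalar illustration (not part of the patch) of the reassociation performed above: rewriting (A op X) op Y as A op (X op Y) leaves the result unchanged for associative operations but lets X op Y start before A is available, shortening the critical path. For floating point this changes rounding in general, which is why the new code is gated on UnsafeFPMath.

    #include <cstdio>

    int main() {
      float A = 1.5f, X = 2.25f, Y = 4.0f; // all exactly representable
      float Before = (A + X) + Y;          // B = A + X; C = B + Y
      float After  = A + (X + Y);          // B = X + Y; C = A + B
      std::printf("%d\n", Before == After); // prints 1 for these values
      return 0;
    }
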
+
+void X86InstrInfo::genAlternativeCodeSequence(
+ MachineInstr &Root,
+ MachineCombinerPattern::MC_PATTERN Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
+ MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();
+
+ // Select the previous instruction in the sequence based on the input pattern.
+ MachineInstr *Prev = nullptr;
+ if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_BY ||
+ Pattern == MachineCombinerPattern::MC_REASSOC_XA_BY)
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+ else if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_YB ||
+ Pattern == MachineCombinerPattern::MC_REASSOC_XA_YB)
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
+ else
+ llvm_unreachable("Unknown pattern for machine combiner");
+
+ reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
+ return;
+}
+
namespace {
/// Create Global Base Reg pass. This initializes the PIC
/// global base register for x86-32.
@@ -6292,7 +6604,7 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
char CGBR::ID = 0;
FunctionPass*
@@ -6404,7 +6716,7 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+} // namespace
char LDTLSCleanup::ID = 0;
FunctionPass*
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
index ac1b2d4..4912951 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -26,6 +26,19 @@ namespace llvm {
class X86RegisterInfo;
class X86Subtarget;
+ namespace MachineCombinerPattern {
+ enum MC_PATTERN : int {
+ // These are commutative variants for reassociating a computation chain
+ // of the form:
+ // B = A op X (Prev)
+ // C = B op Y (Root)
+ MC_REASSOC_AX_BY = 0,
+ MC_REASSOC_AX_YB = 1,
+ MC_REASSOC_XA_BY = 2,
+ MC_REASSOC_XA_YB = 3,
+ };
+ } // end namespace MachineCombinerPattern
+
namespace X86 {
// X86 specific condition code. These correspond to X86_*_COND in
// X86InstrInfo.td. They must be kept in synch.
@@ -77,7 +90,7 @@ namespace X86 {
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
CondCode GetOppositeBranchCondition(CondCode CC);
-} // end namespace X86;
+} // namespace X86
/// isGlobalStubReference - Return true if the specified TargetFlag operand is
@@ -166,6 +179,12 @@ class X86InstrInfo final : public X86GenInstrInfo {
virtual void anchor();
+ bool AnalyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallVectorImpl<MachineInstr *> &CondBranches,
+ bool AllowModify) const;
+
public:
explicit X86InstrInfo(X86Subtarget &STI);
@@ -254,18 +273,23 @@ public:
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
+
+ bool getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
+ unsigned &Offset,
+ const TargetRegisterInfo *TRI) const override;
+ bool AnalyzeBranchPredicate(MachineBasicBlock &MBB,
+ TargetInstrInfo::MachineBranchPredicate &MBP,
+ bool AllowModify = false) const override;
+
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
- bool canInsertSelect(const MachineBasicBlock&,
- const SmallVectorImpl<MachineOperand> &Cond,
+ bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
unsigned, unsigned, int&, int&, int&) const override;
void insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DstReg,
- const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned DstReg, ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg) const override;
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
@@ -423,12 +447,32 @@ public:
bool isHighLatencyDef(int opc) const override;
- bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ bool hasHighOperandLatency(const TargetSchedModel &SchedModel,
const MachineRegisterInfo *MRI,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI,
unsigned UseIdx) const override;
+
+ bool useMachineCombiner() const override {
+ return true;
+ }
+
+ /// Return true when there is potentially a faster code sequence
+ /// for an instruction chain ending in <Root>. All potential patterns are
+ /// output in the <Pattern> array.
+ bool getMachineCombinerPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &P) const override;
+
+ /// When getMachineCombinerPatterns() finds a pattern, this function generates
+ /// the instructions that could replace the original code sequence.
+ void genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+
/// analyzeCompare - For a comparison instruction, return the source registers
 /// in SrcReg and SrcReg2 if it has two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
@@ -468,6 +512,6 @@ private:
int &FrameIndex) const;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index 8294e38..9562918 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2234,14 +2234,27 @@ def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>;
-// AVX 256-bit register conversion intrinsics
+// AVX register conversion intrinsics
let Predicates = [HasAVX] in {
+ def : Pat<(v2f64 (X86cvtdq2pd (v4i32 VR128:$src))),
+ (VCVTDQ2PDrr VR128:$src)>;
+ def : Pat<(v2f64 (X86cvtdq2pd (bc_v4i32 (loadv2i64 addr:$src)))),
+ (VCVTDQ2PDrm addr:$src)>;
+
def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PDYrr VR128:$src)>;
def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
(VCVTDQ2PDYrm addr:$src)>;
} // Predicates = [HasAVX]
+// SSE2 register conversion intrinsics
+let Predicates = [HasSSE2] in {
+ def : Pat<(v2f64 (X86cvtdq2pd (v4i32 VR128:$src))),
+ (CVTDQ2PDrr VR128:$src)>;
+ def : Pat<(v2f64 (X86cvtdq2pd (bc_v4i32 (loadv2i64 addr:$src)))),
+ (CVTDQ2PDrm addr:$src)>;
+} // Predicates = [HasSSE2]
+
// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 0268066..2b82930 100644
--- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -242,6 +242,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(avx512_cvtsi2sd32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
@@ -469,6 +476,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pandn_q_128, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
X86_INTRINSIC_DATA(avx512_mask_pandn_q_256, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
X86_INTRINSIC_DATA(avx512_mask_pandn_q_512, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pavg_b_128, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pavg_b_256, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pavg_b_512, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pavg_w_128, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pavg_w_256, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pavg_w_512, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
@@ -493,6 +506,54 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_b_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_b_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_b_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_d_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_d_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_d_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_q_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_q_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_q_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_w_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_w_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_w_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_b_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_b_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_b_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_d_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_d_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_d_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_q_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_q_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_q_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK,
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
index ff1436a..64135e0 100644
--- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -17,6 +17,7 @@
#include "InstPrinter/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "Utils/X86ShuffleDecode.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -50,6 +51,8 @@ class X86MCInstLower {
public:
X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
+ Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
+ const MachineOperand &MO) const;
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
@@ -109,7 +112,7 @@ namespace llvm {
OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
SMShadowTracker.count(Inst, getSubtargetInfo());
}
-} // end llvm namespace
+} // namespace llvm
X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
X86AsmPrinter &asmprinter)
@@ -402,47 +405,43 @@ static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
}
+Optional<MCOperand>
+X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
+ const MachineOperand &MO) const {
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit())
+ return None;
+ return MCOperand::createReg(MO.getReg());
+ case MachineOperand::MO_Immediate:
+ return MCOperand::createImm(MO.getImm());
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
+ case MachineOperand::MO_JumpTableIndex:
+ return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
+ case MachineOperand::MO_ConstantPoolIndex:
+ return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
+ case MachineOperand::MO_BlockAddress:
+ return LowerSymbolOperand(
+ MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
+ case MachineOperand::MO_RegisterMask:
+ // Ignore call clobbers.
+ return None;
+ }
+}
+
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
-
- MCOperand MCOp;
- switch (MO.getType()) {
- default:
- MI->dump();
- llvm_unreachable("unknown operand type");
- case MachineOperand::MO_Register:
- // Ignore all implicit register operands.
- if (MO.isImplicit()) continue;
- MCOp = MCOperand::createReg(MO.getReg());
- break;
- case MachineOperand::MO_Immediate:
- MCOp = MCOperand::createImm(MO.getImm());
- break;
- case MachineOperand::MO_MachineBasicBlock:
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_ExternalSymbol:
- MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
- break;
- case MachineOperand::MO_JumpTableIndex:
- MCOp = LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
- break;
- case MachineOperand::MO_ConstantPoolIndex:
- MCOp = LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
- break;
- case MachineOperand::MO_BlockAddress:
- MCOp = LowerSymbolOperand(MO,
- AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
- break;
- case MachineOperand::MO_RegisterMask:
- // Ignore call clobbers.
- continue;
- }
-
- OutMI.addOperand(MCOp);
- }
+ for (const MachineOperand &MO : MI->operands())
+ if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
+ OutMI.addOperand(MaybeMCOp.getValue());
// Handle a few special cases to eliminate operand modifiers.
ReSimplify:
@@ -865,6 +864,28 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
SM.recordStatepoint(MI);
}
+void X86AsmPrinter::LowerFAULTING_LOAD_OP(const MachineInstr &MI,
+ X86MCInstLower &MCIL) {
+ // FAULTING_LOAD_OP <def>, <handler label>, <load opcode>, <load operands>
+
+ unsigned LoadDefRegister = MI.getOperand(0).getReg();
+ MCSymbol *HandlerLabel = MI.getOperand(1).getMCSymbol();
+ unsigned LoadOpcode = MI.getOperand(2).getImm();
+ unsigned LoadOperandsBeginIdx = 3;
+
+ FM.recordFaultingOp(FaultMaps::FaultingLoad, HandlerLabel);
+
+ MCInst LoadMI;
+ LoadMI.setOpcode(LoadOpcode);
+ LoadMI.addOperand(MCOperand::createReg(LoadDefRegister));
+ for (auto I = MI.operands_begin() + LoadOperandsBeginIdx,
+ E = MI.operands_end();
+ I != E; ++I)
+ if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, *I))
+ LoadMI.addOperand(MaybeOperand.getValue());
+
+ OutStreamer->EmitInstruction(LoadMI, getSubtargetInfo());
+}
// Lower a stackmap of the form:
// <id>, <shadowBytes>, ...
@@ -1120,6 +1141,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case TargetOpcode::STATEPOINT:
return LowerSTATEPOINT(*MI, MCInstLowering);
+ case TargetOpcode::FAULTING_LOAD_OP:
+ return LowerFAULTING_LOAD_OP(*MI, MCInstLowering);
+
case TargetOpcode::STACKMAP:
return LowerSTACKMAP(*MI);
diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index d598b55..342d26a 100644
--- a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -179,6 +179,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp b/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
index 143e70b..33aa78f 100644
--- a/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
+++ b/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
@@ -84,7 +84,7 @@ namespace {
};
char PadShortFunc::ID = 0;
-}
+} // namespace
FunctionPass *llvm::createX86PadShortFunctions() {
return new PadShortFunc();
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
index e9b6bfc..00e2134 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -419,6 +419,22 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
+ // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
+ // because the calling convention defines the EFLAGS register as NOT
+ // preserved.
+ //
+ // Unfortunately, EFLAGS does show up as live-out after branch folding. Assert
+ // to track this, and clear the register afterwards to avoid unnecessary
+ // crashes during release builds.
+ assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
+ "EFLAGS are not live-out from a patchpoint.");
+
+ // Also clear other registers that don't need to be preserved (IP).
+ for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
+ Mask[Reg / 32] &= ~(1U << (Reg % 32));
+}
+
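// Standalone sketch, not part of the patch: the mask arithmetic used by
// adjustStackMapLiveOutMask above. Register R occupies bit (R % 32) of the
// 32-bit word (R / 32), so clearing it is a single and-not. The register
// number and mask size below are made up for illustration.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Mask[4] = {0};                 // hypothetical live-out mask
  unsigned Reg = 41;                      // hypothetical register number
  Mask[Reg / 32] |= 1U << (Reg % 32);     // pretend it was marked live-out
  Mask[Reg / 32] &= ~(1U << (Reg % 32));  // clear it, as the pass does
  std::printf("word %u = 0x%08x\n", Reg / 32, (unsigned)Mask[Reg / 32]);
  return 0;
}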
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
@@ -765,4 +781,4 @@ unsigned get512BitSuperRegister(unsigned Reg) {
llvm_unreachable("Unexpected SIMD register");
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
index a714c2a..459ecf7 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -104,6 +104,8 @@ public:
/// register scavenger to determine what registers are free.
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
+
bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
@@ -134,6 +136,6 @@ unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false)
//get512BitRegister - X86 utility - returns 512-bit super register
unsigned get512BitSuperRegister(unsigned Reg);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h
index eb7e0ed..25606d3 100644
--- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h
@@ -48,6 +48,6 @@ public:
MachinePointerInfo SrcPtrInfo) const override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
index 74af29f..3b25d30 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -287,7 +287,7 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
-X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
+X86Subtarget::X86Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const X86TargetMachine &TM,
unsigned StackAlignOverride)
: X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
@@ -300,8 +300,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
TargetTriple.getEnvironment() == Triple::CODE16),
TSInfo(*TM.getDataLayout()),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- FrameLowering(TargetFrameLowering::StackGrowsDown, getStackAlignment(),
- is64Bit() ? -8 : -4) {
+ FrameLowering(*this, getStackAlignment()) {
// Determine the PICStyle based on the target selected.
if (TM.getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
index a476f7a..6934061 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -253,9 +253,8 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
- X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, const X86TargetMachine &TM,
- unsigned StackAlignOverride);
+ X86Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
+ const X86TargetMachine &TM, unsigned StackAlignOverride);
const X86TargetLowering *getTargetLowering() const override {
return &TLInfo;
@@ -491,6 +490,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
index 646cff7..3d6eb4f7 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -24,6 +24,10 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
+ cl::desc("Enable the machine combiner pass"),
+ cl::init(true), cl::Hidden);
+
extern "C" void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86TargetMachine> X(TheX86_32Target);
@@ -90,13 +94,14 @@ static std::string computeDataLayout(const Triple &TT) {
/// X86TargetMachine ctor - Create an X86 target.
///
-X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
+X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, computeDataLayout(Triple(TT)), TT, CPU, FS, Options,
- RM, CM, OL),
- TLOF(createTLOF(Triple(getTargetTriple()))),
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
+ OL),
+ TLOF(createTLOF(getTargetTriple())),
Subtarget(TT, CPU, FS, *this, Options.StackAlignmentOverride) {
// Windows stack unwinder gets confused when execution flow "falls through"
// after a call to 'noreturn' function.
@@ -213,7 +218,7 @@ bool X86PassConfig::addInstSelector() {
addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
// For ELF, cleanup any local-dynamic TLS accesses.
- if (Triple(TM->getTargetTriple()).isOSBinFormatELF() &&
+ if (TM->getTargetTriple().isOSBinFormatELF() &&
getOptLevel() != CodeGenOpt::None)
addPass(createCleanupLocalDynamicTLSPass());
@@ -224,12 +229,14 @@ bool X86PassConfig::addInstSelector() {
bool X86PassConfig::addILPOpts() {
addPass(&EarlyIfConverterID);
+ if (EnableMachineCombinerPass)
+ addPass(&MachineCombinerID);
return true;
}
bool X86PassConfig::addPreISel() {
// Only add this pass for 32-bit x86 Windows.
- Triple TT(TM->getTargetTriple());
+ const Triple &TT = TM->getTargetTriple();
if (TT.isOSWindows() && TT.getArch() == Triple::x86)
addPass(createX86WinEHStatePass());
return true;
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
index c9833ed..be56888 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
@@ -29,8 +29,8 @@ class X86TargetMachine final : public LLVMTargetMachine {
mutable StringMap<std::unique_ptr<X86Subtarget>> SubtargetMap;
public:
- X86TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM,
+ X86TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
~X86TargetMachine() override;
const X86Subtarget *getSubtargetImpl(const Function &F) const override;
@@ -44,6 +44,6 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index bbfeba8..13384fa 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -153,13 +153,13 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v4i64, 1 },
{ ISD::SRL, MVT::v4i64, 1 },
- { ISD::SHL, MVT::v32i8, 42 }, // cmpeqb sequence.
+ { ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
{ ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
- { ISD::SRL, MVT::v32i8, 32*10 }, // Scalarized.
+ { ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence.
{ ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
- { ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized.
+ { ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence.
{ ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
{ ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
@@ -253,19 +253,19 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
// to ISel. The cost model must return worst case assumptions because it is
// used for vectorization and we don't want to make vectorized code worse
// than scalar code.
- { ISD::SHL, MVT::v16i8, 30 }, // cmpeqb sequence.
- { ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
- { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
+ { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence.
+ { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence.
+ { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
{ ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
{ ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
- { ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
- { ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
+ { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.
+ { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized.
{ ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
- { ISD::SRA, MVT::v16i8, 16*10 }, // Scalarized.
- { ISD::SRA, MVT::v8i16, 8*10 }, // Scalarized.
+ { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
+ { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
{ ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized.
{ ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
diff --git a/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp
index 6925b27..71ce45b 100644
--- a/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp
+++ b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp
@@ -86,7 +86,7 @@ namespace {
};
char VZeroUpperInserter::ID = 0;
-}
+} // namespace
FunctionPass *llvm::createX86IssueVZeroUpperPass() {
return new VZeroUpperInserter();
diff --git a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
index ce69ea7..c9e8094 100644
--- a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -60,9 +60,10 @@ public:
private:
void emitExceptionRegistrationRecord(Function *F);
- void linkExceptionRegistration(IRBuilder<> &Builder, Value *Handler);
+ void linkExceptionRegistration(IRBuilder<> &Builder, Function *Handler);
void unlinkExceptionRegistration(IRBuilder<> &Builder);
void addCXXStateStores(Function &F, MachineModuleInfo &MMI);
+ void addSEHStateStores(Function &F, MachineModuleInfo &MMI);
void addCXXStateStoresToFunclet(Value *ParentRegNode, WinEHFuncInfo &FuncInfo,
Function &F, int BaseState);
void insertStateNumberStore(Value *ParentRegNode, Instruction *IP, int State);
@@ -104,7 +105,7 @@ private:
/// The linked list node subobject inside of RegNode.
Value *Link = nullptr;
};
-}
+} // namespace
FunctionPass *llvm::createX86WinEHStatePass() { return new WinEHStatePass(); }
@@ -145,16 +146,10 @@ bool WinEHStatePass::runOnFunction(Function &F) {
return false;
// Check the personality. Do nothing if this is not an MSVC personality.
- LandingPadInst *LP = nullptr;
- for (BasicBlock &BB : F) {
- LP = BB.getLandingPadInst();
- if (LP)
- break;
- }
- if (!LP)
+ if (!F.hasPersonalityFn())
return false;
PersonalityFn =
- dyn_cast<Function>(LP->getPersonalityFn()->stripPointerCasts());
+ dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
if (!PersonalityFn)
return false;
Personality = classifyEHPersonality(PersonalityFn);
@@ -171,8 +166,10 @@ bool WinEHStatePass::runOnFunction(Function &F) {
auto *MMIPtr = getAnalysisIfAvailable<MachineModuleInfo>();
assert(MMIPtr && "MachineModuleInfo should always be available");
MachineModuleInfo &MMI = *MMIPtr;
- if (Personality == EHPersonality::MSVC_CXX) {
- addCXXStateStores(F, MMI);
+ switch (Personality) {
+ default: llvm_unreachable("unexpected personality function");
+ case EHPersonality::MSVC_CXX: addCXXStateStores(F, MMI); break;
+ case EHPersonality::MSVC_X86SEH: addSEHStateStores(F, MMI); break;
}
// Reset per-function state.
@@ -258,7 +255,6 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
if (Personality == EHPersonality::MSVC_CXX) {
RegNodeTy = getCXXEHRegistrationType();
RegNode = Builder.CreateAlloca(RegNodeTy);
- // FIXME: We can skip this in -GS- mode, when we figure that out.
// SavedESP = llvm.stacksave()
Value *SP = Builder.CreateCall(
Intrinsic::getDeclaration(TheModule, Intrinsic::stacksave), {});
@@ -360,11 +356,14 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) {
}
void WinEHStatePass::linkExceptionRegistration(IRBuilder<> &Builder,
- Value *Handler) {
+ Function *Handler) {
+ // Emit the .safeseh directive for this function.
+ Handler->addFnAttr("safeseh");
+
Type *LinkTy = getEHLinkRegistrationType();
// Handler = Handler
- Handler = Builder.CreateBitCast(Handler, Builder.getInt8PtrTy());
- Builder.CreateStore(Handler, Builder.CreateStructGEP(LinkTy, Link, 1));
+ Value *HandlerI8 = Builder.CreateBitCast(Handler, Builder.getInt8PtrTy());
+ Builder.CreateStore(HandlerI8, Builder.CreateStructGEP(LinkTy, Link, 1));
// Next = [fs:00]
Constant *FSZero =
Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257));
@@ -472,6 +471,74 @@ void WinEHStatePass::addCXXStateStoresToFunclet(Value *ParentRegNode,
}
}
+/// Assign every distinct landingpad a unique state number for SEH. Unlike C++
+/// EH, SEH can use this very simple algorithm because its catch handlers
+/// aren't outlined and the runtime doesn't have to figure out which catch
+/// handler frame to unwind to.
+/// FIXME: __finally blocks are outlined, so this approach may break down there.
+void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) {
+ WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F);
+
+ // Remember and return the index that we used. We save it in WinEHFuncInfo so
+ // that we can lower llvm.x86.seh.exceptioninfo later in filter functions
+ // without too much trouble.
+ int RegNodeEscapeIndex = escapeRegNode(F);
+ FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex;
+
+ // Iterate all the instructions and emit state number stores.
+ int CurState = 0;
+ SmallPtrSet<BasicBlock *, 4> ExceptBlocks;
+ for (BasicBlock &BB : F) {
+ for (auto I = BB.begin(), E = BB.end(); I != E; ++I) {
+ if (auto *CI = dyn_cast<CallInst>(I)) {
+ auto *Intrin = dyn_cast<IntrinsicInst>(CI);
+ if (Intrin) {
+ // Calls that "don't throw" are still considered able to throw asynchronous
+ // exceptions, but intrinsics cannot.
+ continue;
+ }
+ insertStateNumberStore(RegNode, CI, -1);
+ } else if (auto *II = dyn_cast<InvokeInst>(I)) {
+ // Look up the state number of the landingpad this unwinds to.
+ LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst();
+ auto InsertionPair =
+ FuncInfo.LandingPadStateMap.insert(std::make_pair(LPI, CurState));
+ auto Iter = InsertionPair.first;
+ int &State = Iter->second;
+ bool Inserted = InsertionPair.second;
+ if (Inserted) {
+ // Each action consumes a state number.
+ auto *EHActions = cast<IntrinsicInst>(LPI->getNextNode());
+ SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
+ parseEHActions(EHActions, ActionList);
+ assert(!ActionList.empty());
+ CurState += ActionList.size();
+ State += ActionList.size() - 1;
+
+ // Remember all the __except block targets.
+ for (auto &Handler : ActionList) {
+ if (auto *CH = dyn_cast<CatchHandler>(Handler.get())) {
+ auto *BA = cast<BlockAddress>(CH->getHandlerBlockOrFunc());
+ ExceptBlocks.insert(BA->getBasicBlock());
+ }
+ }
+ }
+ insertStateNumberStore(RegNode, II, State);
+ }
+ }
+ }
+
+ // Insert llvm.stackrestore into each __except block.
+ Function *StackRestore =
+ Intrinsic::getDeclaration(TheModule, Intrinsic::stackrestore);
+ for (BasicBlock *ExceptBB : ExceptBlocks) {
+ IRBuilder<> Builder(ExceptBB->begin());
+ Value *SP =
+ Builder.CreateLoad(Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
+ Builder.CreateCall(StackRestore, {SP});
+ }
+}
+
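// Standalone sketch, not part of the patch: how the CurState/State counting in
// addSEHStateStores above plays out. Each distinct landingpad reserves one
// state per EH action; the invoke stores the last state of that group, and the
// next landingpad continues counting after it. The action counts are made up.
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> ActionCounts = {2, 1, 3}; // hypothetical landingpads
  int CurState = 0;
  for (int N : ActionCounts) {
    int State = CurState; // value first inserted into LandingPadStateMap
    CurState += N;        // each action consumes a state number
    State += N - 1;       // state actually stored before the invoke
    std::printf("landingpad: store state %d, next group starts at %d\n", State,
                CurState);
  }
  return 0;
}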
void WinEHStatePass::insertStateNumberStore(Value *ParentRegNode,
Instruction *IP, int State) {
IRBuilder<> Builder(IP);
diff --git a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 2e44ac9..e1baeac 100644
--- a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -40,7 +40,7 @@ public:
raw_ostream &VStream,
raw_ostream &CStream) const override;
};
-}
+} // namespace
static bool readInstruction16(ArrayRef<uint8_t> Bytes, uint64_t Address,
uint64_t &Size, uint16_t &Insn) {
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index f0e4596..8699ce8 100644
--- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -46,8 +46,8 @@ static MCRegisterInfo *createXCoreMCRegisterInfo(StringRef TT) {
return X;
}
-static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
+static MCSubtargetInfo *
+createXCoreMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitXCoreMCSubtargetInfo(X, TT, CPU, FS);
return X;
@@ -123,7 +123,7 @@ void XCoreTargetAsmStreamer::emitCCBottomData(StringRef Name) {
void XCoreTargetAsmStreamer::emitCCBottomFunction(StringRef Name) {
OS << "\t.cc_bottom " << Name << ".function\n";
}
-}
+} // namespace
static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
diff --git a/contrib/llvm/lib/Target/XCore/XCore.h b/contrib/llvm/lib/Target/XCore/XCore.h
index ba6ca84..eb8b5ec 100644
--- a/contrib/llvm/lib/Target/XCore/XCore.h
+++ b/contrib/llvm/lib/Target/XCore/XCore.h
@@ -32,6 +32,6 @@ namespace llvm {
CodeGenOpt::Level OptLevel);
ModulePass *createXCoreLowerThreadLocalPass();
-} // end namespace llvm;
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
index 607c772..116e89a 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
@@ -58,6 +58,6 @@ namespace llvm {
return 4;
}
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
index 77292c4..8d96105 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
@@ -34,7 +34,7 @@ namespace {
}
};
char XCoreFTAOElim::ID = 0;
-}
+} // namespace
/// createXCoreFrameToArgsOffsetEliminationPass - returns an instance of the
/// Frame to args offset elimination pass
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
index 97f0494..9c49a8d 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
@@ -85,7 +85,7 @@ namespace llvm {
// Memory barrier.
MEMBARRIER
};
- }
+ } // namespace XCoreISD
//===--------------------------------------------------------------------===//
// TargetLowering Implementation
@@ -215,6 +215,6 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
LLVMContext &Context) const override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index c310aa3..a6e974e 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -41,7 +41,7 @@ namespace XCore {
COND_INVALID
};
}
-}
+} // namespace llvm
// Pin the vtable to this file.
void XCoreInstrInfo::anchor() {}
@@ -281,7 +281,7 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
unsigned
XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ ArrayRef<MachineOperand> Cond,
DebugLoc DL)const{
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
index 60bb3f8..70beb41 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
@@ -56,8 +56,7 @@ public:
bool AllowModify) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
@@ -89,6 +88,6 @@ public:
unsigned Reg, uint64_t Value) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 996c6f5..f866ab0 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -50,7 +50,7 @@ namespace {
bool runOnModule(Module &M) override;
};
-}
+} // namespace
char XCoreLowerThreadLocal::ID = 0;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.h b/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.h
index 5691478..74a7f20 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.h
@@ -37,6 +37,6 @@ private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
MachineOperandType MOTy, unsigned Offset) const;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
index 078ffde..8cce75f 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -101,6 +101,6 @@ public:
return SpillLabels;
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
index cfd80b3..6224843 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -35,6 +35,6 @@ public:
MachinePointerInfo SrcPtrInfo) const override;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
index 7996020..c98518b 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
void XCoreSubtarget::anchor() { }
-XCoreSubtarget::XCoreSubtarget(const std::string &TT, const std::string &CPU,
+XCoreSubtarget::XCoreSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
: XCoreGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(*this),
TLInfo(TM, *this), TSInfo(*TM.getDataLayout()) {}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
index da51ef1..74ee594 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
@@ -40,9 +40,9 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
- XCoreSubtarget(const std::string &TT, const std::string &CPU,
+ XCoreSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM);
-
+
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
@@ -61,6 +61,6 @@ public:
return &InstrInfo.getRegisterInfo();
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index 228dc1c..370b64b 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
/// XCoreTargetMachine ctor - Create an ILP32 architecture model
///
-XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
+XCoreTargetMachine::XCoreTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
index 0d324ab..a8addfc 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
@@ -23,8 +23,8 @@ class XCoreTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
XCoreSubtarget Subtarget;
public:
- XCoreTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
+ XCoreTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
~XCoreTargetMachine() override;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetStreamer.h b/contrib/llvm/lib/Target/XCore/XCoreTargetStreamer.h
index 3563dbc..a82702f 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetStreamer.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetStreamer.h
@@ -22,6 +22,6 @@ public:
virtual void emitCCBottomData(StringRef Name) = 0;
virtual void emitCCBottomFunction(StringRef Name) = 0;
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index c7c57ab..86b3faa 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -92,7 +92,7 @@ namespace {
unsigned maxElements;
DenseMap<const Function *, DISubprogram *> FunctionDIs;
};
-}
+} // namespace
char ArgPromotion::ID = 0;
INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
@@ -245,6 +245,24 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
Argument *PtrArg = PointerArgs[i];
Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+ // Replace sret attribute with noalias. This reduces register pressure by
+ // avoiding a register copy.
+ if (PtrArg->hasStructRetAttr()) {
+ unsigned ArgNo = PtrArg->getArgNo();
+ F->setAttributes(
+ F->getAttributes()
+ .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
+ .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+ for (Use &U : F->uses()) {
+ CallSite CS(U.getUser());
+ CS.setAttributes(
+ CS.getAttributes()
+ .removeAttribute(F->getContext(), ArgNo + 1,
+ Attribute::StructRet)
+ .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+ }
+ }
+
// If this is a byval argument, and if the aggregate type is small, just
// pass the elements, which is always safe, if the passed value is densely
// packed or if we can prove the padding bytes are never accessed. This does
@@ -553,7 +571,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
LoadInst *Load = Loads[i];
BasicBlock *BB = Load->getParent();
- AliasAnalysis::Location Loc = MemoryLocation::get(Load);
+ MemoryLocation Loc = MemoryLocation::get(Load);
if (AA.canInstructionRangeModRef(BB->front(), *Load, Loc,
AliasAnalysis::Mod))
return false; // Pointer is invalidated!
diff --git a/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp b/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp
index 6af1043..7585fdc 100644
--- a/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp
@@ -38,7 +38,7 @@ public:
bool runOnModule(Module &M) override { return false; }
};
-}
+} // namespace
ModulePass *llvm::createBarrierNoopPass() { return new BarrierNoop(); }
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 8ce7646..3b68743 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -53,7 +53,7 @@ namespace {
unsigned getAlignment(GlobalVariable *GV) const;
};
-}
+} // namespace
char ConstantMerge::ID = 0;
INITIALIZE_PASS(ConstantMerge, "constmerge",
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 76898f2..6bfd3d1 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -159,7 +159,7 @@ namespace {
bool DeleteDeadVarargs(Function &Fn);
bool RemoveDeadArgumentsFromCallers(Function &Fn);
};
-}
+} // namespace
char DAE::ID = 0;
@@ -175,7 +175,7 @@ namespace {
bool ShouldHackArguments() const override { return true; }
};
-}
+} // namespace
char DAH::ID = 0;
INITIALIZE_PASS(DAH, "deadarghaX0r",
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
index 2f8c7d9..7e0dddc 100644
--- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -146,7 +146,7 @@ namespace {
};
char GVExtractorPass::ID = 0;
-}
+} // namespace
ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue *> &GVs,
bool deleteFn) {
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index ef8f42f..749ff99 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -132,7 +132,7 @@ namespace {
AliasAnalysis *AA;
TargetLibraryInfo *TLI;
};
-}
+} // namespace
char FunctionAttrs::ID = 0;
INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
@@ -208,8 +208,7 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
AAMDNodes AAInfo;
I->getAAMetadata(AAInfo);
- AliasAnalysis::Location Loc(Arg,
- AliasAnalysis::UnknownSize, AAInfo);
+ MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo);
if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) {
if (MRB & AliasAnalysis::Mod)
// Writes non-local memory. Give up.
@@ -232,20 +231,20 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
} else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
// Ignore non-volatile loads from local memory. (Atomic is okay here.)
if (!LI->isVolatile()) {
- AliasAnalysis::Location Loc = MemoryLocation::get(LI);
+ MemoryLocation Loc = MemoryLocation::get(LI);
if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
// Ignore non-volatile stores to local memory. (Atomic is okay here.)
if (!SI->isVolatile()) {
- AliasAnalysis::Location Loc = MemoryLocation::get(SI);
+ MemoryLocation Loc = MemoryLocation::get(SI);
if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
} else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
// Ignore vaargs on local memory.
- AliasAnalysis::Location Loc = MemoryLocation::get(VI);
+ MemoryLocation Loc = MemoryLocation::get(VI);
if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
@@ -380,7 +379,7 @@ namespace {
const SmallPtrSet<Function*, 8> &SCCNodes;
};
-}
+} // namespace
namespace llvm {
template<> struct GraphTraits<ArgumentGraphNode*> {
@@ -407,7 +406,7 @@ namespace llvm {
return AG->end();
}
};
-}
+} // namespace llvm
// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
static Attribute::AttrKind
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index ba04c80..7983104 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -57,7 +57,7 @@ namespace {
bool RemoveUnusedGlobalValue(GlobalValue &GV);
};
-}
+} // namespace
/// Returns true if F contains only a single "ret" instruction.
static bool isEmptyFunction(Function *F) {
@@ -228,6 +228,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
if (F->hasPrologueData())
MarkUsedGlobalsAsNeeded(F->getPrologueData());
+ if (F->hasPersonalityFn())
+ MarkUsedGlobalsAsNeeded(F->getPersonalityFn());
+
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index cc4a79f..0d83c82 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -89,7 +89,7 @@ namespace {
TargetLibraryInfo *TLI;
SmallSet<const Comdat *, 8> NotDiscardableComdats;
};
-}
+} // namespace
char GlobalOpt::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
@@ -2786,7 +2786,7 @@ public:
setUsedInitializer(*CompilerUsedV, CompilerUsed);
}
};
-}
+} // namespace
static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
if (GA.use_empty()) // No use at all.
diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
index af541d1..d717b25 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -45,7 +45,7 @@ namespace {
bool PropagateConstantsIntoArguments(Function &F);
bool PropagateConstantReturn(Function &F);
};
-}
+} // namespace
char IPCP::ID = 0;
INITIALIZE_PASS(IPCP, "ipconstprop",
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
index dc56a02..37ff091 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -62,7 +62,7 @@ public:
}
};
-}
+} // namespace
char AlwaysInliner::ID = 0;
INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 8f65a98..93cdba6 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -93,19 +93,26 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
// clutter to the IR.
AttrBuilder B;
B.addAttribute(Attribute::StackProtect)
- .addAttribute(Attribute::StackProtectStrong);
+ .addAttribute(Attribute::StackProtectStrong)
+ .addAttribute(Attribute::StackProtectReq);
AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(),
AttributeSet::FunctionIndex,
B);
- if (Callee->hasFnAttribute(Attribute::StackProtectReq)) {
+ if (Callee->hasFnAttribute(Attribute::SafeStack)) {
+ Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+ Caller->addFnAttr(Attribute::SafeStack);
+ } else if (Callee->hasFnAttribute(Attribute::StackProtectReq) &&
+ !Caller->hasFnAttribute(Attribute::SafeStack)) {
Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
Caller->addFnAttr(Attribute::StackProtectReq);
} else if (Callee->hasFnAttribute(Attribute::StackProtectStrong) &&
+ !Caller->hasFnAttribute(Attribute::SafeStack) &&
!Caller->hasFnAttribute(Attribute::StackProtectReq)) {
Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
Caller->addFnAttr(Attribute::StackProtectStrong);
} else if (Callee->hasFnAttribute(Attribute::StackProtect) &&
+ !Caller->hasFnAttribute(Attribute::SafeStack) &&
!Caller->hasFnAttribute(Attribute::StackProtectReq) &&
!Caller->hasFnAttribute(Attribute::StackProtectStrong))
Caller->addFnAttr(Attribute::StackProtect);
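// Standalone sketch, not part of the patch: the precedence the block above
// enforces when merging the callee's stack-protection level into the caller.
// The hypothetical helper below summarizes it as a max over the ordering
// SafeStack > StackProtectReq > StackProtectStrong > StackProtect > none.
enum class SSPLevel { None, Protect, Strong, Req, SafeStack };

inline SSPLevel mergeSSPLevel(SSPLevel Caller, SSPLevel Callee) {
  return Caller > Callee ? Caller : Callee; // caller keeps the stronger level
}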
@@ -431,8 +438,8 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
SmallPtrSet<Function*, 8> SCCFunctions;
DEBUG(dbgs() << "Inliner visiting SCC:");
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
+ for (CallGraphNode *Node : SCC) {
+ Function *F = Node->getFunction();
if (F) SCCFunctions.insert(F);
DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
}
@@ -448,13 +455,13 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// index into the InlineHistory vector.
SmallVector<std::pair<Function*, int>, 8> InlineHistory;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
+ for (CallGraphNode *Node : SCC) {
+ Function *F = Node->getFunction();
if (!F) continue;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- CallSite CS(cast<Value>(I));
+ for (BasicBlock &BB : *F)
+ for (Instruction &I : BB) {
+ CallSite CS(cast<Value>(&I));
// If this isn't a call, or it is a call to an intrinsic, it can
// never be inlined.
if (!CS || isa<IntrinsicInst>(I))
@@ -496,6 +503,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
LocalChange = false;
// Iterate over the outer loop because inlining functions can cause indirect
// calls to become direct calls.
+ // CallSites may be modified inside the loop, so a range-based for loop
+ // cannot be used here.
for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
CallSite CS = CallSites[CSi].first;
@@ -566,11 +574,8 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
int NewHistoryID = InlineHistory.size();
InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
- for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
- i != e; ++i) {
- Value *Ptr = InlineInfo.InlinedCalls[i];
+ for (Value *Ptr : InlineInfo.InlinedCalls)
CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
- }
}
}
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index 41334ca..ada4a76 100644
--- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -51,7 +51,7 @@ namespace {
AU.addRequired<DominatorTreeWrapperPass>();
}
};
-}
+} // namespace
char LoopExtractor::ID = 0;
INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
@@ -183,7 +183,7 @@ namespace {
bool runOnModule(Module &M) override;
};
-}
+} // namespace
char BlockExtractorPass::ID = 0;
INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 052f1b4..5e41798 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -409,7 +409,7 @@ public:
return (FunctionComparator(F, RHS.getFunc()).compare()) == -1;
}
};
-}
+} // namespace
int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
if (L < R) return -1;
@@ -1397,28 +1397,26 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
if (F->mayBeOverridden()) {
assert(G->mayBeOverridden());
- if (HasGlobalAliases) {
- // Make them both thunks to the same internal function.
- Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
- F->getParent());
- H->copyAttributesFrom(F);
- H->takeName(F);
- removeUsers(F);
- F->replaceAllUsesWith(H);
+ // Make them both thunks to the same internal function.
+ Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
+ F->getParent());
+ H->copyAttributesFrom(F);
+ H->takeName(F);
+ removeUsers(F);
+ F->replaceAllUsesWith(H);
- unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
+ unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
+ if (HasGlobalAliases) {
writeAlias(F, G);
writeAlias(F, H);
-
- F->setAlignment(MaxAlignment);
- F->setLinkage(GlobalValue::PrivateLinkage);
} else {
- // We can't merge them. Instead, pick one and update all direct callers
- // to call it and hope that we improve the instruction cache hit rate.
- replaceDirectCallers(G, F);
+ writeThunk(F, G);
+ writeThunk(F, H);
}
+ F->setAlignment(MaxAlignment);
+ F->setLinkage(GlobalValue::PrivateLinkage);
++NumDoubleWeak;
} else {
writeThunkOrAlias(F, G);
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 4a7cb7b..7a7065c 100644
--- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -40,7 +40,7 @@ namespace {
private:
Function* unswitchFunction(Function* F);
};
-}
+} // namespace
char PartialInliner::ID = 0;
INITIALIZE_PASS(PartialInliner, "partial-inliner",
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 3496a66..963f1bb 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -94,7 +94,6 @@ PassManagerBuilder::PassManagerBuilder() {
SizeLevel = 0;
LibraryInfo = nullptr;
Inliner = nullptr;
- DisableTailCalls = false;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
BBVectorize = RunBBVectorization;
@@ -238,8 +237,7 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createInstructionCombiningPass()); // Combine silly seq's
addExtensionsToPM(EP_Peephole, MPM);
- if (!DisableTailCalls)
- MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
+ MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
// Rotate Loop - disable header duplication at -Oz
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index 1943b93..a5ba9ee 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -49,7 +49,7 @@ namespace {
bool SimplifyFunction(Function *F);
void DeleteBasicBlock(BasicBlock *BB);
};
-}
+} // namespace
char PruneEH::ID = 0;
INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
@@ -177,7 +177,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
bool MadeChange = false;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
- if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(II)) {
+ if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(F)) {
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
// Insert a call instruction before the invoke.
CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 60c9573..6f9af1d 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -95,7 +95,7 @@ namespace {
AU.setPreservesAll();
}
};
-}
+} // namespace
char StripSymbols::ID = 0;
INITIALIZE_PASS(StripSymbols, "strip",
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index a8d0172..29ecc1d 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -193,7 +193,7 @@ namespace {
void incCreateInstNum() {}
#endif
};
-}
+} // namespace
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e83b9dd..6de380b 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1391,11 +1391,29 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) {
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
+
if (isAllocLikeFn(CS.getInstruction(), TLI))
return visitAllocSite(*CS.getInstruction());
bool Changed = false;
+ // Mark any parameters that are known to be non-null with the nonnull
+ // attribute. This is helpful for inlining calls to functions with null
+ // checks on their arguments.
+ unsigned ArgNo = 0;
+ for (Value *V : CS.args()) {
+ if (!CS.paramHasAttr(ArgNo+1, Attribute::NonNull) &&
+ isKnownNonNull(V)) {
+ AttributeSet AS = CS.getAttributes();
+ AS = AS.addAttribute(CS.getInstruction()->getContext(), ArgNo+1,
+ Attribute::NonNull);
+ CS.setAttributes(AS);
+ Changed = true;
+ }
+ ArgNo++;
+ }
+ assert(ArgNo == CS.arg_size() && "sanity check");
+
// If the callee is a pointer to a function, attempt to move any casts to the
// arguments of the call/invoke.
Value *Callee = CS.getCalledValue();
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index a554e9f..6b384b4 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -948,7 +948,7 @@ struct UDivFoldAction {
UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand, size_t SLHS)
: FoldAction(FA), OperandToFold(InputOperand), SelectLHSIdx(SLHS) {}
};
-}
+} // namespace
// X udiv 2^C -> X >> C
static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1,
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 6a6693c..a93ffbe 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -582,7 +582,7 @@ struct LoweredPHIRecord {
LoweredPHIRecord(PHINode *pn, unsigned Sh)
: PN(pn), Shift(Sh), Width(0) {}
};
-}
+} // namespace
namespace llvm {
template<>
@@ -603,7 +603,7 @@ namespace llvm {
LHS.Width == RHS.Width;
}
};
-}
+} // namespace llvm
/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 9d602c6..53950ae 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2353,7 +2353,8 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
// The logic here should be correct for any real-world personality function.
// However if that turns out not to be true, the offending logic can always
// be conditioned on the personality function, like the catch-all logic is.
- EHPersonality Personality = classifyEHPersonality(LI.getPersonalityFn());
+ EHPersonality Personality =
+ classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
// Simplify the list of clauses, eg by removing repeated catch clauses
// (these are often created by inlining).
@@ -2620,7 +2621,6 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
// with a new one.
if (MakeNewInstruction) {
LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
- LI.getPersonalityFn(),
NewClauses.size());
for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
NLI->addClause(NewClauses[i]);
@@ -2691,7 +2691,8 @@ bool InstCombiner::run() {
}
// Instruction isn't dead, see if we can constant propagate it.
- if (!I->use_empty() && isa<Constant>(I->getOperand(0))) {
+ if (!I->use_empty() &&
+ (I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) {
if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
@@ -2846,7 +2847,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
}
// ConstantProp instruction if trivially constant.
- if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
+ if (!Inst->use_empty() &&
+ (Inst->getNumOperands() == 0 || isa<Constant>(Inst->getOperand(0))))
if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
@@ -3044,7 +3046,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnFunction(Function &F) override;
};
-}
+} // namespace
void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 25f78b0..2dd2fe6 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -67,6 +67,7 @@ static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
static const uint64_t kIOSShadowOffset32 = 1ULL << 30;
static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
static const uint64_t kSmallX86_64ShadowOffset = 0x7FFF8000; // < 2G.
+static const uint64_t kLinuxKasan_ShadowOffset64 = 0xdffffc0000000000;
static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000;
static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37;
@@ -106,10 +107,8 @@ static const char *const kAsanUnpoisonStackMemoryName =
static const char *const kAsanOptionDetectUAR =
"__asan_option_detect_stack_use_after_return";
-static const char *const kAsanAllocaPoison =
- "__asan_alloca_poison";
-static const char *const kAsanAllocasUnpoison =
- "__asan_allocas_unpoison";
+static const char *const kAsanAllocaPoison = "__asan_alloca_poison";
+static const char *const kAsanAllocasUnpoison = "__asan_allocas_unpoison";
// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
@@ -117,6 +116,9 @@ static const size_t kNumberOfAccessSizes = 5;
static const unsigned kAllocaRzSize = 32;
// Command-line flags.
+static cl::opt<bool> ClEnableKasan(
+ "asan-kernel", cl::desc("Enable KernelAddressSanitizer instrumentation"),
+ cl::Hidden, cl::init(false));
// This flag may need to be replaced with -f[no-]asan-reads.
static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",
@@ -317,7 +319,8 @@ struct ShadowMapping {
bool OrShadowOffset;
};
-static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize) {
+static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
+ bool IsKasan) {
bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android;
bool IsIOS = TargetTriple.isiOS();
bool IsFreeBSD = TargetTriple.isOSFreeBSD();
@@ -352,9 +355,12 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize) {
Mapping.Offset = kPPC64_ShadowOffset64;
else if (IsFreeBSD)
Mapping.Offset = kFreeBSD_ShadowOffset64;
- else if (IsLinux && IsX86_64)
- Mapping.Offset = kSmallX86_64ShadowOffset;
- else if (IsMIPS64)
+ else if (IsLinux && IsX86_64) {
+ if (IsKasan)
+ Mapping.Offset = kLinuxKasan_ShadowOffset64;
+ else
+ Mapping.Offset = kSmallX86_64ShadowOffset;
+ } else if (IsMIPS64)
Mapping.Offset = kMIPS64_ShadowOffset64;
else if (IsAArch64)
Mapping.Offset = kAArch64_ShadowOffset64;
@@ -383,7 +389,8 @@ static size_t RedzoneSizeForScale(int MappingScale) {
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer : public FunctionPass {
- AddressSanitizer() : FunctionPass(ID) {
+ explicit AddressSanitizer(bool CompileKernel = false)
+ : FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan) {
initializeAddressSanitizerPass(*PassRegistry::getPassRegistry());
}
const char *getPassName() const override {
@@ -410,8 +417,7 @@ struct AddressSanitizer : public FunctionPass {
/// If it is an interesting memory access, return the PointerOperand
/// and set IsWrite/Alignment. Otherwise return nullptr.
Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
- uint64_t *TypeSize,
- unsigned *Alignment);
+ uint64_t *TypeSize, unsigned *Alignment);
void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I,
bool UseCalls, const DataLayout &DL);
void instrumentPointerComparisonOrSubtraction(Instruction *I);
@@ -447,11 +453,12 @@ struct AddressSanitizer : public FunctionPass {
LLVMContext *C;
Triple TargetTriple;
int LongSize;
+ bool CompileKernel;
Type *IntptrTy;
ShadowMapping Mapping;
DominatorTree *DT;
- Function *AsanCtorFunction;
- Function *AsanInitFunction;
+ Function *AsanCtorFunction = nullptr;
+ Function *AsanInitFunction = nullptr;
Function *AsanHandleNoReturnFunc;
Function *AsanPtrCmpFunction, *AsanPtrSubFunction;
// This array is indexed by AccessIsWrite, Experiment and log2(AccessSize).
@@ -470,7 +477,8 @@ struct AddressSanitizer : public FunctionPass {
class AddressSanitizerModule : public ModulePass {
public:
- AddressSanitizerModule() : ModulePass(ID) {}
+ explicit AddressSanitizerModule(bool CompileKernel = false)
+ : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan) {}
bool runOnModule(Module &M) override;
static char ID; // Pass identification, replacement for typeid
const char *getPassName() const override { return "AddressSanitizerModule"; }
@@ -487,6 +495,7 @@ class AddressSanitizerModule : public ModulePass {
}
GlobalsMetadata GlobalsMD;
+ bool CompileKernel;
Type *IntptrTy;
LLVMContext *C;
Triple TargetTriple;
@@ -588,7 +597,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
Value *SavedStack) {
IRBuilder<> IRB(InstBefore);
IRB.CreateCall(AsanAllocasUnpoisonFunc,
- {IRB.CreateLoad(DynamicAllocaLayout),
+ {IRB.CreateLoad(DynamicAllocaLayout),
IRB.CreatePtrToInt(SavedStack, IntptrTy)});
}
@@ -692,8 +701,8 @@ INITIALIZE_PASS_END(
AddressSanitizer, "asan",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
false)
-FunctionPass *llvm::createAddressSanitizerFunctionPass() {
- return new AddressSanitizer();
+FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel) {
+ return new AddressSanitizer(CompileKernel);
}
char AddressSanitizerModule::ID = 0;
@@ -702,8 +711,8 @@ INITIALIZE_PASS(
"AddressSanitizer: detects use-after-free and out-of-bounds bugs."
"ModulePass",
false, false)
-ModulePass *llvm::createAddressSanitizerModulePass() {
- return new AddressSanitizerModule();
+ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel) {
+ return new AddressSanitizerModule(CompileKernel);
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
@@ -1347,16 +1356,18 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
int LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
TargetTriple = Triple(M.getTargetTriple());
- Mapping = getShadowMapping(TargetTriple, LongSize);
+ Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel);
initializeCallbacks(M);
bool Changed = false;
- Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
- assert(CtorFunc);
- IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
-
- if (ClGlobals) Changed |= InstrumentGlobals(IRB, M);
+ // TODO(glider): temporarily disabled globals instrumentation for KASan.
+ if (ClGlobals && !CompileKernel) {
+ Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
+ assert(CtorFunc);
+ IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
+ Changed |= InstrumentGlobals(IRB, M);
+ }
return Changed;
}
@@ -1369,38 +1380,44 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
const std::string TypeStr = AccessIsWrite ? "store" : "load";
const std::string ExpStr = Exp ? "exp_" : "";
+ const std::string SuffixStr = CompileKernel ? "N" : "_n";
+ const std::string EndingStr = CompileKernel ? "_noabort" : "";
const Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr;
+ // TODO(glider): for KASan builds add _noabort to error reporting
+ // functions and make them actually noabort (remove the UnreachableInst).
AsanErrorCallbackSized[AccessIsWrite][Exp] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanReportErrorTemplate + ExpStr + TypeStr + "_n",
+ kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr,
IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N",
+ ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr,
IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
AccessSizeIndex++) {
const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex);
AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanReportErrorTemplate + ExpStr + Suffix, IRB.getVoidTy(),
- IntptrTy, ExpType, nullptr));
+ kAsanReportErrorTemplate + ExpStr + Suffix,
+ IRB.getVoidTy(), IntptrTy, ExpType, nullptr));
AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ClMemoryAccessCallbackPrefix + ExpStr + Suffix, IRB.getVoidTy(),
- IntptrTy, ExpType, nullptr));
+ ClMemoryAccessCallbackPrefix + ExpStr + Suffix + EndingStr,
+ IRB.getVoidTy(), IntptrTy, ExpType, nullptr));
}
}
}
+ const std::string MemIntrinCallbackPrefix =
+ CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
AsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
+ MemIntrinCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr));
AsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ClMemoryAccessCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
+ MemIntrinCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr));
AsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ClMemoryAccessCallbackPrefix + "memset", IRB.getInt8PtrTy(),
+ MemIntrinCallbackPrefix + "memset", IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, nullptr));
AsanHandleNoReturnFunc = checkSanitizerInterfaceFunction(
@@ -1427,14 +1444,14 @@ bool AddressSanitizer::doInitialization(Module &M) {
IntptrTy = Type::getIntNTy(*C, LongSize);
TargetTriple = Triple(M.getTargetTriple());
- std::tie(AsanCtorFunction, AsanInitFunction) =
- createSanitizerCtorAndInitFunctions(M, kAsanModuleCtorName, kAsanInitName,
- /*InitArgTypes=*/{},
- /*InitArgs=*/{});
-
- Mapping = getShadowMapping(TargetTriple, LongSize);
-
- appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority);
+ if (!CompileKernel) {
+ std::tie(AsanCtorFunction, AsanInitFunction) =
+ createSanitizerCtorAndInitFunctions(M, kAsanModuleCtorName, kAsanInitName,
+ /*InitArgTypes=*/{},
+ /*InitArgs=*/{});
+ appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority);
+ }
+ Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel);
return true;
}
@@ -1516,11 +1533,10 @@ bool AddressSanitizer::runOnFunction(Function &F) {
}
}
- bool UseCalls = false;
- if (ClInstrumentationWithCallsThreshold >= 0 &&
- ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold)
- UseCalls = true;
-
+ bool UseCalls =
+ CompileKernel ||
+ (ClInstrumentationWithCallsThreshold >= 0 &&
+ ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold);
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
const DataLayout &DL = F.getParent()->getDataLayout();
@@ -1706,8 +1722,7 @@ void FunctionStackPoisoner::poisonStack() {
if (ClInstrumentAllocas && DynamicAllocaVec.size() > 0) {
// Handle dynamic allocas.
createDynamicAllocasInitStorage();
- for (auto &AI : DynamicAllocaVec)
- handleDynamicAllocaCall(AI);
+ for (auto &AI : DynamicAllocaVec) handleDynamicAllocaCall(AI);
unpoisonDynamicAllocas();
}
@@ -1736,8 +1751,8 @@ void FunctionStackPoisoner::poisonStack() {
ComputeASanStackFrameLayout(SVD, 1UL << Mapping.Scale, MinHeaderSize, &L);
DEBUG(dbgs() << L.DescriptionString << " --- " << L.FrameSize << "\n");
uint64_t LocalStackSize = L.FrameSize;
- bool DoStackMalloc =
- ClUseAfterReturn && LocalStackSize <= kMaxStackMallocSize;
+ bool DoStackMalloc = ClUseAfterReturn && !ASan.CompileKernel &&
+ LocalStackSize <= kMaxStackMallocSize;
// Don't do dynamic alloca in presence of inline asm: too often it makes
// assumptions on which registers are available. Don't do stack malloc in the
// presence of inline asm on 32-bit platforms for the same reason.
@@ -1901,9 +1916,9 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
// For now just insert the call to ASan runtime.
Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy);
Value *SizeArg = ConstantInt::get(IntptrTy, Size);
- IRB.CreateCall(DoPoison ? AsanPoisonStackMemoryFunc
- : AsanUnpoisonStackMemoryFunc,
- {AddrArg, SizeArg});
+ IRB.CreateCall(
+ DoPoison ? AsanPoisonStackMemoryFunc : AsanUnpoisonStackMemoryFunc,
+ {AddrArg, SizeArg});
}
// Handling llvm.lifetime intrinsics for a given %alloca:
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index f685803..a887425 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -63,7 +63,7 @@ namespace {
void emitBranchToTrap(Value *Cmp = nullptr);
bool instrument(Value *Ptr, Value *Val, const DataLayout &DL);
};
-}
+} // namespace
char BoundsChecking::ID = 0;
INITIALIZE_PASS(BoundsChecking, "bounds-checking", "Run-time bounds checking",
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 2de6e1a..4309157 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -346,7 +346,7 @@ class DFSanVisitor : public InstVisitor<DFSanVisitor> {
void visitMemTransferInst(MemTransferInst &I);
};
-}
+} // namespace
char DataFlowSanitizer::ID;
INITIALIZE_PASS(DataFlowSanitizer, "dfsan",
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 9a3ed5c..43caf1f 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -139,7 +139,7 @@ namespace {
LLVMContext *Ctx;
SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
};
-}
+} // namespace
char GCOVProfiler::ID = 0;
INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
@@ -419,7 +419,7 @@ namespace {
DenseMap<BasicBlock *, GCOVBlock> Blocks;
GCOVBlock ReturnBlock;
};
-}
+} // namespace
std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
const char *NewStem) {
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index a91fc0e..2750585 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -30,6 +30,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeThreadSanitizerPass(Registry);
initializeSanitizerCoverageModulePass(Registry);
initializeDataFlowSanitizerPass(Registry);
+ initializeSafeStackPass(Registry);
}
/// LLVMInitializeInstrumentation - C binding for
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 100824e..63eee2f 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2022,6 +2022,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *CopyOp, *ConvertOp;
switch (I.getNumArgOperands()) {
+ case 3:
+ assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode");
case 2:
CopyOp = I.getArgOperand(0);
ConvertOp = I.getArgOperand(1);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp
new file mode 100644
index 0000000..13c5412
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/SafeStack.cpp
@@ -0,0 +1,608 @@
+//===-- SafeStack.cpp - Safe Stack Insertion ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass splits the stack into the safe stack (kept as-is for LLVM backend)
+// and the unsafe stack (explicitly allocated and managed through the runtime
+// support library).
+//
+// http://clang.llvm.org/docs/SafeStack.html
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "safestack"
+
+namespace llvm {
+
+STATISTIC(NumFunctions, "Total number of functions");
+STATISTIC(NumUnsafeStackFunctions, "Number of functions with unsafe stack");
+STATISTIC(NumUnsafeStackRestorePointsFunctions,
+ "Number of functions that use setjmp or exceptions");
+
+STATISTIC(NumAllocas, "Total number of allocas");
+STATISTIC(NumUnsafeStaticAllocas, "Number of unsafe static allocas");
+STATISTIC(NumUnsafeDynamicAllocas, "Number of unsafe dynamic allocas");
+STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads");
+
+} // namespace llvm
+
+namespace {
+
+/// Check whether a given alloca instruction (AI) should be put on the safe
+/// stack or not. The function analyzes all uses of AI and checks whether it is
+/// only accessed in a memory safe way (as decided statically).
+bool IsSafeStackAlloca(const AllocaInst *AI) {
+ // Go through all uses of this alloca and check whether all accesses to the
+ // allocated object are statically known to be memory safe and, hence, the
+ // object can be placed on the safe stack.
+
+ SmallPtrSet<const Value *, 16> Visited;
+ SmallVector<const Instruction *, 8> WorkList;
+ WorkList.push_back(AI);
+
+ // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
+ while (!WorkList.empty()) {
+ const Instruction *V = WorkList.pop_back_val();
+ for (const Use &UI : V->uses()) {
+ auto I = cast<const Instruction>(UI.getUser());
+ assert(V == UI.get());
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ // Loading from a pointer is safe.
+ break;
+ case Instruction::VAArg:
+ // "va-arg" from a pointer is safe.
+ break;
+ case Instruction::Store:
+ if (V == I->getOperand(0))
+ // Stored the pointer - conservatively assume it may be unsafe.
+ return false;
+ // Storing to the pointee is safe.
+ break;
+
+ case Instruction::GetElementPtr:
+ if (!cast<const GetElementPtrInst>(I)->hasAllConstantIndices())
+ // GEP with non-constant indices can lead to memory errors.
+ // This also applies to inbounds GEPs, as the inbounds attribute
+ // represents an assumption that the address is in bounds, rather than
+ // an assertion that it is.
+ return false;
+
+ // We assume that GEP on static alloca with constant indices is safe,
+ // otherwise a compiler would detect it and warn during compilation.
+
+ if (!isa<const ConstantInt>(AI->getArraySize()))
+ // However, if the array size itself is not constant, the access
+ // might still be unsafe at runtime.
+ return false;
+
+ /* fallthrough */
+
+ case Instruction::BitCast:
+ case Instruction::IntToPtr:
+ case Instruction::PHI:
+ case Instruction::PtrToInt:
+ case Instruction::Select:
+ // The object can be safe or not, depending on how the result of the
+ // instruction is used.
+ if (Visited.insert(I).second)
+ WorkList.push_back(cast<const Instruction>(I));
+ break;
+
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ // FIXME: add support for memset and memcpy intrinsics.
+ ImmutableCallSite CS(I);
+
+ // LLVM 'nocapture' attribute is only set for arguments whose address
+ // is not stored, passed around, or used in any other non-trivial way.
+ // We assume that passing a pointer to an object as a 'nocapture'
+ // argument is safe.
+ // FIXME: a more precise solution would require an interprocedural
+ // analysis here, which would look at all uses of an argument inside
+ // the function being called.
+ ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+ for (ImmutableCallSite::arg_iterator A = B; A != E; ++A)
+ if (A->get() == V && !CS.doesNotCapture(A - B))
+ // The parameter is not marked 'nocapture' - unsafe.
+ return false;
+ continue;
+ }
+
+ default:
+ // The object is unsafe if it is used in any other way.
+ return false;
+ }
+ }
+ }
+
+ // All uses of the alloca are safe, we can place it on the safe stack.
+ return true;
+}
+
+/// The SafeStack pass splits the stack of each function into the
+/// safe stack, which is only accessed through memory safe dereferences
+/// (as determined statically), and the unsafe stack, which contains all
+/// local variables that are accessed in unsafe ways.
+class SafeStack : public FunctionPass {
+ const DataLayout *DL;
+
+ Type *StackPtrTy;
+ Type *IntPtrTy;
+ Type *Int32Ty;
+ Type *Int8Ty;
+
+ Constant *UnsafeStackPtr;
+
+ /// Unsafe stack alignment. Each stack frame must ensure that the stack is
+ /// aligned to this value. We need to re-align the unsafe stack if the
+ /// alignment of any object on the stack exceeds this value.
+ ///
+ /// 16 seems like a reasonable upper bound on the alignment of objects that we
+ /// might expect to appear on the stack on most common targets.
+ enum { StackAlignment = 16 };
+
+ /// \brief Build a constant representing a pointer to the unsafe stack
+ /// pointer.
+ Constant *getOrCreateUnsafeStackPtr(Module &M);
+
+ /// \brief Find all static allocas, dynamic allocas, return instructions and
+ /// stack restore points (exception unwind blocks and setjmp calls) in the
+ /// given function and append them to the respective vectors.
+ void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas,
+ SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+ SmallVectorImpl<ReturnInst *> &Returns,
+ SmallVectorImpl<Instruction *> &StackRestorePoints);
+
+ /// \brief Allocate space for all static allocas in \p StaticAllocas,
+ /// replace allocas with pointers into the unsafe stack and generate code to
+ /// restore the stack pointer before all return instructions in \p Returns.
+ ///
+ /// \returns A pointer to the top of the unsafe stack after all unsafe static
+ /// allocas are allocated.
+ Value *moveStaticAllocasToUnsafeStack(Function &F,
+ ArrayRef<AllocaInst *> StaticAllocas,
+ ArrayRef<ReturnInst *> Returns);
+
+ /// \brief Generate code to restore the stack after all stack restore points
+ /// in \p StackRestorePoints.
+ ///
+ /// \returns A local variable in which to maintain the dynamic top of the
+ /// unsafe stack if needed.
+ AllocaInst *
+ createStackRestorePoints(Function &F,
+ ArrayRef<Instruction *> StackRestorePoints,
+ Value *StaticTop, bool NeedDynamicTop);
+
+ /// \brief Replace all allocas in \p DynamicAllocas with code to allocate
+ /// space dynamically on the unsafe stack and store the dynamic unsafe stack
+ /// top to \p DynamicTop if non-null.
+ void moveDynamicAllocasToUnsafeStack(Function &F, Value *UnsafeStackPtr,
+ AllocaInst *DynamicTop,
+ ArrayRef<AllocaInst *> DynamicAllocas);
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+ SafeStack() : FunctionPass(ID), DL(nullptr) {
+ initializeSafeStackPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ }
+
+ virtual bool doInitialization(Module &M) {
+ DL = &M.getDataLayout();
+
+ StackPtrTy = Type::getInt8PtrTy(M.getContext());
+ IntPtrTy = DL->getIntPtrType(M.getContext());
+ Int32Ty = Type::getInt32Ty(M.getContext());
+ Int8Ty = Type::getInt8Ty(M.getContext());
+
+ UnsafeStackPtr = getOrCreateUnsafeStackPtr(M);
+
+ return false;
+ }
+
+ bool runOnFunction(Function &F);
+
+}; // class SafeStack
+
+Constant *SafeStack::getOrCreateUnsafeStackPtr(Module &M) {
+ // The unsafe stack pointer is stored in a global variable with a magic name.
+ const char *kUnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
+
+ auto UnsafeStackPtr =
+ dyn_cast_or_null<GlobalVariable>(M.getNamedValue(kUnsafeStackPtrVar));
+
+ if (!UnsafeStackPtr) {
+ // The global variable is not defined yet, define it ourselves.
+ // We use the initial-exec TLS model because we do not support the variable
+ // living anywhere other than in the main executable.
+ UnsafeStackPtr = new GlobalVariable(
+ /*Module=*/M, /*Type=*/StackPtrTy,
+ /*isConstant=*/false, /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Initializer=*/0, /*Name=*/kUnsafeStackPtrVar,
+ /*InsertBefore=*/nullptr,
+ /*ThreadLocalMode=*/GlobalValue::InitialExecTLSModel);
+ } else {
+ // The variable exists, check its type and attributes.
+ if (UnsafeStackPtr->getValueType() != StackPtrTy) {
+ report_fatal_error(Twine(kUnsafeStackPtrVar) + " must have void* type");
+ }
+
+ if (!UnsafeStackPtr->isThreadLocal()) {
+ report_fatal_error(Twine(kUnsafeStackPtrVar) + " must be thread-local");
+ }
+ }
+
+ return UnsafeStackPtr;
+}
+
+void SafeStack::findInsts(Function &F,
+ SmallVectorImpl<AllocaInst *> &StaticAllocas,
+ SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+ SmallVectorImpl<ReturnInst *> &Returns,
+ SmallVectorImpl<Instruction *> &StackRestorePoints) {
+ for (Instruction &I : inst_range(&F)) {
+ if (auto AI = dyn_cast<AllocaInst>(&I)) {
+ ++NumAllocas;
+
+ if (IsSafeStackAlloca(AI))
+ continue;
+
+ if (AI->isStaticAlloca()) {
+ ++NumUnsafeStaticAllocas;
+ StaticAllocas.push_back(AI);
+ } else {
+ ++NumUnsafeDynamicAllocas;
+ DynamicAllocas.push_back(AI);
+ }
+ } else if (auto RI = dyn_cast<ReturnInst>(&I)) {
+ Returns.push_back(RI);
+ } else if (auto CI = dyn_cast<CallInst>(&I)) {
+ // setjmps require stack restore.
+ if (CI->getCalledFunction() && CI->canReturnTwice())
+ StackRestorePoints.push_back(CI);
+ } else if (auto LP = dyn_cast<LandingPadInst>(&I)) {
+ // Exception landing pads require stack restore.
+ StackRestorePoints.push_back(LP);
+ } else if (auto II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() == Intrinsic::gcroot)
+ llvm::report_fatal_error(
+ "gcroot intrinsic not compatible with safestack attribute");
+ }
+ }
+}
+
+AllocaInst *
+SafeStack::createStackRestorePoints(Function &F,
+ ArrayRef<Instruction *> StackRestorePoints,
+ Value *StaticTop, bool NeedDynamicTop) {
+ if (StackRestorePoints.empty())
+ return nullptr;
+
+ IRBuilder<> IRB(StaticTop
+ ? cast<Instruction>(StaticTop)->getNextNode()
+ : (Instruction *)F.getEntryBlock().getFirstInsertionPt());
+
+ // We need the current value of the shadow stack pointer to restore
+ // after longjmp or exception catching.
+
+ // FIXME: On some platforms this could be handled by the longjmp/exception
+ // runtime itself.
+
+ AllocaInst *DynamicTop = nullptr;
+ if (NeedDynamicTop)
+ // If we also have dynamic allocas, the stack pointer value changes
+ // throughout the function. For now we store it in an alloca.
+ DynamicTop = IRB.CreateAlloca(StackPtrTy, /*ArraySize=*/nullptr,
+ "unsafe_stack_dynamic_ptr");
+
+ if (!StaticTop)
+ // We need the original unsafe stack pointer value, even if there are
+ // no unsafe static allocas.
+ StaticTop = IRB.CreateLoad(UnsafeStackPtr, false, "unsafe_stack_ptr");
+
+ if (NeedDynamicTop)
+ IRB.CreateStore(StaticTop, DynamicTop);
+
+ // Restore current stack pointer after longjmp/exception catch.
+ for (Instruction *I : StackRestorePoints) {
+ ++NumUnsafeStackRestorePoints;
+
+ IRB.SetInsertPoint(cast<Instruction>(I->getNextNode()));
+ Value *CurrentTop = DynamicTop ? IRB.CreateLoad(DynamicTop) : StaticTop;
+ IRB.CreateStore(CurrentTop, UnsafeStackPtr);
+ }
+
+ return DynamicTop;
+}
+
+Value *
+SafeStack::moveStaticAllocasToUnsafeStack(Function &F,
+ ArrayRef<AllocaInst *> StaticAllocas,
+ ArrayRef<ReturnInst *> Returns) {
+ if (StaticAllocas.empty())
+ return nullptr;
+
+ IRBuilder<> IRB(F.getEntryBlock().getFirstInsertionPt());
+ DIBuilder DIB(*F.getParent());
+
+ // We explicitly compute and set the unsafe stack layout for all unsafe
+ // static alloca instructions. We save the unsafe "base pointer" in the
+ // prologue into a local variable and restore it in the epilogue.
+
+ // Load the current stack pointer (we'll also use it as a base pointer).
+ // FIXME: use a dedicated register for it ?
+ Instruction *BasePointer =
+ IRB.CreateLoad(UnsafeStackPtr, false, "unsafe_stack_ptr");
+ assert(BasePointer->getType() == StackPtrTy);
+
+ for (ReturnInst *RI : Returns) {
+ IRB.SetInsertPoint(RI);
+ IRB.CreateStore(BasePointer, UnsafeStackPtr);
+ }
+
+ // Compute maximum alignment among static objects on the unsafe stack.
+ unsigned MaxAlignment = 0;
+ for (AllocaInst *AI : StaticAllocas) {
+ Type *Ty = AI->getAllocatedType();
+ unsigned Align =
+ std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment());
+ if (Align > MaxAlignment)
+ MaxAlignment = Align;
+ }
+
+ if (MaxAlignment > StackAlignment) {
+ // Re-align the base pointer according to the max requested alignment.
+ assert(isPowerOf2_32(MaxAlignment));
+ IRB.SetInsertPoint(cast<Instruction>(BasePointer->getNextNode()));
+ BasePointer = cast<Instruction>(IRB.CreateIntToPtr(
+ IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy),
+ ConstantInt::get(IntPtrTy, ~uint64_t(MaxAlignment - 1))),
+ StackPtrTy));
+ }
+
+ // Allocate space for every unsafe static AllocaInst on the unsafe stack.
+ int64_t StaticOffset = 0; // Current stack top.
+ for (AllocaInst *AI : StaticAllocas) {
+ IRB.SetInsertPoint(AI);
+
+ auto CArraySize = cast<ConstantInt>(AI->getArraySize());
+ Type *Ty = AI->getAllocatedType();
+
+ uint64_t Size = DL->getTypeAllocSize(Ty) * CArraySize->getZExtValue();
+ if (Size == 0)
+ Size = 1; // Don't create zero-sized stack objects.
+
+ // Ensure the object is properly aligned.
+ unsigned Align =
+ std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment());
+
+ // Add alignment.
+ // NOTE: we ensure that BasePointer itself is aligned to >= Align.
+ StaticOffset += Size;
+ StaticOffset = RoundUpToAlignment(StaticOffset, Align);
+
+ Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8*
+ ConstantInt::get(Int32Ty, -StaticOffset));
+ Value *NewAI = IRB.CreateBitCast(Off, AI->getType(), AI->getName());
+ if (AI->hasName() && isa<Instruction>(NewAI))
+ cast<Instruction>(NewAI)->takeName(AI);
+
+ // Replace the alloca with the new location.
+ replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/true);
+ AI->replaceAllUsesWith(NewAI);
+ AI->eraseFromParent();
+ }
+
+ // Re-align BasePointer so that our callees would see it aligned as
+ // expected.
+ // FIXME: no need to update BasePointer in leaf functions.
+ StaticOffset = RoundUpToAlignment(StaticOffset, StackAlignment);
+
+ // Update shadow stack pointer in the function epilogue.
+ IRB.SetInsertPoint(cast<Instruction>(BasePointer->getNextNode()));
+
+ Value *StaticTop =
+ IRB.CreateGEP(BasePointer, ConstantInt::get(Int32Ty, -StaticOffset),
+ "unsafe_stack_static_top");
+ IRB.CreateStore(StaticTop, UnsafeStackPtr);
+ return StaticTop;
+}
+
+void SafeStack::moveDynamicAllocasToUnsafeStack(
+ Function &F, Value *UnsafeStackPtr, AllocaInst *DynamicTop,
+ ArrayRef<AllocaInst *> DynamicAllocas) {
+ DIBuilder DIB(*F.getParent());
+
+ for (AllocaInst *AI : DynamicAllocas) {
+ IRBuilder<> IRB(AI);
+
+ // Compute the new SP value (after AI).
+ Value *ArraySize = AI->getArraySize();
+ if (ArraySize->getType() != IntPtrTy)
+ ArraySize = IRB.CreateIntCast(ArraySize, IntPtrTy, false);
+
+ Type *Ty = AI->getAllocatedType();
+ uint64_t TySize = DL->getTypeAllocSize(Ty);
+ Value *Size = IRB.CreateMul(ArraySize, ConstantInt::get(IntPtrTy, TySize));
+
+ Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(UnsafeStackPtr), IntPtrTy);
+ SP = IRB.CreateSub(SP, Size);
+
+ // Align the SP value to satisfy the AllocaInst, type and stack alignments.
+ unsigned Align = std::max(
+ std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment()),
+ (unsigned)StackAlignment);
+
+ assert(isPowerOf2_32(Align));
+ Value *NewTop = IRB.CreateIntToPtr(
+ IRB.CreateAnd(SP, ConstantInt::get(IntPtrTy, ~uint64_t(Align - 1))),
+ StackPtrTy);
+
+ // Save the stack pointer.
+ IRB.CreateStore(NewTop, UnsafeStackPtr);
+ if (DynamicTop)
+ IRB.CreateStore(NewTop, DynamicTop);
+
+ Value *NewAI = IRB.CreateIntToPtr(SP, AI->getType());
+ if (AI->hasName() && isa<Instruction>(NewAI))
+ NewAI->takeName(AI);
+
+ replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/true);
+ AI->replaceAllUsesWith(NewAI);
+ AI->eraseFromParent();
+ }
+
+ if (!DynamicAllocas.empty()) {
+ // Now go through the instructions again, replacing stacksave/stackrestore.
+ for (inst_iterator It = inst_begin(&F), Ie = inst_end(&F); It != Ie;) {
+ Instruction *I = &*(It++);
+ auto II = dyn_cast<IntrinsicInst>(I);
+ if (!II)
+ continue;
+
+ if (II->getIntrinsicID() == Intrinsic::stacksave) {
+ IRBuilder<> IRB(II);
+ Instruction *LI = IRB.CreateLoad(UnsafeStackPtr);
+ LI->takeName(II);
+ II->replaceAllUsesWith(LI);
+ II->eraseFromParent();
+ } else if (II->getIntrinsicID() == Intrinsic::stackrestore) {
+ IRBuilder<> IRB(II);
+ Instruction *SI = IRB.CreateStore(II->getArgOperand(0), UnsafeStackPtr);
+ SI->takeName(II);
+ assert(II->use_empty());
+ II->eraseFromParent();
+ }
+ }
+ }
+}
+
+bool SafeStack::runOnFunction(Function &F) {
+ auto AA = &getAnalysis<AliasAnalysis>();
+
+ DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
+
+ if (!F.hasFnAttribute(Attribute::SafeStack)) {
+ DEBUG(dbgs() << "[SafeStack] safestack is not requested"
+ " for this function\n");
+ return false;
+ }
+
+ if (F.isDeclaration()) {
+ DEBUG(dbgs() << "[SafeStack] function definition"
+ " is not available\n");
+ return false;
+ }
+
+ {
+ // Make sure the regular stack protector won't run on this function
+ // (safestack attribute takes precedence).
+ AttrBuilder B;
+ B.addAttribute(Attribute::StackProtect)
+ .addAttribute(Attribute::StackProtectReq)
+ .addAttribute(Attribute::StackProtectStrong);
+ F.removeAttributes(
+ AttributeSet::FunctionIndex,
+ AttributeSet::get(F.getContext(), AttributeSet::FunctionIndex, B));
+ }
+
+ if (AA->onlyReadsMemory(&F)) {
+ // XXX: we don't protect against information leak attacks for now.
+ DEBUG(dbgs() << "[SafeStack] function only reads memory\n");
+ return false;
+ }
+
+ ++NumFunctions;
+
+ SmallVector<AllocaInst *, 16> StaticAllocas;
+ SmallVector<AllocaInst *, 4> DynamicAllocas;
+ SmallVector<ReturnInst *, 4> Returns;
+
+ // Collect all points where the stack gets unwound and needs to be restored.
+ // This is only necessary because the runtime (setjmp and unwind code) is
+ // not aware of the unsafe stack and won't unwind/restore it properly.
+ // To work around this problem without changing the runtime, we insert
+ // instrumentation to restore the unsafe stack pointer when necessary.
+ SmallVector<Instruction *, 4> StackRestorePoints;
+
+ // Find all static and dynamic alloca instructions that must be moved to the
+ // unsafe stack, all return instructions and stack restore points.
+ findInsts(F, StaticAllocas, DynamicAllocas, Returns, StackRestorePoints);
+
+ if (StaticAllocas.empty() && DynamicAllocas.empty() &&
+ StackRestorePoints.empty())
+ return false; // Nothing to do in this function.
+
+ if (!StaticAllocas.empty() || !DynamicAllocas.empty())
+ ++NumUnsafeStackFunctions; // This function has the unsafe stack.
+
+ if (!StackRestorePoints.empty())
+ ++NumUnsafeStackRestorePointsFunctions;
+
+ // The top of the unsafe stack after all unsafe static allocas are allocated.
+ Value *StaticTop = moveStaticAllocasToUnsafeStack(F, StaticAllocas, Returns);
+
+ // Safe stack object that stores the current unsafe stack top. It is updated
+ // as unsafe dynamic (non-constant-sized) allocas are allocated and freed.
+ // This is only needed if we need to restore stack pointer after longjmp
+ // or exceptions, and we have dynamic allocations.
+ // FIXME: a better alternative might be to store the unsafe stack pointer
+ // before setjmp / invoke instructions.
+ AllocaInst *DynamicTop = createStackRestorePoints(
+ F, StackRestorePoints, StaticTop, !DynamicAllocas.empty());
+
+ // Handle dynamic allocas.
+ moveDynamicAllocasToUnsafeStack(F, UnsafeStackPtr, DynamicTop,
+ DynamicAllocas);
+
+ DEBUG(dbgs() << "[SafeStack] safestack applied\n");
+ return true;
+}
+
+} // end anonymous namespace
+
+char SafeStack::ID = 0;
+INITIALIZE_PASS_BEGIN(SafeStack, "safe-stack",
+ "Safe Stack instrumentation pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(SafeStack, "safe-stack", "Safe Stack instrumentation pass",
+ false, false)
+
+FunctionPass *llvm::createSafeStackPass() { return new SafeStack(); }
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index f6ae0c2..dff39ef 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -33,6 +33,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
@@ -385,9 +386,14 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
}
bool IsEntryBB = &BB == &F.getEntryBlock();
- DebugLoc EntryLoc = IsEntryBB && IP->getDebugLoc()
- ? IP->getDebugLoc().getFnDebugLoc()
- : IP->getDebugLoc();
+ DebugLoc EntryLoc;
+ if (IsEntryBB) {
+ if (auto SP = getDISubprogram(&F))
+ EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP);
+ } else {
+ EntryLoc = IP->getDebugLoc();
+ }
+
IRBuilder<> IRB(IP);
IRB.SetCurrentDebugLocation(EntryLoc);
SmallVector<Value *, 1> Indices;
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/BlotMapVector.h b/contrib/llvm/lib/Transforms/ObjCARC/BlotMapVector.h
index d6439b6..f9fde26 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/BlotMapVector.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/BlotMapVector.h
@@ -105,4 +105,4 @@ public:
return Map.empty();
}
};
-} //
+} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index d318643..c7c77ec 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -50,7 +50,7 @@ namespace {
initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry());
}
};
-}
+} // namespace
char ObjCARCAPElim::ID = 0;
INITIALIZE_PASS(ObjCARCAPElim,
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
index b1515e3..94b092c 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
@@ -58,7 +58,8 @@ ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
}
AliasAnalysis::AliasResult
-ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+ObjCARCAliasAnalysis::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
if (!EnableARCOpts)
return AliasAnalysis::alias(LocA, LocB);
@@ -67,8 +68,8 @@ ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
const Value *SA = GetRCIdentityRoot(LocA.Ptr);
const Value *SB = GetRCIdentityRoot(LocB.Ptr);
AliasResult Result =
- AliasAnalysis::alias(Location(SA, LocA.Size, LocA.AATags),
- Location(SB, LocB.Size, LocB.AATags));
+ AliasAnalysis::alias(MemoryLocation(SA, LocA.Size, LocA.AATags),
+ MemoryLocation(SB, LocB.Size, LocB.AATags));
if (Result != MayAlias)
return Result;
@@ -77,7 +78,7 @@ ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
const Value *UA = GetUnderlyingObjCPtr(SA, *DL);
const Value *UB = GetUnderlyingObjCPtr(SB, *DL);
if (UA != SA || UB != SB) {
- Result = AliasAnalysis::alias(Location(UA), Location(UB));
+ Result = AliasAnalysis::alias(MemoryLocation(UA), MemoryLocation(UB));
// We can't use MustAlias or PartialAlias results here because
// GetUnderlyingObjCPtr may return an offsetted pointer value.
if (Result == NoAlias)
@@ -89,24 +90,23 @@ ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
return MayAlias;
}
-bool
-ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc,
- bool OrLocal) {
+bool ObjCARCAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
if (!EnableARCOpts)
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
// First, strip off no-ops, including ObjC-specific no-ops, and try making
// a precise alias query.
const Value *S = GetRCIdentityRoot(Loc.Ptr);
- if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.AATags),
- OrLocal))
+ if (AliasAnalysis::pointsToConstantMemory(
+ MemoryLocation(S, Loc.Size, Loc.AATags), OrLocal))
return true;
// If that failed, climb to the underlying object, including climbing through
// ObjC-specific no-ops, and try making an imprecise alias query.
const Value *U = GetUnderlyingObjCPtr(S, *DL);
if (U != S)
- return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal);
+ return AliasAnalysis::pointsToConstantMemory(MemoryLocation(U), OrLocal);
// If that failed, fail. We don't need to chain here, since that's covered
// by the earlier precise query.
@@ -135,7 +135,8 @@ ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) {
}
AliasAnalysis::ModRefResult
-ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
if (!EnableARCOpts)
return AliasAnalysis::getModRefInfo(CS, Loc);
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
index 3c5a021..eecc82f 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
@@ -56,12 +56,14 @@ namespace objcarc {
}
void getAnalysisUsage(AnalysisUsage &AU) const override;
- AliasResult alias(const Location &LocA, const Location &LocB) override;
- bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override;
+ AliasResult alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) override;
+ bool pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) override;
ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
ModRefBehavior getModRefBehavior(const Function *F) override;
ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Location &Loc) override;
+ const MemoryLocation &Loc) override;
ModRefResult getModRefInfo(ImmutableCallSite CS1,
ImmutableCallSite CS2) override;
};
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index e7731ad..080dbc0 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -101,7 +101,7 @@ namespace {
initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
}
};
-}
+} // namespace
//===----------------------------------------------------------------------===//
// Implementation
@@ -200,7 +200,7 @@ static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
bool SawRelease = false;
// Get the location associated with Load.
- AliasAnalysis::Location Loc = MemoryLocation::get(Load);
+ MemoryLocation Loc = MemoryLocation::get(Load);
// Walk down to find the store and the release, which may be in either order.
for (auto I = std::next(BasicBlock::iterator(Load)),
@@ -212,7 +212,7 @@ static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
break;
// Now we know that we have not seen either the store or the release. If I
- // is the the release, mark that we saw the release and continue.
+ // is the release, mark that we saw the release and continue.
Instruction *Inst = &*I;
if (Inst == Release) {
SawRelease = true;
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index 53c19c3..4f2f7da 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -63,7 +63,7 @@ namespace {
initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
}
};
-}
+} // namespace
char ObjCARCExpand::ID = 0;
INITIALIZE_PASS(ObjCARCExpand,
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index dca3f1b..cdbbfac 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -313,7 +313,7 @@ namespace {
};
const unsigned BBState::OverflowOccurredValue = 0xffffffff;
-}
+} // namespace
namespace llvm {
raw_ostream &operator<<(raw_ostream &OS,
@@ -551,7 +551,7 @@ namespace {
initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
}
};
-}
+} // namespace
char ObjCARCOpt::ID = 0;
INITIALIZE_PASS_BEGIN(ObjCARCOpt,
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
index d6fc916..fe0224b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -44,7 +44,7 @@ struct ADCE : public FunctionPass {
AU.setPreservesCFG();
}
};
-}
+} // namespace
char ADCE::ID = 0;
INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 8918909..a4e5446 100644
--- a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -76,7 +76,7 @@ struct AlignmentFromAssumptions : public FunctionPass {
const SCEV *&OffSCEV);
bool processAssumption(CallInst *I);
};
-}
+} // namespace
char AlignmentFromAssumptions::ID = 0;
static const char aip_name[] = "Alignment from assumptions";
diff --git a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
index 09c605e..8ffbacd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -66,7 +66,7 @@ struct BDCE : public FunctionPass {
AssumptionCache *AC;
DominatorTree *DT;
};
-}
+} // namespace
char BDCE::ID = 0;
INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 4288742..cc1dc94 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -171,7 +171,7 @@ private:
void deleteDeadCastInst() const;
bool optimizeConstants(Function &Fn);
};
-}
+} // namespace
char ConstantHoisting::ID = 0;
INITIALIZE_PASS_BEGIN(ConstantHoisting, "consthoist", "Constant Hoisting",
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
index c974ebb..e3df86e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -47,7 +47,7 @@ namespace {
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
-}
+} // namespace
char ConstantPropagation::ID = 0;
INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop",
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 79624b2..b1809b7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -56,7 +56,7 @@ namespace {
AU.addRequired<LazyValueInfo>();
}
};
-}
+} // namespace
char CorrelatedValuePropagation::ID = 0;
INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation",
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index 3b262a2..aa628e5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -60,7 +60,7 @@ namespace {
AU.setPreservesCFG();
}
};
-}
+} // namespace
char DeadInstElimination::ID = 0;
INITIALIZE_PASS(DeadInstElimination, "die",
@@ -87,7 +87,7 @@ namespace {
AU.setPreservesCFG();
}
};
-}
+} // namespace
char DCE::ID = 0;
INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index eb48a76..c99dc5f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -78,7 +78,7 @@ namespace {
bool runOnBasicBlock(BasicBlock &BB);
bool HandleFree(CallInst *F);
bool handleEndBlock(BasicBlock &BB);
- void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+ void RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
SmallSetVector<Value *, 16> &DeadStackObjects,
const DataLayout &DL);
@@ -92,7 +92,7 @@ namespace {
AU.addPreserved<MemoryDependenceAnalysis>();
}
};
-}
+} // namespace
char DSE::ID = 0;
INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
@@ -194,37 +194,37 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
/// getLocForWrite - Return a Location stored to by the specified instruction.
/// If isRemovable returns true, this function and getLocForRead completely
/// describe the memory operations for this instruction.
-static AliasAnalysis::Location
-getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
+static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return MemoryLocation::get(SI);
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
// memcpy/memmove/memset.
- AliasAnalysis::Location Loc = MemoryLocation::getForDest(MI);
+ MemoryLocation Loc = MemoryLocation::getForDest(MI);
return Loc;
}
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
- if (!II) return AliasAnalysis::Location();
+ if (!II)
+ return MemoryLocation();
switch (II->getIntrinsicID()) {
- default: return AliasAnalysis::Location(); // Unhandled intrinsic.
+ default:
+ return MemoryLocation(); // Unhandled intrinsic.
case Intrinsic::init_trampoline:
// FIXME: We don't know the size of the trampoline, so we can't really
// handle it here.
- return AliasAnalysis::Location(II->getArgOperand(0));
+ return MemoryLocation(II->getArgOperand(0));
case Intrinsic::lifetime_end: {
uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
- return AliasAnalysis::Location(II->getArgOperand(1), Len);
+ return MemoryLocation(II->getArgOperand(1), Len);
}
}
}
/// getLocForRead - Return the location read by the specified "hasMemoryWrite"
/// instruction if any.
-static AliasAnalysis::Location
-getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
+static MemoryLocation getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) &&
"Unknown instruction case");
@@ -232,7 +232,7 @@ getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
// instructions (memcpy/memmove).
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
return MemoryLocation::getForSource(MTI);
- return AliasAnalysis::Location();
+ return MemoryLocation();
}
@@ -317,7 +317,7 @@ static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
uint64_t Size;
if (getObjectSize(V, Size, DL, TLI))
return Size;
- return AliasAnalysis::UnknownSize;
+ return MemoryLocation::UnknownSize;
}
namespace {
@@ -333,8 +333,8 @@ namespace {
/// completely overwrites a store to the 'Earlier' location.
/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined
-static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
- const AliasAnalysis::Location &Earlier,
+static OverwriteResult isOverwrite(const MemoryLocation &Later,
+ const MemoryLocation &Earlier,
const DataLayout &DL,
const TargetLibraryInfo *TLI,
int64_t &EarlierOff, int64_t &LaterOff) {
@@ -346,8 +346,8 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
if (P1 == P2) {
// If we don't know the sizes of either access, then we can't do a
// comparison.
- if (Later.Size == AliasAnalysis::UnknownSize ||
- Earlier.Size == AliasAnalysis::UnknownSize)
+ if (Later.Size == MemoryLocation::UnknownSize ||
+ Earlier.Size == MemoryLocation::UnknownSize)
return OverwriteUnknown;
// Make sure that the Later size is >= the Earlier size.
@@ -357,8 +357,8 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// Otherwise, we have to have size information, and the later store has to be
// larger than the earlier one.
- if (Later.Size == AliasAnalysis::UnknownSize ||
- Earlier.Size == AliasAnalysis::UnknownSize)
+ if (Later.Size == MemoryLocation::UnknownSize ||
+ Earlier.Size == MemoryLocation::UnknownSize)
return OverwriteUnknown;
// Check to see if the later store is to the entire object (either a global,
@@ -374,7 +374,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// If the "Later" store is to a recognizable object, get its size.
uint64_t ObjectSize = getPointerSize(UO2, DL, TLI);
- if (ObjectSize != AliasAnalysis::UnknownSize)
+ if (ObjectSize != MemoryLocation::UnknownSize)
if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size)
return OverwriteComplete;
@@ -441,11 +441,11 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
/// This function detects when it is unsafe to remove a dependent instruction
/// because the DSE inducing instruction may be a self-read.
static bool isPossibleSelfRead(Instruction *Inst,
- const AliasAnalysis::Location &InstStoreLoc,
+ const MemoryLocation &InstStoreLoc,
Instruction *DepWrite, AliasAnalysis &AA) {
// Self reads can only happen for instructions that read memory. Get the
// location read.
- AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA);
+ MemoryLocation InstReadLoc = getLocForRead(Inst, AA);
if (!InstReadLoc.Ptr) return false; // Not a reading instruction.
// If the read and written loc obviously don't alias, it isn't a read.
@@ -459,7 +459,7 @@ static bool isPossibleSelfRead(Instruction *Inst,
// Here we don't know if A/B may alias, but we do know that B/B are must
// aliases, so removing the first memcpy is safe (assuming it writes <= #
  // bytes as the second one).
- AliasAnalysis::Location DepReadLoc = getLocForRead(DepWrite, AA);
+ MemoryLocation DepReadLoc = getLocForRead(DepWrite, AA);
if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
return false;
@@ -525,7 +525,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
}
// Figure out what location is being stored to.
- AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);
+ MemoryLocation Loc = getLocForWrite(Inst, *AA);
// If we didn't get a useful location, fail.
if (!Loc.Ptr)
@@ -540,7 +540,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
//
// Find out what memory location the dependent instruction stores.
Instruction *DepWrite = InstDep.getInst();
- AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
+ MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
// If we didn't get a useful location, or if it isn't a size, bail out.
if (!DepLoc.Ptr)
break;
@@ -645,7 +645,7 @@ static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
bool DSE::HandleFree(CallInst *F) {
bool MadeChange = false;
- AliasAnalysis::Location Loc = AliasAnalysis::Location(F->getOperand(0));
+ MemoryLocation Loc = MemoryLocation(F->getOperand(0));
SmallVector<BasicBlock *, 16> Blocks;
Blocks.push_back(F->getParent());
const DataLayout &DL = F->getModule()->getDataLayout();
@@ -809,7 +809,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
- AliasAnalysis::Location LoadedLoc;
+ MemoryLocation LoadedLoc;
// If we encounter a use of the pointer, it is no longer considered dead
if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
@@ -845,7 +845,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
/// RemoveAccessedObjects - Check to see if the specified location may alias any
/// of the stack objects in the DeadStackObjects set. If so, they become live
/// because the location is being loaded.
-void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+void DSE::RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
SmallSetVector<Value *, 16> &DeadStackObjects,
const DataLayout &DL) {
const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL);
@@ -864,8 +864,8 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
// Remove objects that could alias LoadedLoc.
DeadStackObjects.remove_if([&](Value *I) {
// See if the loaded location could alias the stack location.
- AliasAnalysis::Location StackLoc(
- I, getPointerSize(I, DL, AA->getTargetLibraryInfo()));
+ MemoryLocation StackLoc(I,
+ getPointerSize(I, DL, AA->getTargetLibraryInfo()));
return !AA->isNoAlias(StackLoc, LoadedLoc);
});
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index d536a93..8b629ea 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -72,7 +72,7 @@ struct SimpleValue {
isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
}
};
-}
+} // namespace
namespace llvm {
template <> struct DenseMapInfo<SimpleValue> {
@@ -85,7 +85,7 @@ template <> struct DenseMapInfo<SimpleValue> {
static unsigned getHashValue(SimpleValue Val);
static bool isEqual(SimpleValue LHS, SimpleValue RHS);
};
-}
+} // namespace llvm
unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
Instruction *Inst = Val.Inst;
@@ -219,7 +219,7 @@ struct CallValue {
return true;
}
};
-}
+} // namespace
namespace llvm {
template <> struct DenseMapInfo<CallValue> {
@@ -232,7 +232,7 @@ template <> struct DenseMapInfo<CallValue> {
static unsigned getHashValue(CallValue Val);
static bool isEqual(CallValue LHS, CallValue RHS);
};
-}
+} // namespace llvm
unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
Instruction *Inst = Val.Inst;
@@ -447,7 +447,7 @@ private:
ExpectedType);
}
};
-}
+} // namespace
bool EarlyCSE::processNode(DomTreeNode *Node) {
BasicBlock *BB = Node->getBlock();
@@ -764,7 +764,7 @@ public:
AU.setPreservesCFG();
}
};
-}
+} // namespace
char EarlyCSELegacyPass::ID = 0;
diff --git a/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
index 0430c18..dd6ea8d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -36,7 +36,7 @@ public:
private:
AliasAnalysis *AA;
};
-}
+} // namespace
char FlattenCFGPass::ID = 0;
INITIALIZE_PASS_BEGIN(FlattenCFGPass, "flattencfg", "Flatten the CFG", false,
diff --git a/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
index c931422..bb90c5f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -79,7 +79,7 @@ namespace {
MapVector<Instruction*, Value*> ConvertedInsts;
LLVMContext *Ctx;
};
-}
+} // namespace
char Float2Int::ID = 0;
INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 7770ddc..d9308c4 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -138,7 +138,7 @@ namespace {
uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
void verifyRemoved(const Value *) const;
};
-}
+} // namespace
namespace llvm {
template <> struct DenseMapInfo<Expression> {
@@ -159,7 +159,7 @@ template <> struct DenseMapInfo<Expression> {
}
};
-}
+} // namespace llvm
//===----------------------------------------------------------------------===//
// ValueTable Internal Functions
@@ -723,7 +723,7 @@ namespace {
};
char GVN::ID = 0;
-}
+} // namespace
// The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
@@ -852,13 +852,12 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
/// If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
-/// the stored value. LoadedTy is the type of the load we want to replace and
-/// InsertPt is the place to insert new instructions.
+/// the stored value. LoadedTy is the type of the load we want to replace.
+/// IRB is the IRBuilder used to insert new instructions.
///
/// If we can't do it, return null.
-static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
- Type *LoadedTy,
- Instruction *InsertPt,
+static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
+ IRBuilder<> &IRB,
const DataLayout &DL) {
if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL))
return nullptr;
@@ -874,12 +873,12 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
// Pointer to Pointer -> use bitcast.
if (StoredValTy->getScalarType()->isPointerTy() &&
LoadedTy->getScalarType()->isPointerTy())
- return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
+ return IRB.CreateBitCast(StoredVal, LoadedTy);
// Convert source pointers to integers, which can be bitcast.
if (StoredValTy->getScalarType()->isPointerTy()) {
StoredValTy = DL.getIntPtrType(StoredValTy);
- StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+ StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy);
}
Type *TypeToCastTo = LoadedTy;
@@ -887,11 +886,11 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
if (StoredValTy != TypeToCastTo)
- StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
+ StoredVal = IRB.CreateBitCast(StoredVal, TypeToCastTo);
// Cast to pointer if the load needs a pointer type.
if (LoadedTy->getScalarType()->isPointerTy())
- StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
+ StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy);
return StoredVal;
}
@@ -904,35 +903,34 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
// Convert source pointers to integers, which can be manipulated.
if (StoredValTy->getScalarType()->isPointerTy()) {
StoredValTy = DL.getIntPtrType(StoredValTy);
- StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
+ StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy);
}
// Convert vectors and fp to integer, which can be manipulated.
if (!StoredValTy->isIntegerTy()) {
StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
- StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
+ StoredVal = IRB.CreateBitCast(StoredVal, StoredValTy);
}
// If this is a big-endian system, we need to shift the value down to the low
// bits so that a truncate will work.
if (DL.isBigEndian()) {
- Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
- StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
+ StoredVal = IRB.CreateLShr(StoredVal, StoreSize - LoadSize, "tmp");
}
// Truncate the integer to the right size now.
Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
- StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
+ StoredVal = IRB.CreateTrunc(StoredVal, NewIntTy, "trunc");
if (LoadedTy == NewIntTy)
return StoredVal;
// If the result is a pointer, inttoptr.
if (LoadedTy->getScalarType()->isPointerTy())
- return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
+ return IRB.CreateIntToPtr(StoredVal, LoadedTy, "inttoptr");
// Otherwise, bitcast.
- return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
+ return IRB.CreateBitCast(StoredVal, LoadedTy, "bitcast");
}
/// This function is called when we have a
@@ -1122,7 +1120,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
- IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
+ IRBuilder<> Builder(InsertPt);
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
@@ -1145,7 +1143,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
if (LoadSize != StoreSize)
SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
- return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, DL);
+ return CoerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
}
/// This function is called when we have a
@@ -1219,7 +1217,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
LLVMContext &Ctx = LoadTy->getContext();
uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy)/8;
- IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
+ IRBuilder<> Builder(InsertPt);
// We know that this method is only called when the mem transfer fully
// provides the bits for the load.
@@ -1248,7 +1246,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
++NumBytesSet;
}
- return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, DL);
+ return CoerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
}
// Otherwise, this is a memcpy/memmove from a constant global.
@@ -1695,6 +1693,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ I->setDebugLoc(LI->getDebugLoc());
if (V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(LI);
@@ -1761,6 +1761,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
if (isa<PHINode>(V))
V->takeName(LI);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ I->setDebugLoc(LI->getDebugLoc());
if (V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(LI);
@@ -1928,8 +1930,9 @@ bool GVN::processLoad(LoadInst *L) {
// actually have the same type. See if we know how to reuse the stored
// value (depending on its type).
if (StoredVal->getType() != L->getType()) {
+ IRBuilder<> Builder(L);
StoredVal =
- CoerceAvailableValueToLoadType(StoredVal, L->getType(), L, DL);
+ CoerceAvailableValueToLoadType(StoredVal, L->getType(), Builder, DL);
if (!StoredVal)
return false;
@@ -1953,7 +1956,9 @@ bool GVN::processLoad(LoadInst *L) {
// the same type. See if we know how to reuse the previously loaded value
// (depending on its type).
if (DepLI->getType() != L->getType()) {
- AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L, DL);
+ IRBuilder<> Builder(L);
+ AvailableVal =
+ CoerceAvailableValueToLoadType(DepLI, L->getType(), Builder, DL);
if (!AvailableVal)
return false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 359a616..e931382 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -136,7 +136,7 @@ namespace {
void SinkUnusedInvariants(Loop *L);
};
-}
+} // namespace
char IndVarSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
@@ -494,7 +494,7 @@ struct RewritePhi {
RewritePhi(PHINode *P, unsigned I, Value *V, bool H, bool S)
: PN(P), Ith(I), Val(V), HighCost(H), SafePhi(S) {}
};
-}
+} // namespace
//===----------------------------------------------------------------------===//
// RewriteLoopExitValues - Optimize IV users outside the loop.
@@ -758,7 +758,7 @@ namespace {
WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr),
IsSigned(false) {}
};
-}
+} // namespace
/// visitCast - Update information about the induction variable that is
/// extended by this sign or zero extend operation. This is used to determine
@@ -1321,7 +1321,7 @@ namespace {
// Implement the interface used by simplifyUsersOfIV.
void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};
-}
+} // namespace
/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
/// users. Each successive simplification may push more users which may
@@ -2013,10 +2013,11 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Now that we're done iterating through lists, clean up any instructions
// which are now dead.
- while (!DeadInsts.empty())
- if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+ while (!DeadInsts.empty()) {
+ Value *V = static_cast<Value *>(DeadInsts.pop_back_val());
+ if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
+ }
// The Rewriter may not be used from this point on.
diff --git a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index cbdacad..ce1a0ca 100644
--- a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -222,7 +222,7 @@ public:
};
char InductiveRangeCheckElimination::ID = 0;
-}
+} // namespace
INITIALIZE_PASS(InductiveRangeCheckElimination, "irce",
"Inductive range check elimination", false, false)
@@ -618,7 +618,7 @@ public:
bool run();
};
-}
+} // namespace
void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block,
BasicBlock *ReplaceBy) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 711df41..7316db6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -138,7 +138,7 @@ namespace {
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
};
-}
+} // namespace
char JumpThreading::ID = 0;
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
@@ -758,67 +758,33 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
- // For a comparison where the LHS is outside this block, it's possible
- // that we've branched on it before. Used LVI to see if we can simplify
- // the branch based on that.
+ // If we're branching on a conditional, LVI might be able to determine
+ // its value at the branch instruction.  We only handle comparisons
+ // against a constant at this time.
+ // TODO: This should be extended to handle switches as well.
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
- pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- if (CondBr && CondConst && CondBr->isConditional() && PI != PE &&
- (!isa<Instruction>(CondCmp->getOperand(0)) ||
- cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) {
- // For predecessor edge, determine if the comparison is true or false
- // on that edge. If they're all true or all false, we can simplify the
- // branch.
- // FIXME: We could handle mixed true/false by duplicating code.
- LazyValueInfo::Tristate Baseline =
- LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0),
- CondConst, *PI, BB, CondCmp);
- if (Baseline != LazyValueInfo::Unknown) {
- // Check that all remaining incoming values match the first one.
- while (++PI != PE) {
- LazyValueInfo::Tristate Ret =
- LVI->getPredicateOnEdge(CondCmp->getPredicate(),
- CondCmp->getOperand(0), CondConst, *PI, BB,
- CondCmp);
- if (Ret != Baseline) break;
- }
-
- // If we terminated early, then one of the values didn't match.
- if (PI == PE) {
- unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0;
- unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1;
- CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
- BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
- CondBr->eraseFromParent();
- if (CondCmp->use_empty())
- CondCmp->eraseFromParent();
- else if (CondCmp->getParent() == BB) {
- // If the fact we just learned is true for all uses of the
- // condition, replace it with a constant value
- auto *CI = Baseline == LazyValueInfo::True ?
- ConstantInt::getTrue(CondCmp->getType()) :
- ConstantInt::getFalse(CondCmp->getType());
- CondCmp->replaceAllUsesWith(CI);
- CondCmp->eraseFromParent();
- }
- return true;
- }
- }
-
- } else if (CondBr && CondConst && CondBr->isConditional()) {
- // There might be an invariant in the same block with the conditional
- // that can determine the predicate.
-
+ if (CondBr && CondConst && CondBr->isConditional()) {
LazyValueInfo::Tristate Ret =
LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
- CondConst, CondCmp);
+ CondConst, CondBr);
if (Ret != LazyValueInfo::Unknown) {
unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0;
unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1;
CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
CondBr->eraseFromParent();
+ if (CondCmp->use_empty())
+ CondCmp->eraseFromParent();
+ else if (CondCmp->getParent() == BB) {
+ // If the fact we just learned is true for all uses of the
+ // condition, replace it with a constant value
+ auto *CI = Ret == LazyValueInfo::True ?
+ ConstantInt::getTrue(CondCmp->getType()) :
+ ConstantInt::getFalse(CondCmp->getType());
+ CondCmp->replaceAllUsesWith(CI);
+ CondCmp->eraseFromParent();
+ }
return true;
}
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index f0e6d64..e501946 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -156,7 +156,7 @@ namespace {
/// Simple Analysis hook. Delete loop L from alias set map.
void deleteAnalysisLoop(Loop *L) override;
};
-}
+} // namespace
char LICM::ID = 0;
INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
@@ -777,7 +777,7 @@ namespace {
AST.deleteValue(I);
}
};
-} // end anon namespace
+} // namespace
/// Try to promote memory values to scalars by sinking stores out of the
/// loop and moving loads to before the loop. We do this by looping over
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
index c19cd19..3dbf6ac 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
@@ -77,7 +77,7 @@ private:
bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
};
-}
+} // namespace
bool LoadCombine::doInitialization(Function &F) {
DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n");
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index 98b068e..02760ff 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -57,7 +57,7 @@ namespace {
bool &Changed, BasicBlock *Preheader);
};
-}
+} // namespace
char LoopDeletion::ID = 0;
INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index a907d59..d21a7db 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -630,26 +630,17 @@ private:
};
/// \brief Handles the loop versioning based on memchecks.
-class RuntimeCheckEmitter {
+class LoopVersioning {
public:
- RuntimeCheckEmitter(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
- DominatorTree *DT)
- : OrigLoop(L), NonDistributedLoop(nullptr), LAI(LAI), LI(LI), DT(DT) {}
-
- /// \brief Given the \p Partitions formed by Loop Distribution, it determines
- /// in which partition each pointer is used.
- void partitionPointers(InstPartitionContainer &Partitions) {
- // Set up partition id in PtrRtChecks. Ptr -> Access -> Intruction ->
- // Partition.
- PtrToPartition = Partitions.computePartitionSetForPointers(LAI);
-
- DEBUG(dbgs() << "\nPointers:\n");
- DEBUG(LAI.getRuntimePointerCheck()->print(dbgs(), 0, &PtrToPartition));
- }
+ LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
+ DominatorTree *DT,
+ const SmallVector<int, 8> *PtrToPartition = nullptr)
+ : OrigLoop(L), NonDistributedLoop(nullptr),
+ PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) {}
/// \brief Returns true if we need memchecks to distribute the loop.
bool needsRuntimeChecks() const {
- return LAI.getRuntimePointerCheck()->needsAnyChecking(&PtrToPartition);
+ return LAI.getRuntimePointerCheck()->needsAnyChecking(PtrToPartition);
}
/// \brief Performs the CFG manipulation part of versioning the loop including
@@ -660,7 +651,7 @@ public:
// Add the memcheck in the original preheader (this is empty initially).
BasicBlock *MemCheckBB = OrigLoop->getLoopPreheader();
std::tie(FirstCheckInst, MemRuntimeCheck) =
- LAI.addRuntimeCheck(MemCheckBB->getTerminator(), &PtrToPartition);
+ LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition);
assert(MemRuntimeCheck && "called even though needsAnyChecking = false");
// Rename the block to make the IR more readable.
@@ -733,10 +724,11 @@ private:
Loop *NonDistributedLoop;
/// \brief For each memory pointer it contains the partitionId it is used in.
+ /// If nullptr, no partitioning is used.
///
/// The I-th entry corresponds to I-th entry in LAI.getRuntimePointerCheck().
/// If the pointer is used in multiple partitions the entry is set to -1.
- SmallVector<int, 8> PtrToPartition;
+ const SmallVector<int, 8> *PtrToPartition;
/// \brief This maps the instructions from OrigLoop to their counterpart in
/// NonDistributedLoop.
@@ -929,11 +921,13 @@ private:
  // If we need run-time checks to disambiguate pointers at run-time, version
// the loop now.
- RuntimeCheckEmitter RtCheckEmitter(LAI, L, LI, DT);
- RtCheckEmitter.partitionPointers(Partitions);
- if (RtCheckEmitter.needsRuntimeChecks()) {
- RtCheckEmitter.versionLoop(this);
- RtCheckEmitter.addPHINodes(DefsUsedOutside);
+ auto PtrToPartition = Partitions.computePartitionSetForPointers(LAI);
+ LoopVersioning LVer(LAI, L, LI, DT, &PtrToPartition);
+ if (LVer.needsRuntimeChecks()) {
+ DEBUG(dbgs() << "\nPointers:\n");
+ DEBUG(LAI.getRuntimePointerCheck()->print(dbgs(), 0, &PtrToPartition));
+ LVer.versionLoop(this);
+ LVer.addPHINodes(DefsUsedOutside);
}
// Create identical copies of the original loop for each partition and hook
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index f92ecd4..3de1333 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -209,7 +209,7 @@ namespace {
bool runOnNoncountableLoop();
bool runOnCountableLoop();
};
-}
+} // namespace
char LoopIdiomRecognize::ID = 0;
INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
@@ -833,7 +833,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
// at the pointer and has infinite size.
- uint64_t AccessSize = AliasAnalysis::UnknownSize;
+ uint64_t AccessSize = MemoryLocation::UnknownSize;
// If the loop iterates a fixed number of times, we can refine the access size
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
@@ -844,7 +844,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
// operand in the store. Store to &A[i] of 100 will always return may alias
  // with store of &A[100], we need the StoreLoc to be "A" with size of 100,
// which will then no-alias a store to &A[100].
- AliasAnalysis::Location StoreLoc(Ptr, AccessSize);
+ MemoryLocation StoreLoc(Ptr, AccessSize);
for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
++BI)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index e125026..4c40f24 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -52,7 +52,7 @@ namespace {
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
-}
+} // namespace
char LoopInstSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index f584018..2554655 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -598,8 +598,8 @@ struct LoopInterchange : public FunctionPass {
bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) {
return !std::any_of(Ins->user_begin(), Ins->user_end(), [=](User *U) -> bool {
PHINode *UserIns = dyn_cast<PHINode>(U);
- ReductionDescriptor RD;
- return !UserIns || !ReductionDescriptor::isReductionPHI(UserIns, L, RD);
+ RecurrenceDescriptor RD;
+ return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD);
});
}
@@ -697,12 +697,12 @@ bool LoopInterchangeLegality::findInductionAndReductions(
if (!L->getLoopLatch() || !L->getLoopPredecessor())
return false;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
- ReductionDescriptor RD;
+ RecurrenceDescriptor RD;
PHINode *PHI = cast<PHINode>(I);
ConstantInt *StepValue = nullptr;
if (isInductionPHI(PHI, SE, StepValue))
Inductions.push_back(PHI);
- else if (ReductionDescriptor::isReductionPHI(PHI, L, RD))
+ else if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
Reductions.push_back(PHI);
else {
DEBUG(
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index ed103e6..f6db9b1 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -438,7 +438,7 @@ namespace {
bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
ReductionTracker &Reductions);
};
-}
+} // namespace
char LoopReroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index a675e12..2ba70ad 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -79,7 +79,7 @@ namespace {
AssumptionCache *AC;
DominatorTree *DT;
};
-}
+} // namespace
char LoopRotate::ID = 0;
INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4b59f3d..ee72486 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -116,7 +116,7 @@ public:
void dump() const;
};
-}
+} // namespace
void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << UsedByIndices.count() << ']';
@@ -157,7 +157,7 @@ public:
const_iterator end() const { return RegSequence.end(); }
};
-}
+} // namespace
void
RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
@@ -281,7 +281,7 @@ struct Formula {
void dump() const;
};
-}
+} // namespace
/// DoInitialMatch - Recursion helper for InitialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
@@ -903,7 +903,7 @@ private:
SmallPtrSetImpl<const SCEV *> *LoserRegs);
};
-}
+} // namespace
/// RateRegister - Tally up interesting quantities from the given register.
void Cost::RateRegister(const SCEV *Reg,
@@ -1102,7 +1102,7 @@ struct LSRFixup {
void dump() const;
};
-}
+} // namespace
LSRFixup::LSRFixup()
: UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
@@ -1252,7 +1252,7 @@ public:
void dump() const;
};
-}
+} // namespace
/// HasFormula - Test whether this use as a formula which has the same
/// registers as the given formula.
@@ -1791,7 +1791,7 @@ public:
void dump() const;
};
-}
+} // namespace
/// OptimizeShadowIV - If IV is used in a int-to-float cast
/// inside the loop then try to eliminate the cast operation.
@@ -3644,7 +3644,7 @@ struct WorkItem {
void dump() const;
};
-}
+} // namespace
void WorkItem::print(raw_ostream &OS) const {
OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
@@ -4949,7 +4949,7 @@ private:
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
-}
+} // namespace
char LoopStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 4ccbfc9..d702dc0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -229,7 +229,7 @@ namespace {
unsigned DynamicCostSavingsDiscount,
uint64_t UnrolledCost, uint64_t RolledDynamicCost);
};
-}
+} // namespace
char LoopUnroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
@@ -455,13 +455,15 @@ struct EstimatedUnrollCost {
///
/// Complete loop unrolling can make some loads constant, and we need to know
/// if that would expose any further optimization opportunities. This routine
-/// estimates this optimization. It assigns computed number of instructions,
-/// that potentially might be optimized away, to
-/// NumberOfOptimizedInstructions, and total number of instructions to
-/// UnrolledLoopSize (not counting blocks that won't be reached, if we were
-/// able to compute the condition).
-/// \returns false if we can't analyze the loop, or if we discovered that
-/// unrolling won't give anything. Otherwise, returns true.
+/// estimates this optimization. It computes the cost of the unrolled loop
+/// (UnrolledCost) and the dynamic cost of the original loop (RolledDynamicCost). By
+/// dynamic cost we mean that we won't count costs of blocks that are known not
+/// to be executed (i.e. if we have a branch in the loop and we know that at the
+/// given iteration its condition would be resolved to true, we won't add up the
+/// cost of the 'false'-block).
+/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
+/// the analysis failed (no benefits expected from the unrolling, or the loop is
+/// too big to analyze), the returned value is None.
Optional<EstimatedUnrollCost>
analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
const TargetTransformInfo &TTI,
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 988d2af..5bdc2ec 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -213,7 +213,7 @@ namespace {
BasicBlock **LoopExit = nullptr);
};
-}
+} // namespace
// Analyze loop. Check its size, calculate is it possible to unswitch
// it. Returns true if we can unswitch this loop.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
index 3314e1e..b8b35d4 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -138,7 +138,7 @@ namespace {
return Changed;
}
};
-}
+} // namespace
char LowerAtomic::ID = 0;
INITIALIZE_PASS(LowerAtomic, "loweratomic",
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 0c47cbd..b845c03 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -181,7 +181,7 @@ public:
bool runOnFunction(Function &F) override { return lowerExpectIntrinsic(F); }
};
-}
+} // namespace
char LowerExpectIntrinsic::ID = 0;
INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect",
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2bdf670..2c9f935 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -153,7 +153,7 @@ struct MemsetRange {
bool isProfitableToUseMemset(const DataLayout &DL) const;
};
-} // end anon namespace
+} // namespace
bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If we found more than 4 stores to merge or 16 bytes, use memset.
@@ -237,7 +237,7 @@ public:
};
-} // end anon namespace
+} // namespace
/// addRange - Add a new store to the MemsetRanges data structure. This adds a
@@ -337,7 +337,7 @@ namespace {
AU.addPreserved<MemoryDependenceAnalysis>();
}
- // Helper fuctions
+ // Helper functions
bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
bool processMemCpy(MemCpyInst *M);
@@ -355,7 +355,7 @@ namespace {
};
char MemCpyOpt::ID = 0;
-}
+} // namespace
// createMemCpyOptPass - The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
@@ -510,7 +510,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- AliasAnalysis::Location StoreLoc = MemoryLocation::get(SI);
+ MemoryLocation StoreLoc = MemoryLocation::get(SI);
for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
E = C; I != E; --I) {
if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
@@ -997,7 +997,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
}
}
- AliasAnalysis::Location SrcLoc = MemoryLocation::getForSource(M);
+ MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
M, M->getParent());
@@ -1075,10 +1075,9 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
Value *ByValArg = CS.getArgument(ArgNo);
Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
- MemDepResult DepInfo =
- MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
- true, CS.getInstruction(),
- CS.getInstruction()->getParent());
+ MemDepResult DepInfo = MD->getPointerDependencyFrom(
+ MemoryLocation(ByValArg, ByValSize), true, CS.getInstruction(),
+ CS.getInstruction()->getParent());
if (!DepInfo.isClobber())
return false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 776dfb4..886b6f5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -144,9 +144,8 @@ private:
// Routines for sinking stores
StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
- bool isStoreSinkBarrierInRange(const Instruction& Start,
- const Instruction& End,
- AliasAnalysis::Location Loc);
+ bool isStoreSinkBarrierInRange(const Instruction &Start,
+ const Instruction &End, MemoryLocation Loc);
bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst);
bool mergeStores(BasicBlock *BB);
// The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
@@ -157,7 +156,7 @@ private:
};
char MergedLoadStoreMotion::ID = 0;
-}
+} // namespace
///
/// \brief createMergedLoadStoreMotionPass - The public interface to this file.
@@ -241,7 +240,7 @@ bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start,
const Instruction& End,
LoadInst* LI) {
- AliasAnalysis::Location Loc = MemoryLocation::get(LI);
+ MemoryLocation Loc = MemoryLocation::get(LI);
return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Mod);
}
@@ -266,8 +265,8 @@ LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1,
LoadInst *Load1 = dyn_cast<LoadInst>(Inst);
BasicBlock *BB0 = Load0->getParent();
- AliasAnalysis::Location Loc0 = MemoryLocation::get(Load0);
- AliasAnalysis::Location Loc1 = MemoryLocation::get(Load1);
+ MemoryLocation Loc0 = MemoryLocation::get(Load0);
+ MemoryLocation Loc1 = MemoryLocation::get(Load1);
if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) &&
!isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) &&
!isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) {
@@ -400,10 +399,9 @@ bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
/// happening it is considered a sink barrier.
///
-bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction& Start,
- const Instruction& End,
- AliasAnalysis::Location
- Loc) {
+bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
+ const Instruction &End,
+ MemoryLocation Loc) {
return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::ModRef);
}
@@ -425,8 +423,8 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
StoreInst *Store1 = cast<StoreInst>(Inst);
- AliasAnalysis::Location Loc0 = MemoryLocation::get(Store0);
- AliasAnalysis::Location Loc1 = MemoryLocation::get(Store1);
+ MemoryLocation Loc0 = MemoryLocation::get(Store0);
+ MemoryLocation Loc1 = MemoryLocation::get(Store1);
if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
!isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))),
BB1->back(), Loc1) &&
diff --git a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 31d7df3..5423499 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -46,7 +46,7 @@ namespace {
};
char PartiallyInlineLibCalls::ID = 0;
-}
+} // namespace
INITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls",
"Partially inline calls to library functions", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 9ecaf10..670dcd2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -160,7 +160,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
AU.setPreservesAll();
}
};
-}
+} // namespace
static cl::opt<bool> NoEntry("spp-no-entry", cl::Hidden, cl::init(false));
static cl::opt<bool> NoCall("spp-no-call", cl::Hidden, cl::init(false));
@@ -181,7 +181,7 @@ struct PlaceSafepoints : public FunctionPass {
// if that was worth doing
}
};
-}
+} // namespace
// Insert a safepoint poll immediately before the given instruction. Does
// not handle the parsability of state at the runtime call, that's the
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 6c66b58..9842fd7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -154,7 +154,7 @@ namespace {
unsigned SymbolicRank;
bool isOr;
};
-}
+} // namespace
namespace {
class Reassociate : public FunctionPass {
@@ -197,7 +197,7 @@ namespace {
void OptimizeInst(Instruction *I);
Instruction *canonicalizeNegConstExpr(Instruction *I);
};
-}
+} // namespace
XorOpnd::XorOpnd(Value *V) {
assert(!isa<ConstantInt>(V) && "No ConstantInt");
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 1b46727..2ff56e6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -58,7 +58,7 @@ namespace {
bool runOnFunction(Function &F) override;
};
-}
+} // namespace
char RegToMem::ID = 0;
INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 6f6ba72..c15bc1b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -183,7 +183,7 @@ struct PartiallyConstructedSafepointRecord {
/// Maps rematerialized copy to it's original value.
RematerializedValueMapTy RematerializedValues;
};
-}
+} // namespace
/// Compute the live-in set for every basic block in the function
static void computeLiveInValues(DominatorTree &DT, Function &F,
@@ -646,7 +646,7 @@ private:
llvm_unreachable("only three states!");
}
};
-}
+} // namespace
/// For a given value or instruction, figure out what base ptr it's derived
/// from. For gc objects, this is simply itself. On success, returns a value
/// which is the base pointer. (This is reliable and can be used for
@@ -1659,17 +1659,10 @@ static void relocationViaAlloca(
/// vector. Doing so has the effect of changing the output of a couple of
/// tests in ways which make them less useful in testing fused safepoints.
template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
- DenseSet<T> Seen;
- SmallVector<T, 128> TempVec;
- TempVec.reserve(Vec.size());
- for (auto Element : Vec)
- TempVec.push_back(Element);
- Vec.clear();
- for (auto V : TempVec) {
- if (Seen.insert(V).second) {
- Vec.push_back(V);
- }
- }
+ SmallSet<T, 8> Seen;
+ Vec.erase(std::remove_if(Vec.begin(), Vec.end(), [&](const T &V) {
+ return !Seen.insert(V).second;
+ }), Vec.end());
}
/// Insert holders so that each Value is obviously live through the entire
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
index 056dd11..f38b2b1 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -127,7 +127,7 @@ typedef llvm::IRBuilder<true, ConstantFolder, IRBuilderPrefixedInserter<true>>
typedef llvm::IRBuilder<false, ConstantFolder, IRBuilderPrefixedInserter<false>>
IRBuilderTy;
#endif
-}
+} // namespace
namespace {
/// \brief A used slice of an alloca.
@@ -595,7 +595,7 @@ private:
/// the alloca.
SmallVector<Use *, 8> DeadOperands;
};
-}
+} // namespace
static Value *foldSelectInst(SelectInst &SI) {
// If the condition being selected on is a constant or the same value is
@@ -1173,7 +1173,7 @@ public:
}
}
};
-} // end anon namespace
+} // namespace
namespace {
/// \brief An optimization pass providing Scalar Replacement of Aggregates.
@@ -1268,7 +1268,7 @@ private:
void deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
bool promoteAllocas(Function &F);
};
-}
+} // namespace
char SROA::ID = 0;
@@ -3119,7 +3119,7 @@ private:
return true;
}
};
-}
+} // namespace
namespace {
/// \brief Visitor to rewrite aggregate loads and stores as scalar.
@@ -3327,7 +3327,7 @@ private:
return false;
}
};
-}
+} // namespace
/// \brief Strip aggregate type wrapping.
///
diff --git a/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp b/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
index 3480cd4..69e3a67 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
@@ -174,7 +174,7 @@ protected:
/// \brief Flag indicating whether the profile input loaded successfully.
bool ProfileIsValid;
};
-}
+} // namespace
/// \brief Print the weight of edge \p E on stream \p OS.
///
@@ -282,7 +282,7 @@ bool SampleProfileLoader::computeBlockWeights(Function &F) {
/// \brief Find equivalence classes for the given block.
///
/// This finds all the blocks that are guaranteed to execute the same
-/// number of times as \p BB1. To do this, it traverses all the the
+/// number of times as \p BB1. To do this, it traverses all the
/// descendants of \p BB1 in the dominator or post-dominator tree.
///
/// A block BB2 will be in the same equivalence class as \p BB1 if
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index d955da7..e42c3da 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -221,7 +221,7 @@ namespace {
}
};
-}
+} // namespace
char SROA_DT::ID = 0;
char SROA_SSAUp::ID = 0;
@@ -1123,7 +1123,7 @@ public:
}
}
};
-} // end anon namespace
+} // namespace
/// isSafeSelectToSpeculate - Select instructions that use an alloca and are
/// subsequently loaded can be rewritten to load both input pointers and then
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index f0e3ffd..0733daf 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -220,7 +220,7 @@ struct CFGSimplifyPass : public FunctionPass {
AU.addRequired<TargetTransformInfoWrapperPass>();
}
};
-}
+} // namespace
char CFGSimplifyPass::ID = 0;
INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
index 078c6a9..f49f4ea 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -163,7 +163,7 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
}
if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
- AliasAnalysis::Location Loc = MemoryLocation::get(L);
+ MemoryLocation Loc = MemoryLocation::get(L);
for (Instruction *S : Stores)
if (AA->getModRefInfo(S, Loc) & AliasAnalysis::Mod)
return false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 453503a..f32769c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -265,8 +265,10 @@ static bool isGEPFoldable(GetElementPtrInst *GEP,
BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field);
}
}
+
+ unsigned AddrSpace = GEP->getPointerAddressSpace();
return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV,
- BaseOffset, HasBaseReg, Scale);
+ BaseOffset, HasBaseReg, Scale, AddrSpace);
}
// Returns whether (Base + Index * Stride) can be folded to an addressing mode.
@@ -630,6 +632,15 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
// trivially dead.
RecursivelyDeleteTriviallyDeadInstructions(Bump);
} else {
+ // It's tempting to preserve nsw on Bump and/or Reduced. However, it's
+ // usually unsound, e.g.,
+ //
+ // X = (-2 +nsw 1) *nsw INT_MAX
+ // Y = (-2 +nsw 3) *nsw INT_MAX
+ // =>
+ // Y = X + 2 * INT_MAX
+ //
+  // Neither + nor * in the resultant expression is nsw.
Reduced = Builder.CreateAdd(Basis.Ins, Bump);
}
break;
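The comment above can be checked with plain arithmetic. Below is a minimal standalone C++ sketch (not part of the patch; it only mirrors the INT_MAX example) showing that the rewritten Y = X + 2 * INT_MAX has an intermediate that overflows a signed 32-bit value even though X and Y themselves fit:

    #include <cstdint>
    #include <cstdio>
    #include <limits>

    int main() {
      const int64_t IntMax = std::numeric_limits<int32_t>::max();
      const int64_t X = (-2 + 1) * IntMax;   // -2147483647, fits in i32
      const int64_t Y = (-2 + 3) * IntMax;   //  2147483647, fits in i32
      const int64_t Bump = 2 * IntMax;       //  4294967294, does NOT fit in i32
      std::printf("X=%lld Y=%lld Bump=%lld\n", (long long)X, (long long)Y,
                  (long long)Bump);
      // X + Bump equals Y mathematically, but the intermediate 2 * INT_MAX
      // already overflows a signed 32-bit value, so neither the multiply nor
      // the add in the rewritten form may carry the nsw flag.
      return (X + Bump == Y) ? 0 : 1;
    }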
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 9eef132..d23f515 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -120,7 +120,7 @@ namespace {
bool CanMoveAboveCall(Instruction *I, CallInst *CI);
Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
};
-}
+} // namespace
char TailCallElim::ID = 0;
INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim",
@@ -158,6 +158,9 @@ bool TailCallElim::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
+ if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
+ return false;
+
bool AllCallsAreTailCalls = false;
bool Modified = markTails(F, AllCallsAreTailCalls);
if (AllCallsAreTailCalls)
@@ -243,7 +246,7 @@ struct AllocaDerivedValueTracker {
SmallPtrSet<Instruction *, 32> AllocaUsers;
SmallPtrSet<Instruction *, 32> EscapePoints;
};
-}
+} // namespace
bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
if (F.callsFunctionThatReturnsTwice())
diff --git a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index 03c3a80..72cdfa4 100644
--- a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -107,4 +107,4 @@ ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
assert(Layout->FrameSize / Granularity == Layout->ShadowBytes.size());
}
-} // llvm namespace
+} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index f3c8013..798376e 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -486,11 +486,12 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
}
// Create new basic block, insert right before the original block.
- BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
- BB->getParent(), BB);
+ BasicBlock *NewBB = BasicBlock::Create(
+ BB->getContext(), BB->getName() + Suffix, BB->getParent(), BB);
// The new block unconditionally branches to the old block.
BranchInst *BI = BranchInst::Create(BB, NewBB);
+ BI->setDebugLoc(BB->getFirstNonPHI()->getDebugLoc());
// Move the edges from Preds to point to NewBB instead of BB.
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
@@ -553,6 +554,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
// The new block unconditionally branches to the old block.
BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1);
+ BI1->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc());
// Move the edges from Preds to point to NewBB1 instead of OrigBB.
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
@@ -593,6 +595,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
// The new block unconditionally branches to the old block.
BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2);
+ BI2->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc());
// Move the remaining edges from OrigBB to point to NewBB2.
for (SmallVectorImpl<BasicBlock*>::iterator
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 7e83c9e..362cd9b 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -60,7 +60,7 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
}
};
-}
+} // namespace
char BreakCriticalEdges::ID = 0;
INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
index f2d5e07..0771b29 100644
--- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -42,7 +42,7 @@ namespace {
DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
: Quotient(InQuotient), Remainder(InRemainder) {}
};
-}
+} // namespace
namespace llvm {
template<>
@@ -69,7 +69,7 @@ namespace llvm {
};
typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
-}
+} // namespace llvm
// insertFastDiv - Substitutes the div/rem instruction with code that checks the
// value of the operands and uses a shorter-faster div/rem instruction when
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 4f8d1df..e623445 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -289,7 +289,7 @@ namespace {
BasicBlock::const_iterator StartingInst,
std::vector<const BasicBlock*> &ToClone);
};
-}
+} // namespace
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
index dc95089..4bbded8 100644
--- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
@@ -162,4 +162,4 @@ bool optimizeGlobalCtorsList(Module &M,
return true;
}
-} // End llvm namespace
+} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
index 4eb3e3d..40a48c0 100644
--- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -46,7 +46,7 @@ public:
FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {}
bool run(BasicBlock *BB);
};
-}
+} // namespace
/// If \param [in] BB has more than one predecessor that is a conditional
/// branch, attempt to use parallel and/or for the branch condition. \returns
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index ddeaff0..ea84e7c 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -121,7 +121,7 @@ namespace {
}
}
};
-}
+} // namespace
/// Get or create a target for the branch from ResumeInsts.
BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
@@ -949,35 +949,23 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
// Get the personality function from the callee if it contains a landing pad.
- Value *CalleePersonality = nullptr;
- for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
- I != E; ++I)
- if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
- const BasicBlock *BB = II->getUnwindDest();
- const LandingPadInst *LP = BB->getLandingPadInst();
- CalleePersonality = LP->getPersonalityFn();
- break;
- }
+ Constant *CalledPersonality =
+ CalledFunc->hasPersonalityFn() ? CalledFunc->getPersonalityFn() : nullptr;
// Find the personality function used by the landing pads of the caller. If it
// exists, then check to see that it matches the personality function used in
// the callee.
- if (CalleePersonality) {
- for (Function::const_iterator I = Caller->begin(), E = Caller->end();
- I != E; ++I)
- if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
- const BasicBlock *BB = II->getUnwindDest();
- const LandingPadInst *LP = BB->getLandingPadInst();
-
- // If the personality functions match, then we can perform the
- // inlining. Otherwise, we can't inline.
- // TODO: This isn't 100% true. Some personality functions are proper
- // supersets of others and can be used in place of the other.
- if (LP->getPersonalityFn() != CalleePersonality)
- return false;
-
- break;
- }
+ Constant *CallerPersonality =
+ Caller->hasPersonalityFn() ? Caller->getPersonalityFn() : nullptr;
+ if (CalledPersonality) {
+ if (!CallerPersonality)
+ Caller->setPersonalityFn(CalledPersonality);
+ // If the personality functions match, then we can perform the
+ // inlining. Otherwise, we can't inline.
+ // TODO: This isn't 100% true. Some personality functions are proper
+ // supersets of others and can be used in place of the other.
+ else if (CalledPersonality != CallerPersonality)
+ return false;
}
// Get an iterator to the last basic block in the function, which will have
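The new personality handling reduces to a small decision: adopt the callee's personality when the caller has none, otherwise require an exact match. A standalone sketch of that rule (an assumed simplification; the real code compares llvm::Constant pointers rather than strings):

    #include <optional>
    #include <string>

    // "Caller" may be updated in place, mirroring
    // Caller->setPersonalityFn(CalledPersonality) in the patch above.
    bool canInlineWithPersonalities(std::optional<std::string> &Caller,
                                    const std::optional<std::string> &Callee) {
      if (!Callee)
        return true;               // callee has no personality: nothing to check
      if (!Caller) {
        Caller = Callee;           // caller adopts the callee's personality
        return true;
      }
      return *Caller == *Callee;   // otherwise the two must match exactly
    }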
diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index da890a2..c9bec9a 100644
--- a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -50,7 +50,7 @@ namespace {
};
char InstNamer::ID = 0;
-}
+} // namespace
INITIALIZE_PASS(InstNamer, "instnamer",
"Assign names to anonymous instructions", false, false)
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index 9d40b69..fcc7986 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -300,7 +300,7 @@ struct LCSSA : public FunctionPass {
AU.addPreserved<ScalarEvolution>();
}
};
-}
+} // namespace
char LCSSA::ID = 0;
INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index 70c77b0..5608557 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -14,6 +14,8 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -828,64 +830,45 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
/// orders them so it usually won't matter.
///
bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
- bool Changed = false;
-
// This implementation doesn't currently consider undef operands
// specially. Theoretically, two phis which are identical except for
// one having an undef where the other doesn't could be collapsed.
- // Map from PHI hash values to PHI nodes. If multiple PHIs have
- // the same hash value, the element is the first PHI in the
- // linked list in CollisionMap.
- DenseMap<uintptr_t, PHINode *> HashMap;
+ struct PHIDenseMapInfo {
+ static PHINode *getEmptyKey() {
+ return DenseMapInfo<PHINode *>::getEmptyKey();
+ }
+ static PHINode *getTombstoneKey() {
+ return DenseMapInfo<PHINode *>::getTombstoneKey();
+ }
+ static unsigned getHashValue(PHINode *PN) {
+ // Compute a hash value on the operands. Instcombine will likely have
+ // sorted them, which helps expose duplicates, but we have to check all
+ // the operands to be safe in case instcombine hasn't run.
+ return static_cast<unsigned>(hash_combine(
+ hash_combine_range(PN->value_op_begin(), PN->value_op_end()),
+ hash_combine_range(PN->block_begin(), PN->block_end())));
+ }
+ static bool isEqual(PHINode *LHS, PHINode *RHS) {
+ if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
+ RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return LHS == RHS;
+ return LHS->isIdenticalTo(RHS);
+ }
+ };
- // Maintain linked lists of PHI nodes with common hash values.
- DenseMap<PHINode *, PHINode *> CollisionMap;
+ // Set of unique PHINodes.
+ DenseSet<PHINode *, PHIDenseMapInfo> PHISet;
// Examine each PHI.
- for (BasicBlock::iterator I = BB->begin();
- PHINode *PN = dyn_cast<PHINode>(I++); ) {
- // Compute a hash value on the operands. Instcombine will likely have sorted
- // them, which helps expose duplicates, but we have to check all the
- // operands to be safe in case instcombine hasn't run.
- uintptr_t Hash = 0;
- // This hash algorithm is quite weak as hash functions go, but it seems
- // to do a good enough job for this particular purpose, and is very quick.
- for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
- Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
- Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
- }
- for (PHINode::block_iterator I = PN->block_begin(), E = PN->block_end();
- I != E; ++I) {
- Hash ^= reinterpret_cast<uintptr_t>(static_cast<BasicBlock *>(*I));
- Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
- }
- // Avoid colliding with the DenseMap sentinels ~0 and ~0-1.
- Hash >>= 1;
- // If we've never seen this hash value before, it's a unique PHI.
- std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
- HashMap.insert(std::make_pair(Hash, PN));
- if (Pair.second) continue;
- // Otherwise it's either a duplicate or a hash collision.
- for (PHINode *OtherPN = Pair.first->second; ; ) {
- if (OtherPN->isIdenticalTo(PN)) {
- // A duplicate. Replace this PHI with its duplicate.
- PN->replaceAllUsesWith(OtherPN);
- PN->eraseFromParent();
- Changed = true;
- break;
- }
- // A non-duplicate hash collision.
- DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN);
- if (I == CollisionMap.end()) {
- // Set this PHI to be the head of the linked list of colliding PHIs.
- PHINode *Old = Pair.first->second;
- Pair.first->second = PN;
- CollisionMap[PN] = Old;
- break;
- }
- // Proceed to the next PHI in the list.
- OtherPN = I->second;
+ bool Changed = false;
+ for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I++);) {
+ auto Inserted = PHISet.insert(PN);
+ if (!Inserted.second) {
+ // A duplicate. Replace this PHI with its duplicate.
+ PN->replaceAllUsesWith(*Inserted.first);
+ PN->eraseFromParent();
+ Changed = true;
}
}
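The rewritten EliminateDuplicatePHINodes keys a set on each PHI's value operands and incoming blocks, so a failed insertion identifies a duplicate. A standalone analogue using std::unordered_set in place of DenseSet (the Phi struct and integer stand-ins are illustrative only, not LLVM types):

    #include <cstddef>
    #include <unordered_set>
    #include <vector>

    struct Phi {
      std::vector<int> Values;   // stand-ins for value operands
      std::vector<int> Blocks;   // stand-ins for incoming blocks
    };

    struct PhiHash {
      size_t operator()(const Phi *P) const {
        size_t H = 0;
        for (int V : P->Values) H = H * 131 + (size_t)V;
        for (int B : P->Blocks) H = H * 131 + (size_t)B;
        return H;
      }
    };
    struct PhiEq {
      bool operator()(const Phi *A, const Phi *B) const {
        return A->Values == B->Values && A->Blocks == B->Blocks;
      }
    };

    // Returns how many PHIs would be folded into an earlier, identical one.
    size_t countDuplicates(const std::vector<Phi *> &Phis) {
      std::unordered_set<Phi *, PhiHash, PhiEq> Seen;
      size_t Dups = 0;
      for (Phi *P : Phis)
        if (!Seen.insert(P).second)   // insertion fails => duplicate
          ++Dups;
      return Dups;
    }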
@@ -1173,10 +1156,11 @@ static void changeToCall(InvokeInst *II) {
II->eraseFromParent();
}
-static bool markAliveBlocks(BasicBlock *BB,
+static bool markAliveBlocks(Function &F,
SmallPtrSetImpl<BasicBlock*> &Reachable) {
SmallVector<BasicBlock*, 128> Worklist;
+ BasicBlock *BB = F.begin();
Worklist.push_back(BB);
Reachable.insert(BB);
bool Changed = false;
@@ -1247,7 +1231,7 @@ static bool markAliveBlocks(BasicBlock *BB,
if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
changeToUnreachable(II, true);
Changed = true;
- } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(II)) {
+ } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) {
if (II->use_empty() && II->onlyReadsMemory()) {
// jump to the normal destination branch.
BranchInst::Create(II->getNormalDest(), II);
@@ -1272,7 +1256,7 @@ static bool markAliveBlocks(BasicBlock *BB,
/// otherwise.
bool llvm::removeUnreachableBlocks(Function &F) {
SmallPtrSet<BasicBlock*, 128> Reachable;
- bool Changed = markAliveBlocks(F.begin(), Reachable);
+ bool Changed = markAliveBlocks(F, Reachable);
// If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
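markAliveBlocks is, at its core, a worklist walk from the entry block; any block it never reaches is unreachable and can be deleted. A standalone sketch of that traversal over a plain adjacency list (block indices stand in for BasicBlock pointers):

    #include <cstddef>
    #include <unordered_set>
    #include <vector>

    // Succ[i] lists the successor blocks of block i; block 0 is the entry.
    std::unordered_set<size_t>
    reachableFromEntry(const std::vector<std::vector<size_t>> &Succ) {
      std::unordered_set<size_t> Reachable;
      std::vector<size_t> Worklist{0};   // block 0 plays the role of F.begin()
      Reachable.insert(0);
      while (!Worklist.empty()) {
        size_t BB = Worklist.back();
        Worklist.pop_back();
        for (size_t S : Succ[BB])
          if (Reachable.insert(S).second)  // first visit
            Worklist.push_back(S);
      }
      return Reachable;  // blocks outside this set are unreachable
    }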
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 90dfaba..8b0afa6 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -144,8 +144,6 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
AA, DT, LI, PreserveLCSSA);
- PreheaderBB->getTerminator()->setDebugLoc(
- Header->getFirstNonPHI()->getDebugLoc());
DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
<< PreheaderBB->getName() << "\n");
@@ -778,7 +776,7 @@ namespace {
/// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
void verifyAnalysis() const override;
};
-}
+} // namespace
char LoopSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index d1774df..919b45d 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -113,6 +113,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// Create a branch around the original loop, which is taken if there are no
// iterations remaining to be executed after running the prologue.
Instruction *InsertPt = PrologEnd->getTerminator();
+ IRBuilder<> B(InsertPt);
assert(Count != 0 && "nonsensical Count!");
@@ -120,9 +121,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// (since Count is a power of 2). This means %xtraiter is (BECount + 1) and
// and all of the iterations of this loop were executed by the prologue. Note
// that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow.
- Instruction *BrLoopExit =
- new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, BECount,
- ConstantInt::get(BECount->getType(), Count - 1));
+ Value *BrLoopExit =
+ B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
BasicBlock *Exit = L->getUniqueExitBlock();
assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
@@ -130,7 +130,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", AA, DT, LI,
P->mustPreserveAnalysisID(LCSSAID));
// Add the branch to the exit block (around the unrolled loop)
- BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt);
+ B.CreateCondBr(BrLoopExit, Exit, NewPH);
InsertPt->eraseFromParent();
}
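The guard built above branches around the unrolled loop when BECount <u Count - 1, i.e. exactly when the prologue alone covers the whole trip count. A standalone check of that equivalence for an assumed unroll factor of 4:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t Count = 4;   // assumed power-of-two unroll factor
      for (uint64_t BECount = 0; BECount < 64; ++BECount) {
        const uint64_t TripCount = BECount + 1;
        const bool SkipUnrolledLoop = BECount < Count - 1;  // the ICMP_ULT guard
        const bool PrologHandlesAll = TripCount < Count;    // equivalent claim
        assert(SkipUnrolledLoop == PrologHandlesAll);
      }
      return 0;
    }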
@@ -184,23 +184,22 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
VMap.erase((*BB)->getTerminator());
BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
+ IRBuilder<> Builder(LatchBR);
if (UnrollProlog) {
- LatchBR->eraseFromParent();
- BranchInst::Create(InsertBot, NewBB);
+ Builder.CreateBr(InsertBot);
} else {
PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter",
FirstLoopBB->getFirstNonPHI());
- IRBuilder<> Builder(LatchBR);
Value *IdxSub =
Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
NewIdx->getName() + ".sub");
Value *IdxCmp =
Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
- BranchInst::Create(FirstLoopBB, InsertBot, IdxCmp, NewBB);
+ Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
NewIdx->addIncoming(NewIter, InsertTop);
NewIdx->addIncoming(IdxSub, NewBB);
- LatchBR->eraseFromParent();
}
+ LatchBR->eraseFromParent();
}
}
@@ -370,7 +369,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
// Branch to either the extra iterations or the cloned/unrolled loop
// We will fix up the true branch label when adding loop body copies
- BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
+ B.CreateCondBr(BranchVal, PEnd, PEnd);
assert(PreHeaderBR->isUnconditional() &&
PreHeaderBR->getSuccessor(0) == PEnd &&
"CFG edges in Preheader are not correct");
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 5f25e6b..5cbde94 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -26,17 +26,17 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "loop-utils"
-bool ReductionDescriptor::areAllUsesIn(Instruction *I,
- SmallPtrSetImpl<Instruction *> &Set) {
+bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
+ SmallPtrSetImpl<Instruction *> &Set) {
for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
if (!Set.count(dyn_cast<Instruction>(*Use)))
return false;
return true;
}
-bool ReductionDescriptor::AddReductionVar(PHINode *Phi, ReductionKind Kind,
- Loop *TheLoop, bool HasFunNoNaNAttr,
- ReductionDescriptor &RedDes) {
+bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
+ Loop *TheLoop, bool HasFunNoNaNAttr,
+ RecurrenceDescriptor &RedDes) {
if (Phi->getNumIncomingValues() != 2)
return false;
@@ -66,7 +66,7 @@ bool ReductionDescriptor::AddReductionVar(PHINode *Phi, ReductionKind Kind,
// the number of instruction we saw from the recognized min/max pattern,
// to make sure we only see exactly the two instructions.
unsigned NumCmpSelectPatternInst = 0;
- ReductionInstDesc ReduxDesc(false, nullptr);
+ InstDesc ReduxDesc(false, nullptr);
SmallPtrSet<Instruction *, 8> VisitedInsts;
SmallVector<Instruction *, 8> Worklist;
@@ -111,8 +111,8 @@ bool ReductionDescriptor::AddReductionVar(PHINode *Phi, ReductionKind Kind,
return false;
// Any reduction instruction must be of one of the allowed kinds.
- ReduxDesc = isReductionInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
- if (!ReduxDesc.isReduction())
+ ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
+ if (!ReduxDesc.isRecurrence())
return false;
// A reduction operation must only have one use of the reduction value.
@@ -164,7 +164,7 @@ bool ReductionDescriptor::AddReductionVar(PHINode *Phi, ReductionKind Kind,
// Process instructions only once (termination). Each reduction cycle
// value must only be used once, except by phi nodes and min/max
// reductions which are represented as a cmp followed by a select.
- ReductionInstDesc IgnoredVal(false, nullptr);
+ InstDesc IgnoredVal(false, nullptr);
if (VisitedInsts.insert(UI).second) {
if (isa<PHINode>(UI))
PHIs.push_back(UI);
@@ -173,7 +173,7 @@ bool ReductionDescriptor::AddReductionVar(PHINode *Phi, ReductionKind Kind,
} else if (!isa<PHINode>(UI) &&
((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
!isa<SelectInst>(UI)) ||
- !isMinMaxSelectCmpPattern(UI, IgnoredVal).isReduction()))
+ !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))
return false;
// Remember that we completed the cycle.
@@ -197,11 +197,11 @@ bool ReductionDescriptor::AddReductionVar(PHINode *Phi, ReductionKind Kind,
// only have a single instruction with out-of-loop users.
// The ExitInstruction(Instruction which is allowed to have out-of-loop users)
- // is saved as part of the ReductionDescriptor.
+ // is saved as part of the RecurrenceDescriptor.
// Save the description of this reduction variable.
- ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
- ReduxDesc.getMinMaxKind());
+ RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind,
+ ReduxDesc.getMinMaxKind());
RedDes = RD;
@@ -210,9 +210,8 @@ bool ReductionDescriptor::AddReductionVar(PHINode *Phi, ReductionKind Kind,
/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
/// pattern corresponding to a min(X, Y) or max(X, Y).
-ReductionInstDesc
-ReductionDescriptor::isMinMaxSelectCmpPattern(Instruction *I,
- ReductionInstDesc &Prev) {
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {
assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
"Expect a select instruction");
@@ -223,84 +222,83 @@ ReductionDescriptor::isMinMaxSelectCmpPattern(Instruction *I,
// select.
if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
- return ReductionInstDesc(false, I);
- return ReductionInstDesc(Select, Prev.getMinMaxKind());
+ return InstDesc(false, I);
+ return InstDesc(Select, Prev.getMinMaxKind());
}
// Only handle single use cases for now.
if (!(Select = dyn_cast<SelectInst>(I)))
- return ReductionInstDesc(false, I);
+ return InstDesc(false, I);
if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
!(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
- return ReductionInstDesc(false, I);
+ return InstDesc(false, I);
if (!Cmp->hasOneUse())
- return ReductionInstDesc(false, I);
+ return InstDesc(false, I);
Value *CmpLeft;
Value *CmpRight;
// Look for a min/max pattern.
if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, ReductionInstDesc::MRK_UIntMin);
+ return InstDesc(Select, MRK_UIntMin);
else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, ReductionInstDesc::MRK_UIntMax);
+ return InstDesc(Select, MRK_UIntMax);
else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, ReductionInstDesc::MRK_SIntMax);
+ return InstDesc(Select, MRK_SIntMax);
else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, ReductionInstDesc::MRK_SIntMin);
+ return InstDesc(Select, MRK_SIntMin);
else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMin);
+ return InstDesc(Select, MRK_FloatMin);
else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMax);
+ return InstDesc(Select, MRK_FloatMax);
else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMin);
+ return InstDesc(Select, MRK_FloatMin);
else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMax);
+ return InstDesc(Select, MRK_FloatMax);
- return ReductionInstDesc(false, I);
+ return InstDesc(false, I);
}
-ReductionInstDesc ReductionDescriptor::isReductionInstr(Instruction *I,
- ReductionKind Kind,
- ReductionInstDesc &Prev,
- bool HasFunNoNaNAttr) {
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
+ InstDesc &Prev, bool HasFunNoNaNAttr) {
bool FP = I->getType()->isFloatingPointTy();
bool FastMath = FP && I->hasUnsafeAlgebra();
switch (I->getOpcode()) {
default:
- return ReductionInstDesc(false, I);
+ return InstDesc(false, I);
case Instruction::PHI:
if (FP &&
(Kind != RK_FloatMult && Kind != RK_FloatAdd && Kind != RK_FloatMinMax))
- return ReductionInstDesc(false, I);
- return ReductionInstDesc(I, Prev.getMinMaxKind());
+ return InstDesc(false, I);
+ return InstDesc(I, Prev.getMinMaxKind());
case Instruction::Sub:
case Instruction::Add:
- return ReductionInstDesc(Kind == RK_IntegerAdd, I);
+ return InstDesc(Kind == RK_IntegerAdd, I);
case Instruction::Mul:
- return ReductionInstDesc(Kind == RK_IntegerMult, I);
+ return InstDesc(Kind == RK_IntegerMult, I);
case Instruction::And:
- return ReductionInstDesc(Kind == RK_IntegerAnd, I);
+ return InstDesc(Kind == RK_IntegerAnd, I);
case Instruction::Or:
- return ReductionInstDesc(Kind == RK_IntegerOr, I);
+ return InstDesc(Kind == RK_IntegerOr, I);
case Instruction::Xor:
- return ReductionInstDesc(Kind == RK_IntegerXor, I);
+ return InstDesc(Kind == RK_IntegerXor, I);
case Instruction::FMul:
- return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
+ return InstDesc(Kind == RK_FloatMult && FastMath, I);
case Instruction::FSub:
case Instruction::FAdd:
- return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
+ return InstDesc(Kind == RK_FloatAdd && FastMath, I);
case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Select:
if (Kind != RK_IntegerMinMax &&
(!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
- return ReductionInstDesc(false, I);
+ return InstDesc(false, I);
return isMinMaxSelectCmpPattern(I, Prev);
}
}
-bool ReductionDescriptor::hasMultipleUsesOf(
+bool RecurrenceDescriptor::hasMultipleUsesOf(
Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) {
unsigned NumUses = 0;
for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
@@ -313,8 +311,8 @@ bool ReductionDescriptor::hasMultipleUsesOf(
return false;
}
-bool ReductionDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
- ReductionDescriptor &RedDes) {
+bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
+ RecurrenceDescriptor &RedDes) {
bool HasFunNoNaNAttr = false;
BasicBlock *Header = TheLoop->getHeader();
@@ -366,7 +364,8 @@ bool ReductionDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
/// This function returns the identity element (or neutral element) for
/// the operation K.
-Constant *ReductionDescriptor::getReductionIdentity(ReductionKind K, Type *Tp) {
+Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
+ Type *Tp) {
switch (K) {
case RK_IntegerXor:
case RK_IntegerAdd:
@@ -386,12 +385,12 @@ Constant *ReductionDescriptor::getReductionIdentity(ReductionKind K, Type *Tp) {
// Adding zero to a number does not change it.
return ConstantFP::get(Tp, 0.0L);
default:
- llvm_unreachable("Unknown reduction kind");
+ llvm_unreachable("Unknown recurrence kind");
}
}
-/// This function translates the reduction kind to an LLVM binary operator.
-unsigned ReductionDescriptor::getReductionBinOp(ReductionKind Kind) {
+/// This function translates the recurrence kind to an LLVM binary operator.
+unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
switch (Kind) {
case RK_IntegerAdd:
return Instruction::Add;
@@ -412,41 +411,39 @@ unsigned ReductionDescriptor::getReductionBinOp(ReductionKind Kind) {
case RK_FloatMinMax:
return Instruction::FCmp;
default:
- llvm_unreachable("Unknown reduction operation");
+ llvm_unreachable("Unknown recurrence operation");
}
}
-Value *
-ReductionDescriptor::createMinMaxOp(IRBuilder<> &Builder,
- ReductionInstDesc::MinMaxReductionKind RK,
- Value *Left, Value *Right) {
+Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
+ MinMaxRecurrenceKind RK,
+ Value *Left, Value *Right) {
CmpInst::Predicate P = CmpInst::ICMP_NE;
switch (RK) {
default:
- llvm_unreachable("Unknown min/max reduction kind");
- case ReductionInstDesc::MRK_UIntMin:
+ llvm_unreachable("Unknown min/max recurrence kind");
+ case MRK_UIntMin:
P = CmpInst::ICMP_ULT;
break;
- case ReductionInstDesc::MRK_UIntMax:
+ case MRK_UIntMax:
P = CmpInst::ICMP_UGT;
break;
- case ReductionInstDesc::MRK_SIntMin:
+ case MRK_SIntMin:
P = CmpInst::ICMP_SLT;
break;
- case ReductionInstDesc::MRK_SIntMax:
+ case MRK_SIntMax:
P = CmpInst::ICMP_SGT;
break;
- case ReductionInstDesc::MRK_FloatMin:
+ case MRK_FloatMin:
P = CmpInst::FCMP_OLT;
break;
- case ReductionInstDesc::MRK_FloatMax:
+ case MRK_FloatMax:
P = CmpInst::FCMP_OGT;
break;
}
Value *Cmp;
- if (RK == ReductionInstDesc::MRK_FloatMin ||
- RK == ReductionInstDesc::MRK_FloatMax)
+ if (RK == MRK_FloatMin || RK == MRK_FloatMax)
Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
else
Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
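createMinMaxOp lowers a min/max recurrence step to a compare followed by a select on the same operands. A standalone analogue over plain integers (the enum mirrors only the four integer kinds and is purely illustrative):

    #include <cstdint>

    enum MinMaxKind { UIntMin, UIntMax, SIntMin, SIntMax };

    // One "reduction step": compare, then select the winning operand.
    int64_t minMaxStep(MinMaxKind K, int64_t L, int64_t R) {
      bool Cmp = false;
      switch (K) {
      case UIntMin: Cmp = (uint64_t)L < (uint64_t)R; break;
      case UIntMax: Cmp = (uint64_t)L > (uint64_t)R; break;
      case SIntMin: Cmp = L < R; break;
      case SIntMax: Cmp = L > R; break;
      }
      return Cmp ? L : R;   // the "select" on the compare result
    }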
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index e0e0e90..c1b0645 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -101,7 +101,7 @@ namespace {
return CI1->getValue().slt(CI2->getValue());
}
};
-}
+} // namespace
char LowerSwitch::ID = 0;
INITIALIZE_PASS(LowerSwitch, "lowerswitch",
@@ -364,9 +364,9 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
std::sort(Cases.begin(), Cases.end(), CaseCmp());
// Merge case into clusters
- if (Cases.size()>=2)
- for (CaseItr I = Cases.begin(), J = std::next(Cases.begin());
- J != Cases.end();) {
+ if (Cases.size() >= 2) {
+ CaseItr I = Cases.begin();
+ for (CaseItr J = std::next(I), E = Cases.end(); J != E; ++J) {
int64_t nextValue = J->Low->getSExtValue();
int64_t currentValue = I->High->getSExtValue();
BasicBlock* nextBB = J->BB;
@@ -374,13 +374,16 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
// If the two neighboring cases go to the same destination, merge them
// into a single case.
- if ((nextValue-currentValue==1) && (currentBB == nextBB)) {
+ assert(nextValue > currentValue && "Cases should be strictly ascending");
+ if ((nextValue == currentValue + 1) && (currentBB == nextBB)) {
I->High = J->High;
- J = Cases.erase(J);
- } else {
- I = J++;
+ // FIXME: Combine branch weights.
+ } else if (++I != J) {
+ *I = *J;
}
}
+ Cases.erase(std::next(I), Cases.end());
+ }
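The rewritten Clusterify merges adjacent, strictly ascending case ranges that share a destination by compacting the vector in place rather than erasing elements one at a time. A standalone sketch of the same idiom (CaseRange here is a toy struct, not the LowerSwitch type):

    #include <cstdint>
    #include <iterator>
    #include <vector>

    struct CaseRange { int64_t Low, High; int Dest; };

    // Assumes Cases is sorted with strictly ascending, non-overlapping ranges.
    void mergeAdjacent(std::vector<CaseRange> &Cases) {
      if (Cases.size() < 2) return;
      auto I = Cases.begin();
      for (auto J = std::next(I), E = Cases.end(); J != E; ++J) {
        if (J->Low == I->High + 1 && J->Dest == I->Dest) {
          I->High = J->High;          // extend the current cluster
        } else if (++I != J) {
          *I = *J;                    // keep J, shifting it into place
        }
      }
      Cases.erase(std::next(I), Cases.end());  // drop the leftover tail
    }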
for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
if (I->Low != I->High)
@@ -476,12 +479,10 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
// cases.
assert(MaxPop > 0 && PopSucc);
Default = PopSucc;
- for (CaseItr I = Cases.begin(); I != Cases.end();) {
- if (I->BB == PopSucc)
- I = Cases.erase(I);
- else
- ++I;
- }
+ Cases.erase(std::remove_if(
+ Cases.begin(), Cases.end(),
+ [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }),
+ Cases.end());
// If there are no cases left, just branch.
if (Cases.empty()) {
diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index 395a46b..46dd65e 100644
--- a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -131,7 +131,7 @@ namespace {
return true;
}
};
-}
+} // namespace
char MetaRenamer::ID = 0;
INITIALIZE_PASS(MetaRenamer, "metarenamer",
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index 88b39dd..c0988987 100644
--- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -303,7 +303,7 @@ public:
}
};
-} // End llvm namespace
+} // namespace llvm
/// Check to see if AvailableVals has an entry for the specified BB and if so,
/// return it. If not, construct SSA form by first calculating the required
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 60ac271..3d7ab0f 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -136,7 +136,7 @@ public:
: TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {}
bool run(BasicBlock *BB);
};
-}
+} // namespace
/// SafeToMergeTerminators - Return true if it is safe to merge these two
/// terminator instructions together.
@@ -502,7 +502,7 @@ private:
}
};
-}
+} // namespace
static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
Instruction *Cond = nullptr;
@@ -3717,7 +3717,7 @@ namespace {
// For ArrayKind, this is the array.
GlobalVariable *Array;
};
-}
+} // namespace
SwitchLookupTable::SwitchLookupTable(
Module &M, uint64_t TableSize, ConstantInt *Offset,
@@ -4058,7 +4058,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
return false;
// Figure out the corresponding result for each case value and phi node in the
- // common destination, as well as the the min and max case values.
+ // common destination, as well as the min and max case values.
assert(SI->case_begin() != SI->case_end());
SwitchInst::CaseIt CI = SI->case_begin();
ConstantInt *MinCaseVal = CI.getCaseValue();
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index ab30aa1..68986ac 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -77,7 +77,7 @@ namespace {
Instruction *splitOverflowIntrinsic(Instruction *IVUser,
const DominatorTree *DT);
};
-}
+} // namespace
/// Fold an IV operand into its use. This removes increments of an
/// aligned IV when used by a instruction that ignores the low bits.
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
index c499c87..0a583a5 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -100,7 +100,7 @@ namespace {
return Changed;
}
};
-}
+} // namespace
char InstSimplifier::ID = 0;
INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
diff --git a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index a2a54da..4cc278f 100644
--- a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -538,7 +538,7 @@ void RewriteSymbols::loadAndParseMapFiles() {
for (const auto &MapFile : MapFiles)
parser.parse(MapFile, &Descriptors);
}
-}
+} // namespace
INITIALIZE_PASS(RewriteSymbols, "rewrite-symbols", "Rewrite Symbols", false,
false)
diff --git a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index 215d6f9..fd7661f 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -3192,7 +3192,7 @@ namespace {
DEBUG(dbgs() << "BBV: final: \n" << BB << "\n");
}
-}
+} // namespace
char BBVectorize::ID = 0;
static const char bb_vectorize_name[] = "Basic-Block Vectorization";
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 95c9381..b7faa20 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -872,7 +872,7 @@ public:
/// ReductionList contains the reduction descriptors for all
/// of the reductions that were found in the loop.
- typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
+ typedef DenseMap<PHINode *, RecurrenceDescriptor> ReductionList;
/// InductionList saves induction variables and maps them to the
/// induction descriptor.
@@ -2906,7 +2906,7 @@ struct CSEDenseMapInfo {
return LHS->isIdenticalTo(RHS);
}
};
-}
+} // namespace
/// \brief Check whether this block is a predicated block.
/// Due to if predication of stores we might create a sequence of "if(pred) a[i]
@@ -3093,13 +3093,13 @@ void InnerLoopVectorizer::vectorizeLoop() {
// Find the reduction variable descriptor.
assert(Legal->getReductionVars()->count(RdxPhi) &&
"Unable to find the reduction variable");
- ReductionDescriptor RdxDesc = (*Legal->getReductionVars())[RdxPhi];
+ RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[RdxPhi];
- ReductionDescriptor::ReductionKind RK = RdxDesc.getReductionKind();
- TrackingVH<Value> ReductionStartValue = RdxDesc.getReductionStartValue();
+ RecurrenceDescriptor::RecurrenceKind RK = RdxDesc.getRecurrenceKind();
+ TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
- ReductionInstDesc::MinMaxReductionKind MinMaxKind =
- RdxDesc.getMinMaxReductionKind();
+ RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =
+ RdxDesc.getMinMaxRecurrenceKind();
setDebugLocFromInst(Builder, ReductionStartValue);
// We need to generate a reduction vector from the incoming scalar.
@@ -3116,8 +3116,8 @@ void InnerLoopVectorizer::vectorizeLoop() {
// one for multiplication, -1 for And.
Value *Identity;
Value *VectorStart;
- if (RK == ReductionDescriptor::RK_IntegerMinMax ||
- RK == ReductionDescriptor::RK_FloatMinMax) {
+ if (RK == RecurrenceDescriptor::RK_IntegerMinMax ||
+ RK == RecurrenceDescriptor::RK_FloatMinMax) {
// MinMax reductions have the start value as their identity.
if (VF == 1) {
VectorStart = Identity = ReductionStartValue;
@@ -3127,8 +3127,8 @@ void InnerLoopVectorizer::vectorizeLoop() {
}
} else {
// Handle other reduction kinds:
- Constant *Iden =
- ReductionDescriptor::getReductionIdentity(RK, VecTy->getScalarType());
+ Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity(
+ RK, VecTy->getScalarType());
if (VF == 1) {
Identity = Iden;
// This vector is the Identity vector where the first element is the
@@ -3185,7 +3185,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = RdxParts[0];
- unsigned Op = ReductionDescriptor::getReductionBinOp(RK);
+ unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);
setDebugLocFromInst(Builder, ReducedPartRdx);
for (unsigned part = 1; part < UF; ++part) {
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
@@ -3194,7 +3194,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxParts[part],
ReducedPartRdx, "bin.rdx"));
else
- ReducedPartRdx = ReductionDescriptor::createMinMaxOp(
+ ReducedPartRdx = RecurrenceDescriptor::createMinMaxOp(
Builder, MinMaxKind, ReducedPartRdx, RdxParts[part]);
}
@@ -3226,8 +3226,8 @@ void InnerLoopVectorizer::vectorizeLoop() {
TmpVec = addFastMathFlag(Builder.CreateBinOp(
(Instruction::BinaryOps)Op, TmpVec, Shuf, "bin.rdx"));
else
- TmpVec = ReductionDescriptor::createMinMaxOp(Builder, MinMaxKind,
- TmpVec, Shuf);
+ TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind,
+ TmpVec, Shuf);
}
// The result is in the first element of the vector.
@@ -4040,8 +4040,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
- if (ReductionDescriptor::isReductionPHI(Phi, TheLoop,
- Reductions[Phi])) {
+ if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop,
+ Reductions[Phi])) {
AllowedExit.insert(Reductions[Phi].getLoopExitInstr());
continue;
}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a3a45c8..370e295 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -315,12 +315,12 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
}
/// \returns the AA location that is being accessed by the instruction.
-static AliasAnalysis::Location getLocation(Instruction *I, AliasAnalysis *AA) {
+static MemoryLocation getLocation(Instruction *I, AliasAnalysis *AA) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return MemoryLocation::get(SI);
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return MemoryLocation::get(LI);
- return AliasAnalysis::Location();
+ return MemoryLocation();
}
/// \returns True if the instruction is not a volatile or atomic load/store.
@@ -515,7 +515,7 @@ private:
///
/// \p Loc1 is the location of \p Inst1. It is passed explicitly because it
/// is invariant in the calling loop.
- bool isAliased(const AliasAnalysis::Location &Loc1, Instruction *Inst1,
+ bool isAliased(const MemoryLocation &Loc1, Instruction *Inst1,
Instruction *Inst2) {
// First check if the result is already in the cache.
@@ -524,7 +524,7 @@ private:
if (result.hasValue()) {
return result.getValue();
}
- AliasAnalysis::Location Loc2 = getLocation(Inst2, AA);
+ MemoryLocation Loc2 = getLocation(Inst2, AA);
bool aliased = true;
if (Loc1.Ptr && Loc2.Ptr && isSimple(Inst1) && isSimple(Inst2)) {
// Do the alias check.
@@ -1637,8 +1637,10 @@ bool BoUpSLP::isFullyVectorizableTinyTree() {
if (VectorizableTree.size() != 2)
return false;
- // Handle splat stores.
- if (!VectorizableTree[0].NeedToGather && isSplat(VectorizableTree[1].Scalars))
+ // Handle splat and all-constants stores.
+ if (!VectorizableTree[0].NeedToGather &&
+ (allConstant(VectorizableTree[1].Scalars) ||
+ isSplat(VectorizableTree[1].Scalars)))
return true;
// Gathering cost would be too much for tiny trees.
@@ -2903,7 +2905,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
ScheduleData *DepDest = BundleMember->NextLoadStore;
if (DepDest) {
Instruction *SrcInst = BundleMember->Inst;
- AliasAnalysis::Location SrcLoc = getLocation(SrcInst, SLP->AA);
+ MemoryLocation SrcLoc = getLocation(SrcInst, SLP->AA);
bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
unsigned numAliased = 0;
unsigned DistToSrc = 1;
diff --git a/contrib/llvm/tools/clang/include/clang-c/Index.h b/contrib/llvm/tools/clang/include/clang-c/Index.h
index 3276afc..a224180 100644
--- a/contrib/llvm/tools/clang/include/clang-c/Index.h
+++ b/contrib/llvm/tools/clang/include/clang-c/Index.h
@@ -2225,7 +2225,12 @@ enum CXCursorKind {
*/
CXCursor_OMPTeamsDirective = 253,
- CXCursor_LastStmt = CXCursor_OMPTeamsDirective,
+  /** \brief OpenMP taskgroup directive.
+ */
+ CXCursor_OMPTaskgroupDirective = 254,
+
+
+ CXCursor_LastStmt = CXCursor_OMPTaskgroupDirective,
/**
* \brief Cursor that represents the translation unit itself.
@@ -5628,7 +5633,7 @@ typedef enum {
* reused after indexing is finished. Set to \c NULL if you do not require it.
*
* \returns 0 on success or if there were errors from which the compiler could
- * recover. If there is a failure from which the there is no recovery, returns
+ * recover. If there is a failure from which there is no recovery, returns
* a non-zero \c CXErrorCode.
*
* The rest of the parameters are the same as #clang_parseTranslationUnit.
@@ -5659,7 +5664,7 @@ CINDEX_LINKAGE int clang_indexSourceFile(CXIndexAction,
*
* The parameters are the same as #clang_indexSourceFile.
*
- * \returns If there is a failure from which the there is no recovery, returns
+ * \returns If there is a failure from which there is no recovery, returns
* non-zero, otherwise returns 0.
*/
CINDEX_LINKAGE int clang_indexTranslationUnit(CXIndexAction,
diff --git a/contrib/llvm/tools/clang/include/clang/ARCMigrate/ARCMT.h b/contrib/llvm/tools/clang/include/clang/ARCMigrate/ARCMT.h
index ad4f23c..7408186 100644
--- a/contrib/llvm/tools/clang/include/clang/ARCMigrate/ARCMT.h
+++ b/contrib/llvm/tools/clang/include/clang/ARCMigrate/ARCMT.h
@@ -17,6 +17,7 @@
namespace clang {
class ASTContext;
class DiagnosticConsumer;
+ class PCHContainerOperations;
namespace arcmt {
class MigrationPass;
@@ -37,19 +38,22 @@ namespace arcmt {
/// the pre-migration ARC diagnostics.
///
/// \returns false if no error is produced, true otherwise.
-bool checkForManualIssues(CompilerInvocation &CI,
- const FrontendInputFile &Input,
- DiagnosticConsumer *DiagClient,
- bool emitPremigrationARCErrors = false,
- StringRef plistOut = StringRef());
+bool
+checkForManualIssues(CompilerInvocation &CI, const FrontendInputFile &Input,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ DiagnosticConsumer *DiagClient,
+ bool emitPremigrationARCErrors = false,
+ StringRef plistOut = StringRef());
/// \brief Works similar to checkForManualIssues but instead of checking, it
/// applies automatic modifications to source files to conform to ARC.
///
/// \returns false if no error is produced, true otherwise.
-bool applyTransformations(CompilerInvocation &origCI,
- const FrontendInputFile &Input,
- DiagnosticConsumer *DiagClient);
+bool
+applyTransformations(CompilerInvocation &origCI,
+ const FrontendInputFile &Input,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ DiagnosticConsumer *DiagClient);
/// \brief Applies automatic modifications and produces temporary files
/// and metadata into the \p outputDir path.
@@ -62,12 +66,11 @@ bool applyTransformations(CompilerInvocation &origCI,
/// the pre-migration ARC diagnostics.
///
/// \returns false if no error is produced, true otherwise.
-bool migrateWithTemporaryFiles(CompilerInvocation &origCI,
- const FrontendInputFile &Input,
- DiagnosticConsumer *DiagClient,
- StringRef outputDir,
- bool emitPremigrationARCErrors,
- StringRef plistOut);
+bool migrateWithTemporaryFiles(
+ CompilerInvocation &origCI, const FrontendInputFile &Input,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ DiagnosticConsumer *DiagClient, StringRef outputDir,
+ bool emitPremigrationARCErrors, StringRef plistOut);
/// \brief Get the set of file remappings from the \p outputDir path that
/// migrateWithTemporaryFiles produced.
@@ -93,13 +96,16 @@ std::vector<TransformFn> getAllTransformations(LangOptions::GCMode OrigGCMode,
class MigrationProcess {
CompilerInvocation OrigCI;
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps;
DiagnosticConsumer *DiagClient;
FileRemapper Remapper;
public:
bool HadARCErrors;
- MigrationProcess(const CompilerInvocation &CI, DiagnosticConsumer *diagClient,
+ MigrationProcess(const CompilerInvocation &CI,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ DiagnosticConsumer *diagClient,
StringRef outputDir = StringRef());
class RewriteListener {
diff --git a/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h b/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h
index 049221a..da288c4 100644
--- a/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h
+++ b/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h
@@ -1854,6 +1854,36 @@ public:
getCanonicalType(T2).getTypePtr();
}
+ bool hasSameNullabilityTypeQualifier(QualType SubT, QualType SuperT,
+ bool IsParam) const {
+ auto SubTnullability = SubT->getNullability(*this);
+ auto SuperTnullability = SuperT->getNullability(*this);
+ if (SubTnullability.hasValue() == SuperTnullability.hasValue()) {
+ // Neither has nullability; return true
+ if (!SubTnullability)
+ return true;
+ // Both have nullability qualifier.
+ if (*SubTnullability == *SuperTnullability ||
+ *SubTnullability == NullabilityKind::Unspecified ||
+ *SuperTnullability == NullabilityKind::Unspecified)
+ return true;
+
+ if (IsParam) {
+ // Ok for the superclass method parameter to be "nonnull" and the subclass
+ // method parameter to be "nullable"
+ return (*SuperTnullability == NullabilityKind::NonNull &&
+ *SubTnullability == NullabilityKind::Nullable);
+ }
+ else {
+ // For the return type, it's okay for the superclass method to specify
+ // "nullable" and the subclass method specify "nonnull"
+ return (*SuperTnullability == NullabilityKind::Nullable &&
+ *SubTnullability == NullabilityKind::NonNull);
+ }
+ }
+ return true;
+ }
+
bool ObjCMethodsAreEqual(const ObjCMethodDecl *MethodDecl,
const ObjCMethodDecl *MethodImp);
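hasSameNullabilityTypeQualifier encodes a small compatibility table: unannotated or matching nullability is accepted, a parameter may weaken nonnull to nullable in the subclass, and a return type may strengthen nullable to nonnull. A standalone sketch of that table (an assumed simplification of the QualType-based check):

    #include <optional>

    enum class Nullability { NonNull, Nullable, Unspecified };

    bool compatibleNullability(std::optional<Nullability> Sub,
                               std::optional<Nullability> Super, bool IsParam) {
      if (Sub.has_value() != Super.has_value())
        return true;                       // only one side annotated: accept
      if (!Sub)
        return true;                       // neither side annotated
      if (*Sub == *Super || *Sub == Nullability::Unspecified ||
          *Super == Nullability::Unspecified)
        return true;
      if (IsParam)                         // parameter position
        return *Super == Nullability::NonNull && *Sub == Nullability::Nullable;
      return *Super == Nullability::Nullable &&   // return position
             *Sub == Nullability::NonNull;
    }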
diff --git a/contrib/llvm/tools/clang/include/clang/AST/CommentParser.h b/contrib/llvm/tools/clang/include/clang/AST/CommentParser.h
index 42bf4c9..fa88628 100644
--- a/contrib/llvm/tools/clang/include/clang/AST/CommentParser.h
+++ b/contrib/llvm/tools/clang/include/clang/AST/CommentParser.h
@@ -75,11 +75,7 @@ class Parser {
return;
MoreLATokens.push_back(Tok);
- for (const Token *I = &Toks.back(),
- *B = &Toks.front();
- I != B; --I) {
- MoreLATokens.push_back(*I);
- }
+ MoreLATokens.append(Toks.rbegin(), std::prev(Toks.rend()));
Tok = Toks[0];
}
diff --git a/contrib/llvm/tools/clang/include/clang/AST/DataRecursiveASTVisitor.h b/contrib/llvm/tools/clang/include/clang/AST/DataRecursiveASTVisitor.h
index 971841e..ef88176 100644
--- a/contrib/llvm/tools/clang/include/clang/AST/DataRecursiveASTVisitor.h
+++ b/contrib/llvm/tools/clang/include/clang/AST/DataRecursiveASTVisitor.h
@@ -531,10 +531,7 @@ bool RecursiveASTVisitor<Derived>::TraverseStmt(Stmt *S) {
}
}
- for (SmallVectorImpl<Stmt *>::reverse_iterator RI = StmtsToEnqueue.rbegin(),
- RE = StmtsToEnqueue.rend();
- RI != RE; ++RI)
- Queue.push_back(*RI);
+ Queue.append(StmtsToEnqueue.rbegin(), StmtsToEnqueue.rend());
}
return true;
@@ -2204,9 +2201,11 @@ DEF_TRAVERSE_STMT(CXXThisExpr, {})
DEF_TRAVERSE_STMT(CXXThrowExpr, {})
DEF_TRAVERSE_STMT(UserDefinedLiteral, {})
DEF_TRAVERSE_STMT(DesignatedInitExpr, {})
+DEF_TRAVERSE_STMT(DesignatedInitUpdateExpr, {})
DEF_TRAVERSE_STMT(ExtVectorElementExpr, {})
DEF_TRAVERSE_STMT(GNUNullExpr, {})
DEF_TRAVERSE_STMT(ImplicitValueInitExpr, {})
+DEF_TRAVERSE_STMT(NoInitExpr, {})
DEF_TRAVERSE_STMT(ObjCBoolLiteralExpr, {})
DEF_TRAVERSE_STMT(ObjCEncodeExpr, {
if (TypeSourceInfo *TInfo = S->getEncodedTypeSourceInfo())
@@ -2356,6 +2355,9 @@ DEF_TRAVERSE_STMT(OMPBarrierDirective,
DEF_TRAVERSE_STMT(OMPTaskwaitDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
+DEF_TRAVERSE_STMT(OMPTaskgroupDirective,
+ { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
DEF_TRAVERSE_STMT(OMPFlushDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
diff --git a/contrib/llvm/tools/clang/include/clang/AST/DeclBase.h b/contrib/llvm/tools/clang/include/clang/AST/DeclBase.h
index f176e54..6b6ac3f 100644
--- a/contrib/llvm/tools/clang/include/clang/AST/DeclBase.h
+++ b/contrib/llvm/tools/clang/include/clang/AST/DeclBase.h
@@ -178,7 +178,12 @@ public:
OBJC_TQ_Out = 0x4,
OBJC_TQ_Bycopy = 0x8,
OBJC_TQ_Byref = 0x10,
- OBJC_TQ_Oneway = 0x20
+ OBJC_TQ_Oneway = 0x20,
+
+ /// The nullability qualifier is set when the nullability of the
+ /// result or parameter was expressed via a context-sensitive
+ /// keyword.
+ OBJC_TQ_CSNullability = 0x40
};
protected:
diff --git a/contrib/llvm/tools/clang/include/clang/AST/DeclCXX.h b/contrib/llvm/tools/clang/include/clang/AST/DeclCXX.h
index 537ad46..08451c0 100644
--- a/contrib/llvm/tools/clang/include/clang/AST/DeclCXX.h
+++ b/contrib/llvm/tools/clang/include/clang/AST/DeclCXX.h
@@ -1440,7 +1440,7 @@ public:
///
/// \returns true if this class is derived from \p Base, false otherwise.
///
- /// \todo add a separate paramaeter to configure IsDerivedFrom, rather than
+ /// \todo add a separate parameter to configure IsDerivedFrom, rather than
/// tangling input and output in \p Paths
bool isDerivedFrom(const CXXRecordDecl *Base, CXXBasePaths &Paths) const;
diff --git a/contrib/llvm/tools/clang/include/clang/AST/DeclObjC.h b/contrib/llvm/tools/clang/include/clang/AST/DeclObjC.h
index 4a5b4f3..c3fb577 100644
--- a/contrib/llvm/tools/clang/include/clang/AST/DeclObjC.h
+++ b/contrib/llvm/tools/clang/include/clang/AST/DeclObjC.h
@@ -141,7 +141,7 @@ private:
// NOTE: VC++ treats enums as signed, avoid using the ObjCDeclQualifier enum
/// in, inout, etc.
- unsigned objcDeclQualifier : 6;
+ unsigned objcDeclQualifier : 7;
/// \brief Indicates whether this method has a related result type.
unsigned RelatedResultType : 1;
@@ -2203,13 +2203,17 @@ public:
OBJC_PR_atomic = 0x100,
OBJC_PR_weak = 0x200,
OBJC_PR_strong = 0x400,
- OBJC_PR_unsafe_unretained = 0x800
+ OBJC_PR_unsafe_unretained = 0x800,
+ /// Indicates that the nullability of the type was spelled with a
+ /// property attribute rather than a type qualifier.
+ OBJC_PR_nullability = 0x1000,
+ OBJC_PR_null_resettable = 0x2000
// Adding a property should change NumPropertyAttrsBits
};
enum {
/// \brief Number of bits fitting all the property attributes.
- NumPropertyAttrsBits = 12
+ NumPropertyAttrsBits = 14
};
enum SetterKind { Assign, Retain, Copy, Weak };
@@ -2217,7 +2221,8 @@ public:
private:
SourceLocation AtLoc; // location of \@property
SourceLocation LParenLoc; // location of '(' starting attribute list or null.
- TypeSourceInfo *DeclType;
+ QualType DeclType;
+ TypeSourceInfo *DeclTypeSourceInfo;
unsigned PropertyAttributes : NumPropertyAttrsBits;
unsigned PropertyAttributesAsWritten : NumPropertyAttrsBits;
// \@required/\@optional
@@ -2232,12 +2237,13 @@ private:
ObjCPropertyDecl(DeclContext *DC, SourceLocation L, IdentifierInfo *Id,
SourceLocation AtLocation, SourceLocation LParenLocation,
- TypeSourceInfo *T)
+ QualType T, TypeSourceInfo *TSI,
+ PropertyControl propControl)
: NamedDecl(ObjCProperty, DC, L, Id), AtLoc(AtLocation),
- LParenLoc(LParenLocation), DeclType(T),
+ LParenLoc(LParenLocation), DeclType(T), DeclTypeSourceInfo(TSI),
PropertyAttributes(OBJC_PR_noattr),
PropertyAttributesAsWritten(OBJC_PR_noattr),
- PropertyImplementation(None),
+ PropertyImplementation(propControl),
GetterName(Selector()),
SetterName(Selector()),
GetterMethodDecl(nullptr), SetterMethodDecl(nullptr),
@@ -2248,7 +2254,8 @@ public:
SourceLocation L,
IdentifierInfo *Id, SourceLocation AtLocation,
SourceLocation LParenLocation,
- TypeSourceInfo *T,
+ QualType T,
+ TypeSourceInfo *TSI,
PropertyControl propControl = None);
static ObjCPropertyDecl *CreateDeserialized(ASTContext &C, unsigned ID);
@@ -2259,9 +2266,14 @@ public:
SourceLocation getLParenLoc() const { return LParenLoc; }
void setLParenLoc(SourceLocation L) { LParenLoc = L; }
- TypeSourceInfo *getTypeSourceInfo() const { return DeclType; }
- QualType getType() const { return DeclType->getType(); }
- void setType(TypeSourceInfo *T) { DeclType = T; }
+ TypeSourceInfo *getTypeSourceInfo() const { return DeclTypeSourceInfo; }
+
+ QualType getType() const { return DeclType; }
+
+ void setType(QualType T, TypeSourceInfo *TSI) {
+ DeclType = T;
+ DeclTypeSourceInfo = TSI;
+ }
PropertyAttributeKind getPropertyAttributes() const {
return PropertyAttributeKind(PropertyAttributes);
diff --git a/contrib/llvm/tools/clang/include/clang/AST/DeclTemplate.h b/contrib/llvm/tools/clang/include/clang/AST/DeclTemplate.h
index 90cfb20..0fc9b49 100644
--- a/contrib/llvm/tools/clang/include/clang/AST/DeclTemplate.h
+++ b/contrib/llvm/tools/clang/include/clang/AST/DeclTemplate.h
@@ -217,6 +217,88 @@ public:
}
};
+void *allocateDefaultArgStorageChain(const ASTContext &C);
+
+/// Storage for a default argument. This is conceptually either empty, or an
+/// argument value, or a pointer to a previous declaration that had a default
+/// argument.
+///
+/// However, this is complicated by modules: while we require all the default
+/// arguments for a template to be equivalent, there may be more than one, and
+/// we need to track all the originating parameters to determine if the default
+/// argument is visible.
+template<typename ParmDecl, typename ArgType>
+class DefaultArgStorage {
+ /// Storage for both the value *and* another parameter from which we inherit
+ /// the default argument. This is used when multiple default arguments for a
+ /// parameter are merged together from different modules.
+ struct Chain {
+ ParmDecl *PrevDeclWithDefaultArg;
+ ArgType Value;
+ };
+ static_assert(sizeof(Chain) == sizeof(void *) * 2,
+ "non-pointer argument type?");
+
+ llvm::PointerUnion3<ArgType, ParmDecl*, Chain*> ValueOrInherited;
+
+ static ParmDecl *getParmOwningDefaultArg(ParmDecl *Parm) {
+ const DefaultArgStorage &Storage = Parm->getDefaultArgStorage();
+ if (auto *Prev = Storage.ValueOrInherited.template dyn_cast<ParmDecl*>())
+ Parm = Prev;
+ assert(!Parm->getDefaultArgStorage()
+ .ValueOrInherited.template is<ParmDecl *>() &&
+ "should only be one level of indirection");
+ return Parm;
+ }
+
+public:
+ DefaultArgStorage() : ValueOrInherited(ArgType()) {}
+
+ /// Determine whether there is a default argument for this parameter.
+ bool isSet() const { return !ValueOrInherited.isNull(); }
+ /// Determine whether the default argument for this parameter was inherited
+ /// from a previous declaration of the same entity.
+ bool isInherited() const { return ValueOrInherited.template is<ParmDecl*>(); }
+ /// Get the default argument's value. This does not consider whether the
+ /// default argument is visible.
+ ArgType get() const {
+ const DefaultArgStorage *Storage = this;
+ if (auto *Prev = ValueOrInherited.template dyn_cast<ParmDecl*>())
+ Storage = &Prev->getDefaultArgStorage();
+ if (auto *C = Storage->ValueOrInherited.template dyn_cast<Chain*>())
+ return C->Value;
+ return Storage->ValueOrInherited.template get<ArgType>();
+ }
+ /// Get the parameter from which we inherit the default argument, if any.
+ /// This is the parameter on which the default argument was actually written.
+ const ParmDecl *getInheritedFrom() const {
+ if (auto *D = ValueOrInherited.template dyn_cast<ParmDecl*>())
+ return D;
+ if (auto *C = ValueOrInherited.template dyn_cast<Chain*>())
+ return C->PrevDeclWithDefaultArg;
+ return nullptr;
+ }
+ /// Set the default argument.
+ void set(ArgType Arg) {
+ assert(!isSet() && "default argument already set");
+ ValueOrInherited = Arg;
+ }
+ /// Set that the default argument was inherited from another parameter.
+ void setInherited(const ASTContext &C, ParmDecl *InheritedFrom) {
+ assert(!isInherited() && "default argument already inherited");
+ InheritedFrom = getParmOwningDefaultArg(InheritedFrom);
+ if (!isSet())
+ ValueOrInherited = InheritedFrom;
+ else
+ ValueOrInherited = new (allocateDefaultArgStorageChain(C))
+ Chain{InheritedFrom, ValueOrInherited.template get<ArgType>()};
+ }
+ /// Remove the default argument, even if it was inherited.
+ void clear() {
+ ValueOrInherited = ArgType();
+ }
+};
+
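Note: a minimal usage sketch of the DefaultArgStorage-backed interface that TemplateTypeParmDecl gains later in this patch. recordDefault is a hypothetical helper, not part of the change, and assumes the parameter has no default recorded yet (set() asserts otherwise).

#include "clang/AST/DeclTemplate.h"
using namespace clang;

// Hypothetical helper: record a default argument written on this parameter,
// or inherit one from a previous declaration merged in from another module.
void recordDefault(ASTContext &Ctx, TemplateTypeParmDecl *Parm,
                   TemplateTypeParmDecl *Prev, TypeSourceInfo *Written) {
  if (Written)
    Parm->setDefaultArgument(Written);            // default spelled here
  else if (Prev && Prev->hasDefaultArgument())
    Parm->setInheritedDefaultArgument(Ctx, Prev); // point at the owning decl
  if (Parm->hasDefaultArgument())
    (void)Parm->getDefaultArgument();             // QualType of the default
}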
//===----------------------------------------------------------------------===//
// Kinds of Templates
//===----------------------------------------------------------------------===//
@@ -942,18 +1024,16 @@ class TemplateTypeParmDecl : public TypeDecl {
/// If false, it was declared with the 'class' keyword.
bool Typename : 1;
- /// \brief Whether this template type parameter inherited its
- /// default argument.
- bool InheritedDefault : 1;
-
/// \brief The default template argument, if any.
- TypeSourceInfo *DefaultArgument;
+ typedef DefaultArgStorage<TemplateTypeParmDecl, TypeSourceInfo *>
+ DefArgStorage;
+ DefArgStorage DefaultArgument;
TemplateTypeParmDecl(DeclContext *DC, SourceLocation KeyLoc,
SourceLocation IdLoc, IdentifierInfo *Id,
bool Typename)
: TypeDecl(TemplateTypeParm, DC, IdLoc, Id, KeyLoc), Typename(Typename),
- InheritedDefault(false), DefaultArgument() { }
+ DefaultArgument() { }
/// Sema creates these on the stack during auto type deduction.
friend class Sema;
@@ -974,35 +1054,45 @@ public:
/// If not, it was declared with the 'class' keyword.
bool wasDeclaredWithTypename() const { return Typename; }
+ const DefArgStorage &getDefaultArgStorage() const { return DefaultArgument; }
+
/// \brief Determine whether this template parameter has a default
/// argument.
- bool hasDefaultArgument() const { return DefaultArgument != nullptr; }
+ bool hasDefaultArgument() const { return DefaultArgument.isSet(); }
/// \brief Retrieve the default argument, if any.
- QualType getDefaultArgument() const { return DefaultArgument->getType(); }
+ QualType getDefaultArgument() const {
+ return DefaultArgument.get()->getType();
+ }
/// \brief Retrieves the default argument's source information, if any.
- TypeSourceInfo *getDefaultArgumentInfo() const { return DefaultArgument; }
+ TypeSourceInfo *getDefaultArgumentInfo() const {
+ return DefaultArgument.get();
+ }
/// \brief Retrieves the location of the default argument declaration.
SourceLocation getDefaultArgumentLoc() const;
/// \brief Determines whether the default argument was inherited
/// from a previous declaration of this template.
- bool defaultArgumentWasInherited() const { return InheritedDefault; }
+ bool defaultArgumentWasInherited() const {
+ return DefaultArgument.isInherited();
+ }
- /// \brief Set the default argument for this template parameter, and
- /// whether that default argument was inherited from another
- /// declaration.
- void setDefaultArgument(TypeSourceInfo *DefArg, bool Inherited) {
- DefaultArgument = DefArg;
- InheritedDefault = Inherited;
+ /// \brief Set the default argument for this template parameter.
+ void setDefaultArgument(TypeSourceInfo *DefArg) {
+ DefaultArgument.set(DefArg);
+ }
+ /// \brief Set that this default argument was inherited from another
+ /// parameter.
+ void setInheritedDefaultArgument(const ASTContext &C,
+ TemplateTypeParmDecl *Prev) {
+ DefaultArgument.setInherited(C, Prev);
}
/// \brief Removes the default argument of this template parameter.
void removeDefaultArgument() {
- DefaultArgument = nullptr;
- InheritedDefault = false;
+ DefaultArgument.clear();
}
/// \brief Set whether this template type parameter was declared with
@@ -1034,7 +1124,8 @@ class NonTypeTemplateParmDecl
: public DeclaratorDecl, protected TemplateParmPosition {
/// \brief The default template argument, if any, and whether or not
/// it was inherited.
- llvm::PointerIntPair<Expr*, 1, bool> DefaultArgumentAndInherited;
+ typedef DefaultArgStorage<NonTypeTemplateParmDecl, Expr*> DefArgStorage;
+ DefArgStorage DefaultArgument;
// FIXME: Collapse this into TemplateParamPosition; or, just move depth/index
// down here to save memory.
@@ -1055,9 +1146,8 @@ class NonTypeTemplateParmDecl
IdentifierInfo *Id, QualType T,
bool ParameterPack, TypeSourceInfo *TInfo)
: DeclaratorDecl(NonTypeTemplateParm, DC, IdLoc, Id, T, TInfo, StartLoc),
- TemplateParmPosition(D, P), DefaultArgumentAndInherited(nullptr, false),
- ParameterPack(ParameterPack), ExpandedParameterPack(false),
- NumExpandedTypes(0)
+ TemplateParmPosition(D, P), ParameterPack(ParameterPack),
+ ExpandedParameterPack(false), NumExpandedTypes(0)
{ }
NonTypeTemplateParmDecl(DeclContext *DC, SourceLocation StartLoc,
@@ -1097,16 +1187,14 @@ public:
SourceRange getSourceRange() const override LLVM_READONLY;
+ const DefArgStorage &getDefaultArgStorage() const { return DefaultArgument; }
+
/// \brief Determine whether this template parameter has a default
/// argument.
- bool hasDefaultArgument() const {
- return DefaultArgumentAndInherited.getPointer() != nullptr;
- }
+ bool hasDefaultArgument() const { return DefaultArgument.isSet(); }
/// \brief Retrieve the default argument, if any.
- Expr *getDefaultArgument() const {
- return DefaultArgumentAndInherited.getPointer();
- }
+ Expr *getDefaultArgument() const { return DefaultArgument.get(); }
/// \brief Retrieve the location of the default argument, if any.
SourceLocation getDefaultArgumentLoc() const;
@@ -1114,22 +1202,20 @@ public:
/// \brief Determines whether the default argument was inherited
/// from a previous declaration of this template.
bool defaultArgumentWasInherited() const {
- return DefaultArgumentAndInherited.getInt();
+ return DefaultArgument.isInherited();
}
/// \brief Set the default argument for this template parameter, and
/// whether that default argument was inherited from another
/// declaration.
- void setDefaultArgument(Expr *DefArg, bool Inherited) {
- DefaultArgumentAndInherited.setPointer(DefArg);
- DefaultArgumentAndInherited.setInt(Inherited);
+ void setDefaultArgument(Expr *DefArg) { DefaultArgument.set(DefArg); }
+ void setInheritedDefaultArgument(const ASTContext &C,
+ NonTypeTemplateParmDecl *Parm) {
+ DefaultArgument.setInherited(C, Parm);
}
/// \brief Removes the default argument of this template parameter.
- void removeDefaultArgument() {
- DefaultArgumentAndInherited.setPointer(nullptr);
- DefaultArgumentAndInherited.setInt(false);
- }
+ void removeDefaultArgument() { DefaultArgument.clear(); }
/// \brief Whether this parameter is a non-type template parameter pack.
///
@@ -1217,10 +1303,10 @@ class TemplateTemplateParmDecl : public TemplateDecl,
{
void anchor() override;
- /// DefaultArgument - The default template argument, if any.
- TemplateArgumentLoc DefaultArgument;
- /// Whether or not the default argument was inherited.
- bool DefaultArgumentWasInherited;
+ /// \brief The default template argument, if any.
+ typedef DefaultArgStorage<TemplateTemplateParmDecl, TemplateArgumentLoc *>
+ DefArgStorage;
+ DefArgStorage DefaultArgument;
/// \brief Whether this parameter is a parameter pack.
bool ParameterPack;
@@ -1237,8 +1323,7 @@ class TemplateTemplateParmDecl : public TemplateDecl,
unsigned D, unsigned P, bool ParameterPack,
IdentifierInfo *Id, TemplateParameterList *Params)
: TemplateDecl(TemplateTemplateParm, DC, L, Id, Params),
- TemplateParmPosition(D, P), DefaultArgument(),
- DefaultArgumentWasInherited(false), ParameterPack(ParameterPack),
+ TemplateParmPosition(D, P), ParameterPack(ParameterPack),
ExpandedParameterPack(false), NumExpandedParams(0)
{ }
@@ -1322,15 +1407,16 @@ public:
return reinterpret_cast<TemplateParameterList *const *>(this + 1)[I];
}
+ const DefArgStorage &getDefaultArgStorage() const { return DefaultArgument; }
+
/// \brief Determine whether this template parameter has a default
/// argument.
- bool hasDefaultArgument() const {
- return !DefaultArgument.getArgument().isNull();
- }
+ bool hasDefaultArgument() const { return DefaultArgument.isSet(); }
/// \brief Retrieve the default argument, if any.
const TemplateArgumentLoc &getDefaultArgument() const {
- return DefaultArgument;
+ static const TemplateArgumentLoc None;
+ return DefaultArgument.isSet() ? *DefaultArgument.get() : None;
}
/// \brief Retrieve the location of the default argument, if any.
@@ -1339,22 +1425,21 @@ public:
/// \brief Determines whether the default argument was inherited
/// from a previous declaration of this template.
bool defaultArgumentWasInherited() const {
- return DefaultArgumentWasInherited;
+ return DefaultArgument.isInherited();
}
/// \brief Set the default argument for this template parameter, and
/// whether that default argument was inherited from another
/// declaration.
- void setDefaultArgument(const TemplateArgumentLoc &DefArg, bool Inherited) {
- DefaultArgument = DefArg;
- DefaultArgumentWasInherited = Inherited;
+ void setDefaultArgument(const ASTContext &C,
+ const TemplateArgumentLoc &DefArg);
+ void setInheritedDefaultArgument(const ASTContext &C,
+ TemplateTemplateParmDecl *Prev) {
+ DefaultArgument.setInherited(C, Prev);
}
/// \brief Removes the default argument of this template parameter.
- void removeDefaultArgument() {
- DefaultArgument = TemplateArgumentLoc();
- DefaultArgumentWasInherited = false;
- }
+ void removeDefaultArgument() { DefaultArgument.clear(); }
SourceRange getSourceRange() const override LLVM_READONLY {
SourceLocation End = getLocation();
diff --git a/contrib/llvm/tools/clang/include/clang/AST/EvaluatedExprVisitor.h b/contrib/llvm/tools/clang/include/clang/AST/EvaluatedExprVisitor.h
index 59de104..5cae5d9 100644
--- a/contrib/llvm/tools/clang/include/clang/AST/EvaluatedExprVisitor.h
+++ b/contrib/llvm/tools/clang/include/clang/AST/EvaluatedExprVisitor.h
@@ -26,29 +26,32 @@ class ASTContext;
/// \brief Given a potentially-evaluated expression, this visitor visits all
/// of its potentially-evaluated subexpressions, recursively.
-template<typename ImplClass>
-class EvaluatedExprVisitor : public StmtVisitor<ImplClass> {
- ASTContext &Context;
-
+template<template <typename> class Ptr, typename ImplClass>
+class EvaluatedExprVisitorBase : public StmtVisitorBase<Ptr, ImplClass, void> {
+protected:
+ const ASTContext &Context;
+
public:
- explicit EvaluatedExprVisitor(ASTContext &Context) : Context(Context) { }
-
+#define PTR(CLASS) typename Ptr<CLASS>::type
+
+ explicit EvaluatedExprVisitorBase(const ASTContext &Context) : Context(Context) { }
+
// Expressions that have no potentially-evaluated subexpressions (but may have
// other sub-expressions).
- void VisitDeclRefExpr(DeclRefExpr *E) { }
- void VisitOffsetOfExpr(OffsetOfExpr *E) { }
- void VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E) { }
- void VisitExpressionTraitExpr(ExpressionTraitExpr *E) { }
- void VisitBlockExpr(BlockExpr *E) { }
- void VisitCXXUuidofExpr(CXXUuidofExpr *E) { }
- void VisitCXXNoexceptExpr(CXXNoexceptExpr *E) { }
-
- void VisitMemberExpr(MemberExpr *E) {
+ void VisitDeclRefExpr(PTR(DeclRefExpr) E) { }
+ void VisitOffsetOfExpr(PTR(OffsetOfExpr) E) { }
+ void VisitUnaryExprOrTypeTraitExpr(PTR(UnaryExprOrTypeTraitExpr) E) { }
+ void VisitExpressionTraitExpr(PTR(ExpressionTraitExpr) E) { }
+ void VisitBlockExpr(PTR(BlockExpr) E) { }
+ void VisitCXXUuidofExpr(PTR(CXXUuidofExpr) E) { }
+ void VisitCXXNoexceptExpr(PTR(CXXNoexceptExpr) E) { }
+
+ void VisitMemberExpr(PTR(MemberExpr) E) {
// Only the base matters.
return this->Visit(E->getBase());
}
-
- void VisitChooseExpr(ChooseExpr *E) {
+
+ void VisitChooseExpr(PTR(ChooseExpr) E) {
// Don't visit either child expression if the condition is dependent.
if (E->getCond()->isValueDependent())
return;
@@ -56,7 +59,7 @@ public:
return this->Visit(E->getChosenSubExpr());
}
- void VisitGenericSelectionExpr(GenericSelectionExpr *E) {
+ void VisitGenericSelectionExpr(PTR(GenericSelectionExpr) E) {
// The controlling expression of a generic selection is not evaluated.
// Don't visit either child expression if the condition is type-dependent.
@@ -67,23 +70,23 @@ public:
return this->Visit(E->getResultExpr());
}
- void VisitDesignatedInitExpr(DesignatedInitExpr *E) {
+ void VisitDesignatedInitExpr(PTR(DesignatedInitExpr) E) {
// Only the actual initializer matters; the designators are all constant
// expressions.
return this->Visit(E->getInit());
}
- void VisitCXXTypeidExpr(CXXTypeidExpr *E) {
+ void VisitCXXTypeidExpr(PTR(CXXTypeidExpr) E) {
if (E->isPotentiallyEvaluated())
return this->Visit(E->getExprOperand());
}
- void VisitCallExpr(CallExpr *CE) {
+ void VisitCallExpr(PTR(CallExpr) CE) {
if (!CE->isUnevaluatedBuiltinCall(Context))
return static_cast<ImplClass*>(this)->VisitExpr(CE);
}
- void VisitLambdaExpr(LambdaExpr *LE) {
+ void VisitLambdaExpr(PTR(LambdaExpr) LE) {
// Only visit the capture initializers, and not the body.
for (LambdaExpr::capture_init_iterator I = LE->capture_init_begin(),
E = LE->capture_init_end();
@@ -94,11 +97,31 @@ public:
/// \brief The basis case walks all of the children of the statement or
/// expression, assuming they are all potentially evaluated.
- void VisitStmt(Stmt *S) {
- for (Stmt::child_range C = S->children(); C; ++C)
+ void VisitStmt(PTR(Stmt) S) {
+ for (auto C = S->children(); C; ++C)
if (*C)
this->Visit(*C);
}
+
+#undef PTR
+};
+
+/// EvaluatedExprVisitor - This class visits 'Expr *'s
+template<typename ImplClass>
+class EvaluatedExprVisitor
+ : public EvaluatedExprVisitorBase<make_ptr, ImplClass> {
+public:
+ explicit EvaluatedExprVisitor(const ASTContext &Context) :
+ EvaluatedExprVisitorBase<make_ptr, ImplClass>(Context) { }
+};
+
+/// ConstEvaluatedExprVisitor - This class visits 'const Expr *'s.
+template<typename ImplClass>
+class ConstEvaluatedExprVisitor
+ : public EvaluatedExprVisitorBase<make_const_ptr, ImplClass> {
+public:
+ explicit ConstEvaluatedExprVisitor(const ASTContext &Context) :
+ EvaluatedExprVisitorBase<make_const_ptr, ImplClass>(Context) { }
};
}
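Note: a small sketch of the new const variant in use. CallCounter is a hypothetical visitor, assuming the usual CRTP pattern; the point is that it can walk a 'const Expr *' tree without casting away constness.

#include "clang/AST/EvaluatedExprVisitor.h"
using namespace clang;

// Hypothetical visitor: counts potentially-evaluated call expressions.
class CallCounter : public ConstEvaluatedExprVisitor<CallCounter> {
public:
  unsigned Count;
  explicit CallCounter(const ASTContext &Ctx)
      : ConstEvaluatedExprVisitor<CallCounter>(Ctx), Count(0) {}
  void VisitCallExpr(const CallExpr *CE) {
    ++Count;
    VisitStmt(CE); // still walk the (potentially-evaluated) arguments
  }
};
// Usage: CallCounter C(Ctx); C.Visit(SomeConstExprPtr);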
diff --git a/contrib/llvm/tools/clang/include/clang/AST/Expr.h b/contrib/llvm/tools/clang/include/clang/AST/Expr.h
index a3be7d0..2a5b4c0 100644
--- a/contrib/llvm/tools/clang/include/clang/AST/Expr.h
+++ b/contrib/llvm/tools/clang/include/clang/AST/Expr.h
@@ -598,7 +598,7 @@ public:
/// \brief Determine whether this expression involves a call to any function
/// that is not trivial.
- bool hasNonTrivialCall(ASTContext &Ctx);
+ bool hasNonTrivialCall(const ASTContext &Ctx) const;
/// EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded
/// integer. This must be called on an expression that constant folds to an
@@ -2273,7 +2273,7 @@ public:
/// \brief Returns \c true if this is a call to a builtin which does not
/// evaluate side-effects within its arguments.
- bool isUnevaluatedBuiltinCall(ASTContext &Ctx) const;
+ bool isUnevaluatedBuiltinCall(const ASTContext &Ctx) const;
/// getCallReturnType - Get the return type of the call expr. This is not
/// always the type of the expr itself, if the return type is a reference
@@ -4267,6 +4267,80 @@ public:
}
};
+/// \brief Represents a place-holder for an object not to be initialized by
+/// anything.
+///
+/// This only makes sense when it appears as part of an updater of a
+/// DesignatedInitUpdateExpr (see below). The base expression of a DIUE
+/// initializes a big object, and the NoInitExpr's mark the spots within the
+/// big object not to be overwritten by the updater.
+///
+/// \see DesignatedInitUpdateExpr
+class NoInitExpr : public Expr {
+public:
+ explicit NoInitExpr(QualType ty)
+ : Expr(NoInitExprClass, ty, VK_RValue, OK_Ordinary,
+ false, false, ty->isInstantiationDependentType(), false) { }
+
+ explicit NoInitExpr(EmptyShell Empty)
+ : Expr(NoInitExprClass, Empty) { }
+
+ static bool classof(const Stmt *T) {
+ return T->getStmtClass() == NoInitExprClass;
+ }
+
+ SourceLocation getLocStart() const LLVM_READONLY { return SourceLocation(); }
+ SourceLocation getLocEnd() const LLVM_READONLY { return SourceLocation(); }
+
+ // Iterators
+ child_range children() { return child_range(); }
+};
+
+// In cases like:
+// struct Q { int a, b, c; };
+// Q *getQ();
+// void foo() {
+// struct A { Q q; } a = { *getQ(), .q.b = 3 };
+// }
+//
+// We will have an InitListExpr for a, with type A, and then a
+// DesignatedInitUpdateExpr for "a.q" with type Q. The "base" for this DIUE
+// is the call expression *getQ(); the "updater" for the DIUE is ".q.b = 3"
+//
+class DesignatedInitUpdateExpr : public Expr {
+ // BaseAndUpdaterExprs[0] is the base expression;
+ // BaseAndUpdaterExprs[1] is an InitListExpr overwriting part of the base.
+ Stmt *BaseAndUpdaterExprs[2];
+
+public:
+ DesignatedInitUpdateExpr(const ASTContext &C, SourceLocation lBraceLoc,
+ Expr *baseExprs, SourceLocation rBraceLoc);
+
+ explicit DesignatedInitUpdateExpr(EmptyShell Empty)
+ : Expr(DesignatedInitUpdateExprClass, Empty) { }
+
+ SourceLocation getLocStart() const LLVM_READONLY;
+ SourceLocation getLocEnd() const LLVM_READONLY;
+
+ static bool classof(const Stmt *T) {
+ return T->getStmtClass() == DesignatedInitUpdateExprClass;
+ }
+
+ Expr *getBase() const { return cast<Expr>(BaseAndUpdaterExprs[0]); }
+ void setBase(Expr *Base) { BaseAndUpdaterExprs[0] = Base; }
+
+ InitListExpr *getUpdater() const {
+ return cast<InitListExpr>(BaseAndUpdaterExprs[1]);
+ }
+ void setUpdater(Expr *Updater) { BaseAndUpdaterExprs[1] = Updater; }
+
+ // Iterators
+ // children = the base and the updater
+ child_range children() {
+ return child_range(&BaseAndUpdaterExprs[0], &BaseAndUpdaterExprs[0] + 2);
+ }
+};
+
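Note: a hedged sketch of consuming the two new nodes above. visitUpdatedFields is a hypothetical helper; it skips the slots that a NoInitExpr marks as "keep the value produced by the base expression".

#include "clang/AST/Expr.h"
using namespace clang;

// Hypothetical walk: look only at the fields the updater overwrites.
void visitUpdatedFields(const DesignatedInitUpdateExpr *E) {
  const InitListExpr *Upd = E->getUpdater();
  for (unsigned I = 0, N = Upd->getNumInits(); I != N; ++I) {
    const Expr *Init = Upd->getInit(I);
    if (isa<NoInitExpr>(Init))
      continue;            // this slot keeps whatever getBase() produced
    // ... handle the overwritten field's initializer here ...
  }
}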
/// \brief Represents an implicitly-generated value initialization of
/// an object of a given type.
///
diff --git a/contrib/llvm/tools/clang/include/clang/AST/OpenMPClause.h b/contrib/llvm/tools/clang/include/clang/AST/OpenMPClause.h
index c8ecef8..386c8dd 100644
--- a/contrib/llvm/tools/clang/include/clang/AST/OpenMPClause.h
+++ b/contrib/llvm/tools/clang/include/clang/AST/OpenMPClause.h
@@ -1752,7 +1752,7 @@ public:
StmtRange children() {
return StmtRange(reinterpret_cast<Stmt **>(varlist_begin()),
- reinterpret_cast<Stmt **>(getFinals().end() + 2));
+ reinterpret_cast<Stmt **>(varlist_end()));
}
static bool classof(const OMPClause *T) {
@@ -1837,7 +1837,7 @@ public:
StmtRange children() {
return StmtRange(reinterpret_cast<Stmt **>(varlist_begin()),
- reinterpret_cast<Stmt **>(varlist_end() + 1));
+ reinterpret_cast<Stmt **>(varlist_end()));
}
static bool classof(const OMPClause *T) {
diff --git a/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h b/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h
index 95e0df3..95d7730 100644
--- a/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2234,9 +2234,11 @@ DEF_TRAVERSE_STMT(CXXThisExpr, {})
DEF_TRAVERSE_STMT(CXXThrowExpr, {})
DEF_TRAVERSE_STMT(UserDefinedLiteral, {})
DEF_TRAVERSE_STMT(DesignatedInitExpr, {})
+DEF_TRAVERSE_STMT(DesignatedInitUpdateExpr, {})
DEF_TRAVERSE_STMT(ExtVectorElementExpr, {})
DEF_TRAVERSE_STMT(GNUNullExpr, {})
DEF_TRAVERSE_STMT(ImplicitValueInitExpr, {})
+DEF_TRAVERSE_STMT(NoInitExpr, {})
DEF_TRAVERSE_STMT(ObjCBoolLiteralExpr, {})
DEF_TRAVERSE_STMT(ObjCEncodeExpr, {
if (TypeSourceInfo *TInfo = S->getEncodedTypeSourceInfo())
@@ -2386,6 +2388,9 @@ DEF_TRAVERSE_STMT(OMPBarrierDirective,
DEF_TRAVERSE_STMT(OMPTaskwaitDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
+DEF_TRAVERSE_STMT(OMPTaskgroupDirective,
+ { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
DEF_TRAVERSE_STMT(OMPFlushDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
diff --git a/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h b/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h
index 5161eff..63f295d 100644
--- a/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h
+++ b/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h
@@ -285,24 +285,23 @@ class OMPLoopDirective : public OMPExecutableDirective {
CalcLastIterationOffset = 3,
PreConditionOffset = 4,
CondOffset = 5,
- SeparatedCondOffset = 6,
- InitOffset = 7,
- IncOffset = 8,
+ InitOffset = 6,
+ IncOffset = 7,
// The '...End' enumerators do not correspond to child expressions - they
// specify the offset to the end (and start of the following counters/
// updates/finals arrays).
- DefaultEnd = 9,
+ DefaultEnd = 8,
// The following 7 exprs are used by worksharing loops only.
- IsLastIterVariableOffset = 9,
- LowerBoundVariableOffset = 10,
- UpperBoundVariableOffset = 11,
- StrideVariableOffset = 12,
- EnsureUpperBoundOffset = 13,
- NextLowerBoundOffset = 14,
- NextUpperBoundOffset = 15,
+ IsLastIterVariableOffset = 8,
+ LowerBoundVariableOffset = 9,
+ UpperBoundVariableOffset = 10,
+ StrideVariableOffset = 11,
+ EnsureUpperBoundOffset = 12,
+ NextLowerBoundOffset = 13,
+ NextUpperBoundOffset = 14,
// Offset to the end (and start of the following counters/updates/finals
// arrays) for worksharing loop directives.
- WorksharingEnd = 16,
+ WorksharingEnd = 15,
};
/// \brief Get the counters storage.
@@ -374,9 +373,8 @@ protected:
void setPreCond(Expr *PC) {
*std::next(child_begin(), PreConditionOffset) = PC;
}
- void setCond(Expr *Cond, Expr *SeparatedCond) {
+ void setCond(Expr *Cond) {
*std::next(child_begin(), CondOffset) = Cond;
- *std::next(child_begin(), SeparatedCondOffset) = SeparatedCond;
}
void setInit(Expr *Init) { *std::next(child_begin(), InitOffset) = Init; }
void setInc(Expr *Inc) { *std::next(child_begin(), IncOffset) = Inc; }
@@ -435,8 +433,6 @@ public:
Expr *PreCond;
/// \brief Loop condition.
Expr *Cond;
- /// \brief A condition with 1 iteration separated.
- Expr *SeparatedCond;
/// \brief Loop iteration variable init.
Expr *Init;
/// \brief Loop increment.
@@ -467,8 +463,7 @@ public:
bool builtAll() {
return IterationVarRef != nullptr && LastIteration != nullptr &&
NumIterations != nullptr && PreCond != nullptr &&
- Cond != nullptr && SeparatedCond != nullptr && Init != nullptr &&
- Inc != nullptr;
+ Cond != nullptr && Init != nullptr && Inc != nullptr;
}
/// \brief Initialize all the fields to null.
@@ -479,7 +474,6 @@ public:
CalcLastIteration = nullptr;
PreCond = nullptr;
Cond = nullptr;
- SeparatedCond = nullptr;
Init = nullptr;
Inc = nullptr;
IL = nullptr;
@@ -519,10 +513,9 @@ public:
return const_cast<Expr *>(reinterpret_cast<const Expr *>(
*std::next(child_begin(), PreConditionOffset)));
}
- Expr *getCond(bool SeparateIter) const {
- return const_cast<Expr *>(reinterpret_cast<const Expr *>(
- *std::next(child_begin(),
- (SeparateIter ? SeparatedCondOffset : CondOffset))));
+ Expr *getCond() const {
+ return const_cast<Expr *>(
+ reinterpret_cast<const Expr *>(*std::next(child_begin(), CondOffset)));
}
Expr *getInit() const {
return const_cast<Expr *>(
@@ -1462,6 +1455,53 @@ public:
}
};
+/// \brief This represents '#pragma omp taskgroup' directive.
+///
+/// \code
+/// #pragma omp taskgroup
+/// \endcode
+///
+class OMPTaskgroupDirective : public OMPExecutableDirective {
+ friend class ASTStmtReader;
+ /// \brief Build directive with the given start and end location.
+ ///
+ /// \param StartLoc Starting location of the directive kind.
+ /// \param EndLoc Ending location of the directive.
+ ///
+ OMPTaskgroupDirective(SourceLocation StartLoc, SourceLocation EndLoc)
+ : OMPExecutableDirective(this, OMPTaskgroupDirectiveClass, OMPD_taskgroup,
+ StartLoc, EndLoc, 0, 1) {}
+
+ /// \brief Build an empty directive.
+ ///
+ explicit OMPTaskgroupDirective()
+ : OMPExecutableDirective(this, OMPTaskgroupDirectiveClass, OMPD_taskgroup,
+ SourceLocation(), SourceLocation(), 0, 1) {}
+
+public:
+ /// \brief Creates directive.
+ ///
+ /// \param C AST context.
+ /// \param StartLoc Starting location of the directive kind.
+ /// \param EndLoc Ending Location of the directive.
+ /// \param AssociatedStmt Statement, associated with the directive.
+ ///
+ static OMPTaskgroupDirective *Create(const ASTContext &C,
+ SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ Stmt *AssociatedStmt);
+
+ /// \brief Creates an empty directive.
+ ///
+ /// \param C AST context.
+ ///
+ static OMPTaskgroupDirective *CreateEmpty(const ASTContext &C, EmptyShell);
+
+ static bool classof(const Stmt *T) {
+ return T->getStmtClass() == OMPTaskgroupDirectiveClass;
+ }
+};
+
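Note: for reference, the source construct this class models (illustrative only; worker() is an assumed function, not part of the patch).

void worker(int id);

void spawn_and_wait() {
#pragma omp taskgroup
  {
#pragma omp task
    worker(1);
#pragma omp task
    worker(2);
  } // the taskgroup waits here for both tasks and their descendants
}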
/// \brief This represents '#pragma omp flush' directive.
///
/// \code
diff --git a/contrib/llvm/tools/clang/include/clang/AST/Type.h b/contrib/llvm/tools/clang/include/clang/AST/Type.h
index 8cd29b7..d903b9d 100644
--- a/contrib/llvm/tools/clang/include/clang/AST/Type.h
+++ b/contrib/llvm/tools/clang/include/clang/AST/Type.h
@@ -1818,6 +1818,19 @@ public:
/// checking. Should always return true.
bool isLinkageValid() const;
+ /// Determine the nullability of the given type.
+ ///
+ /// Note that nullability is only captured as sugar within the type
+ /// system, not as part of the canonical type, so nullability will
+ /// be lost by canonicalization and desugaring.
+ Optional<NullabilityKind> getNullability(const ASTContext &context) const;
+
+ /// Determine whether the given type can have a nullability
+ /// specifier applied to it, i.e., if it is any kind of pointer type
+ /// or a dependent type that could instantiate to any kind of
+ /// pointer type.
+ bool canHaveNullability() const;
+
const char *getTypeClassName() const;
QualType getCanonicalTypeInternal() const {
@@ -3479,7 +3492,10 @@ public:
attr_ptr32,
attr_ptr64,
attr_sptr,
- attr_uptr
+ attr_uptr,
+ attr_nonnull,
+ attr_nullable,
+ attr_null_unspecified,
};
private:
@@ -3513,6 +3529,35 @@ public:
bool isCallingConv() const;
+ llvm::Optional<NullabilityKind> getImmediateNullability() const;
+
+ /// Retrieve the attribute kind corresponding to the given
+ /// nullability kind.
+ static Kind getNullabilityAttrKind(NullabilityKind kind) {
+ switch (kind) {
+ case NullabilityKind::NonNull:
+ return attr_nonnull;
+
+ case NullabilityKind::Nullable:
+ return attr_nullable;
+
+ case NullabilityKind::Unspecified:
+ return attr_null_unspecified;
+ }
+ llvm_unreachable("Unknown nullability kind.");
+ }
+
+ /// Strip off the top-level nullability annotation on the given
+ /// type, if it's there.
+ ///
+ /// \param T The type to strip. If the type is exactly an
+ /// AttributedType specifying nullability (without looking through
+ /// type sugar), the nullability is returned and this type changed
+ /// to the underlying modified type.
+ ///
+ /// \returns the top-level nullability, if present.
+ static Optional<NullabilityKind> stripOuterNullability(QualType &T);
+
void Profile(llvm::FoldingSetNodeID &ID) {
Profile(ID, getAttrKind(), ModifiedType, EquivalentType);
}
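Note: a minimal sketch of how these queries compose. describeNullability is a hypothetical helper and assumes the placement of getNullability()/canHaveNullability() on Type shown in this hunk.

#include "clang/AST/Type.h"
using namespace clang;

// Hypothetical helper: prefer an outer nullability attribute, then fall back
// to whatever nullability survives in the type's sugar.
Optional<NullabilityKind> describeNullability(QualType T,
                                              const ASTContext &Ctx) {
  if (Optional<NullabilityKind> Outer =
          AttributedType::stripOuterNullability(T))
    return Outer;                  // spelled directly on the outermost type
  if (T->canHaveNullability())
    return T->getNullability(Ctx); // may be implied deeper in the sugar
  return None;
}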
diff --git a/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchersMacros.h b/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchersMacros.h
index e8eab86..8ad0c16 100644
--- a/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchersMacros.h
+++ b/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchersMacros.h
@@ -20,17 +20,30 @@
// to only have the functions (which is all the user cares about) in the
// 'ast_matchers' namespace and hide the boilerplate.
//
-// To define a matcher in user code, always put it into the clang::ast_matchers
-// namespace and refer to the internal types via the 'internal::':
+// To define a matcher in user code, put it into your own namespace. This would
+// help to prevent ODR violations in case a matcher with the same name is
+// defined in multiple translation units:
+//
+// namespace my_matchers {
+// AST_MATCHER_P(clang::MemberExpr, Member,
+// clang::ast_matchers::internal::Matcher<clang::ValueDecl>,
+// InnerMatcher) {
+// return InnerMatcher.matches(*Node.getMemberDecl(), Finder, Builder);
+// }
+// } // namespace my_matchers
+//
+// Alternatively, an unnamed namespace may be used:
//
// namespace clang {
// namespace ast_matchers {
+// namespace {
// AST_MATCHER_P(MemberExpr, Member,
// internal::Matcher<ValueDecl>, InnerMatcher) {
// return InnerMatcher.matches(*Node.getMemberDecl(), Finder, Builder);
// }
-// } // end namespace ast_matchers
-// } // end namespace clang
+// } // namespace
+// } // namespace ast_matchers
+// } // namespace clang
//
//===----------------------------------------------------------------------===//
@@ -43,12 +56,13 @@
#define AST_MATCHER_FUNCTION(ReturnType, DefineMatcher) \
inline ReturnType DefineMatcher##_getInstance(); \
inline ReturnType DefineMatcher() { \
- return internal::MemoizedMatcher< \
+ return ::clang::ast_matchers::internal::MemoizedMatcher< \
ReturnType, DefineMatcher##_getInstance>::getInstance(); \
} \
inline ReturnType DefineMatcher##_getInstance()
-/// \brief AST_MATCHER_FUNCTION_P(ReturnType, DefineMatcher, ParamType, Param) { ... }
+/// \brief AST_MATCHER_FUNCTION_P(ReturnType, DefineMatcher, ParamType, Param) {
+/// ... }
/// defines a single-parameter function named DefineMatcher() that returns a
/// ReturnType object.
///
@@ -80,20 +94,24 @@
/// The code should return true if 'Node' matches.
#define AST_MATCHER(Type, DefineMatcher) \
namespace internal { \
- class matcher_##DefineMatcher##Matcher : public MatcherInterface<Type> { \
+ class matcher_##DefineMatcher##Matcher \
+ : public ::clang::ast_matchers::internal::MatcherInterface<Type> { \
public: \
explicit matcher_##DefineMatcher##Matcher() {} \
- bool matches(const Type &Node, ASTMatchFinder *Finder, \
- BoundNodesTreeBuilder *Builder) const override; \
+ bool matches(const Type &Node, \
+ ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \
+ ::clang::ast_matchers::internal::BoundNodesTreeBuilder \
+ *Builder) const override; \
}; \
} \
- inline internal::Matcher<Type> DefineMatcher() { \
- return internal::makeMatcher( \
+ inline ::clang::ast_matchers::internal::Matcher<Type> DefineMatcher() { \
+ return ::clang::ast_matchers::internal::makeMatcher( \
new internal::matcher_##DefineMatcher##Matcher()); \
} \
inline bool internal::matcher_##DefineMatcher##Matcher::matches( \
- const Type &Node, ASTMatchFinder *Finder, \
- BoundNodesTreeBuilder *Builder) const
+ const Type &Node, \
+ ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \
+ ::clang::ast_matchers::internal::BoundNodesTreeBuilder *Builder) const
/// \brief AST_MATCHER_P(Type, DefineMatcher, ParamType, Param) { ... }
/// defines a single-parameter function named DefineMatcher() that returns a
@@ -115,27 +133,31 @@
OverloadId) \
namespace internal { \
class matcher_##DefineMatcher##OverloadId##Matcher \
- : public MatcherInterface<Type> { \
+ : public ::clang::ast_matchers::internal::MatcherInterface<Type> { \
public: \
explicit matcher_##DefineMatcher##OverloadId##Matcher( \
ParamType const &A##Param) \
: Param(A##Param) {} \
- bool matches(const Type &Node, ASTMatchFinder *Finder, \
- BoundNodesTreeBuilder *Builder) const override; \
+ bool matches(const Type &Node, \
+ ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \
+ ::clang::ast_matchers::internal::BoundNodesTreeBuilder \
+ *Builder) const override; \
\
private: \
ParamType const Param; \
}; \
} \
- inline internal::Matcher<Type> DefineMatcher(ParamType const &Param) { \
- return internal::makeMatcher( \
+ inline ::clang::ast_matchers::internal::Matcher<Type> DefineMatcher( \
+ ParamType const &Param) { \
+ return ::clang::ast_matchers::internal::makeMatcher( \
new internal::matcher_##DefineMatcher##OverloadId##Matcher(Param)); \
} \
- typedef internal::Matcher<Type>(&DefineMatcher##_Type##OverloadId)( \
- ParamType const &Param); \
+ typedef ::clang::ast_matchers::internal::Matcher<Type>( \
+ &DefineMatcher##_Type##OverloadId)(ParamType const &Param); \
inline bool internal::matcher_##DefineMatcher##OverloadId##Matcher::matches( \
- const Type &Node, ASTMatchFinder *Finder, \
- BoundNodesTreeBuilder *Builder) const
+ const Type &Node, \
+ ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \
+ ::clang::ast_matchers::internal::BoundNodesTreeBuilder *Builder) const
/// \brief AST_MATCHER_P2(
/// Type, DefineMatcher, ParamType1, Param1, ParamType2, Param2) { ... }
@@ -160,30 +182,34 @@
ParamType2, Param2, OverloadId) \
namespace internal { \
class matcher_##DefineMatcher##OverloadId##Matcher \
- : public MatcherInterface<Type> { \
+ : public ::clang::ast_matchers::internal::MatcherInterface<Type> { \
public: \
matcher_##DefineMatcher##OverloadId##Matcher(ParamType1 const &A##Param1, \
ParamType2 const &A##Param2) \
: Param1(A##Param1), Param2(A##Param2) {} \
- bool matches(const Type &Node, ASTMatchFinder *Finder, \
- BoundNodesTreeBuilder *Builder) const override; \
+ bool matches(const Type &Node, \
+ ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \
+ ::clang::ast_matchers::internal::BoundNodesTreeBuilder \
+ *Builder) const override; \
\
private: \
ParamType1 const Param1; \
ParamType2 const Param2; \
}; \
} \
- inline internal::Matcher<Type> DefineMatcher(ParamType1 const &Param1, \
- ParamType2 const &Param2) { \
- return internal::makeMatcher( \
+ inline ::clang::ast_matchers::internal::Matcher<Type> DefineMatcher( \
+ ParamType1 const &Param1, ParamType2 const &Param2) { \
+ return ::clang::ast_matchers::internal::makeMatcher( \
new internal::matcher_##DefineMatcher##OverloadId##Matcher(Param1, \
Param2)); \
} \
- typedef internal::Matcher<Type>(&DefineMatcher##_Type##OverloadId)( \
- ParamType1 const &Param1, ParamType2 const &Param2); \
+ typedef ::clang::ast_matchers::internal::Matcher<Type>( \
+ &DefineMatcher##_Type##OverloadId)(ParamType1 const &Param1, \
+ ParamType2 const &Param2); \
inline bool internal::matcher_##DefineMatcher##OverloadId##Matcher::matches( \
- const Type &Node, ASTMatchFinder *Finder, \
- BoundNodesTreeBuilder *Builder) const
+ const Type &Node, \
+ ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \
+ ::clang::ast_matchers::internal::BoundNodesTreeBuilder *Builder) const
/// \brief Construct a type-list to be passed to the AST_POLYMORPHIC_MATCHER*
/// macros.
@@ -194,7 +220,7 @@
/// The \c PolymorphicMatcherWithParam* classes will unpack the function type to
/// extract the TypeList object.
#define AST_POLYMORPHIC_SUPPORTED_TYPES(...) \
- void(internal::TypeList<__VA_ARGS__>)
+ void(::clang::ast_matchers::internal::TypeList<__VA_ARGS__>)
/// \brief AST_POLYMORPHIC_MATCHER(DefineMatcher) { ... }
/// defines a single-parameter function named DefineMatcher() that is
@@ -205,22 +231,26 @@
#define AST_POLYMORPHIC_MATCHER(DefineMatcher, ReturnTypesF) \
namespace internal { \
template <typename NodeType> \
- class matcher_##DefineMatcher##Matcher : public MatcherInterface<NodeType> { \
+ class matcher_##DefineMatcher##Matcher \
+ : public ::clang::ast_matchers::internal::MatcherInterface<NodeType> { \
public: \
- bool matches(const NodeType &Node, ASTMatchFinder *Finder, \
- BoundNodesTreeBuilder *Builder) const override; \
+ bool matches(const NodeType &Node, \
+ ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \
+ ::clang::ast_matchers::internal::BoundNodesTreeBuilder \
+ *Builder) const override; \
}; \
} \
- inline internal::PolymorphicMatcherWithParam0< \
+ inline ::clang::ast_matchers::internal::PolymorphicMatcherWithParam0< \
internal::matcher_##DefineMatcher##Matcher, ReturnTypesF> \
DefineMatcher() { \
- return internal::PolymorphicMatcherWithParam0< \
+ return ::clang::ast_matchers::internal::PolymorphicMatcherWithParam0< \
internal::matcher_##DefineMatcher##Matcher, ReturnTypesF>(); \
} \
template <typename NodeType> \
bool internal::matcher_##DefineMatcher##Matcher<NodeType>::matches( \
- const NodeType &Node, ASTMatchFinder *Finder, \
- BoundNodesTreeBuilder *Builder) const
+ const NodeType &Node, \
+ ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \
+ ::clang::ast_matchers::internal::BoundNodesTreeBuilder *Builder) const
/// \brief AST_POLYMORPHIC_MATCHER_P(DefineMatcher, ParamType, Param) { ... }
/// defines a single-parameter function named DefineMatcher() that is
@@ -241,33 +271,39 @@
namespace internal { \
template <typename NodeType, typename ParamT> \
class matcher_##DefineMatcher##OverloadId##Matcher \
- : public MatcherInterface<NodeType> { \
+ : public ::clang::ast_matchers::internal::MatcherInterface<NodeType> { \
public: \
explicit matcher_##DefineMatcher##OverloadId##Matcher( \
ParamType const &A##Param) \
: Param(A##Param) {} \
- bool matches(const NodeType &Node, ASTMatchFinder *Finder, \
- BoundNodesTreeBuilder *Builder) const override; \
+ bool matches(const NodeType &Node, \
+ ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \
+ ::clang::ast_matchers::internal::BoundNodesTreeBuilder \
+ *Builder) const override; \
\
private: \
ParamType const Param; \
}; \
} \
- inline internal::PolymorphicMatcherWithParam1< \
+ inline ::clang::ast_matchers::internal::PolymorphicMatcherWithParam1< \
internal::matcher_##DefineMatcher##OverloadId##Matcher, ParamType, \
- ReturnTypesF> DefineMatcher(ParamType const &Param) { \
- return internal::PolymorphicMatcherWithParam1< \
+ ReturnTypesF> \
+ DefineMatcher(ParamType const &Param) { \
+ return ::clang::ast_matchers::internal::PolymorphicMatcherWithParam1< \
internal::matcher_##DefineMatcher##OverloadId##Matcher, ParamType, \
ReturnTypesF>(Param); \
} \
- typedef internal::PolymorphicMatcherWithParam1< \
+ typedef ::clang::ast_matchers::internal::PolymorphicMatcherWithParam1< \
internal::matcher_##DefineMatcher##OverloadId##Matcher, ParamType, \
ReturnTypesF>(&DefineMatcher##_Type##OverloadId)( \
ParamType const &Param); \
template <typename NodeType, typename ParamT> \
- bool internal::matcher_##DefineMatcher##OverloadId##Matcher< \
- NodeType, ParamT>::matches(const NodeType &Node, ASTMatchFinder *Finder, \
- BoundNodesTreeBuilder *Builder) const
+ bool internal:: \
+ matcher_##DefineMatcher##OverloadId##Matcher<NodeType, ParamT>::matches( \
+ const NodeType &Node, \
+ ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \
+ ::clang::ast_matchers::internal::BoundNodesTreeBuilder *Builder) \
+ const
/// \brief AST_POLYMORPHIC_MATCHER_P2(
/// DefineMatcher, ParamType1, Param1, ParamType2, Param2) { ... }
@@ -288,41 +324,46 @@
namespace internal { \
template <typename NodeType, typename ParamT1, typename ParamT2> \
class matcher_##DefineMatcher##OverloadId##Matcher \
- : public MatcherInterface<NodeType> { \
+ : public ::clang::ast_matchers::internal::MatcherInterface<NodeType> { \
public: \
matcher_##DefineMatcher##OverloadId##Matcher(ParamType1 const &A##Param1, \
ParamType2 const &A##Param2) \
: Param1(A##Param1), Param2(A##Param2) {} \
- bool matches(const NodeType &Node, ASTMatchFinder *Finder, \
- BoundNodesTreeBuilder *Builder) const override; \
+ bool matches(const NodeType &Node, \
+ ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \
+ ::clang::ast_matchers::internal::BoundNodesTreeBuilder \
+ *Builder) const override; \
\
private: \
ParamType1 const Param1; \
ParamType2 const Param2; \
}; \
} \
- inline internal::PolymorphicMatcherWithParam2< \
+ inline ::clang::ast_matchers::internal::PolymorphicMatcherWithParam2< \
internal::matcher_##DefineMatcher##OverloadId##Matcher, ParamType1, \
- ParamType2, ReturnTypesF> DefineMatcher(ParamType1 const &Param1, \
- ParamType2 const &Param2) { \
- return internal::PolymorphicMatcherWithParam2< \
+ ParamType2, ReturnTypesF> \
+ DefineMatcher(ParamType1 const &Param1, ParamType2 const &Param2) { \
+ return ::clang::ast_matchers::internal::PolymorphicMatcherWithParam2< \
internal::matcher_##DefineMatcher##OverloadId##Matcher, ParamType1, \
ParamType2, ReturnTypesF>(Param1, Param2); \
} \
- typedef internal::PolymorphicMatcherWithParam2< \
+ typedef ::clang::ast_matchers::internal::PolymorphicMatcherWithParam2< \
internal::matcher_##DefineMatcher##OverloadId##Matcher, ParamType1, \
ParamType2, ReturnTypesF>(&DefineMatcher##_Type##OverloadId)( \
ParamType1 const &Param1, ParamType2 const &Param2); \
template <typename NodeType, typename ParamT1, typename ParamT2> \
bool internal::matcher_##DefineMatcher##OverloadId##Matcher< \
- NodeType, ParamT1, ParamT2>::matches( \
- const NodeType &Node, ASTMatchFinder *Finder, \
- BoundNodesTreeBuilder *Builder) const
+ NodeType, ParamT1, ParamT2>:: \
+ matches(const NodeType &Node, \
+ ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \
+ ::clang::ast_matchers::internal::BoundNodesTreeBuilder *Builder) \
+ const
/// \brief Creates a variadic matcher for both a specific \c Type as well as
/// the corresponding \c TypeLoc.
#define AST_TYPE_MATCHER(NodeType, MatcherName) \
- const internal::VariadicDynCastAllOfMatcher<Type, NodeType> MatcherName
+ const ::clang::ast_matchers::internal::VariadicDynCastAllOfMatcher< \
+ Type, NodeType> MatcherName
// FIXME: add a matcher for TypeLoc derived classes using its custom casting
// API (no longer dyn_cast) if/when we need such matching
@@ -330,7 +371,7 @@
/// the matcher \c MatcherName that can be used to traverse from one \c Type
/// to another.
///
-/// For a specific \c SpecificType, the traversal is done using
+/// For a specific \c SpecificType, the traversal is done using
/// \c SpecificType::FunctionName. The existence of such a function determines
/// whether a corresponding matcher can be used on \c SpecificType.
#define AST_TYPE_TRAVERSE_MATCHER(MatcherName, FunctionName, ReturnTypesF) \
@@ -339,9 +380,11 @@
static QualType (T::*value())() const { return &T::FunctionName; } \
}; \
} \
- const internal::TypeTraversePolymorphicMatcher< \
- QualType, internal::TypeMatcher##MatcherName##Getter, \
- internal::TypeTraverseMatcher, ReturnTypesF>::Func MatcherName
+ const ::clang::ast_matchers::internal::TypeTraversePolymorphicMatcher< \
+ QualType, \
+ ::clang::ast_matchers::internal::TypeMatcher##MatcherName##Getter, \
+ ::clang::ast_matchers::internal::TypeTraverseMatcher, \
+ ReturnTypesF>::Func MatcherName
/// \brief AST_TYPELOC_TRAVERSE_MATCHER(MatcherName, FunctionName) works
/// identical to \c AST_TYPE_TRAVERSE_MATCHER but operates on \c TypeLocs.
@@ -351,9 +394,11 @@
static TypeLoc (T::*value())() const { return &T::FunctionName##Loc; } \
}; \
} \
- const internal::TypeTraversePolymorphicMatcher< \
- TypeLoc, internal::TypeLocMatcher##MatcherName##Getter, \
- internal::TypeLocTraverseMatcher, ReturnTypesF>::Func MatcherName##Loc; \
+ const ::clang::ast_matchers::internal::TypeTraversePolymorphicMatcher< \
+ TypeLoc, \
+ ::clang::ast_matchers::internal::TypeLocMatcher##MatcherName##Getter, \
+ ::clang::ast_matchers::internal::TypeLocTraverseMatcher, \
+ ReturnTypesF>::Func MatcherName##Loc; \
AST_TYPE_TRAVERSE_MATCHER(MatcherName, FunctionName##Type, ReturnTypesF)
#endif
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/Attr.td b/contrib/llvm/tools/clang/include/clang/Basic/Attr.td
index c310d25..f36f654 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/Attr.td
+++ b/contrib/llvm/tools/clang/include/clang/Basic/Attr.td
@@ -195,7 +195,8 @@ class CXX11<string namespace, string name, int version = 1>
: Spelling<name, "CXX11"> {
string Namespace = namespace;
int Version = version;
-} class Keyword<string name> : Spelling<name, "Keyword">;
+}
+class Keyword<string name> : Spelling<name, "Keyword">;
class Pragma<string namespace, string name> : Spelling<name, "Pragma"> {
string Namespace = namespace;
}
@@ -959,6 +960,22 @@ def ReturnsNonNull : InheritableAttr {
let Documentation = [Undocumented];
}
+// Nullability type attributes.
+def TypeNonNull : TypeAttr {
+ let Spellings = [Keyword<"__nonnull">];
+ let Documentation = [Undocumented];
+}
+
+def TypeNullable : TypeAttr {
+ let Spellings = [Keyword<"__nullable">];
+ let Documentation = [Undocumented];
+}
+
+def TypeNullUnspecified : TypeAttr {
+ let Spellings = [Keyword<"__null_unspecified">];
+ let Documentation = [Undocumented];
+}
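Note: illustrative declarations using the keyword spellings registered above (assumes a compiler with this patch applied; the variable names are arbitrary).

int *__nonnull required_ptr;           // must never be null
int *__nullable maybe_ptr;             // may be null
int *__null_unspecified legacy_ptr;    // nullability deliberately unstated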
+
def AssumeAligned : InheritableAttr {
let Spellings = [GCC<"assume_aligned">];
let Subjects = SubjectList<[ObjCMethod, Function]>;
@@ -1250,6 +1267,14 @@ def Pascal : InheritableAttr {
let Documentation = [Undocumented];
}
+def Target : InheritableAttr {
+ let Spellings = [GCC<"target">];
+ let Args = [StringArgument<"features">];
+ let Subjects =
+ SubjectList<[Function], ErrorDiag, "ExpectedFunctionMethodOrClass">;
+ let Documentation = [Undocumented];
+}
+
def TransparentUnion : InheritableAttr {
let Spellings = [GCC<"transparent_union">];
// let Subjects = SubjectList<[Record, TypedefName]>;
@@ -1935,8 +1960,8 @@ def LoopHint : Attr {
["Vectorize", "VectorizeWidth", "Interleave", "InterleaveCount",
"Unroll", "UnrollCount"]>,
EnumArgument<"State", "LoopHintState",
- ["default", "enable", "disable"],
- ["Default", "Enable", "Disable"]>,
+ ["default", "enable", "disable", "assume_safety"],
+ ["Default", "Enable", "Disable", "AssumeSafety"]>,
ExprArgument<"Value">];
let AdditionalMembers = [{
@@ -1982,6 +2007,8 @@ def LoopHint : Attr {
return "";
else if (state == Enable)
OS << (option == Unroll ? "full" : "enable");
+ else if (state == AssumeSafety)
+ OS << "assume_safety";
else
OS << "disable";
OS << ")";
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/Builtins.def b/contrib/llvm/tools/clang/include/clang/Basic/Builtins.def
index 1927907..bf65b5f 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/Builtins.def
+++ b/contrib/llvm/tools/clang/include/clang/Basic/Builtins.def
@@ -1240,6 +1240,10 @@ BUILTIN(__builtin_addressof, "v*v&", "nct")
BUILTIN(__builtin_operator_new, "v*z", "c")
BUILTIN(__builtin_operator_delete, "vv*", "n")
+// Safestack builtins
+BUILTIN(__builtin___get_unsafe_stack_start, "v*", "Fn")
+BUILTIN(__builtin___get_unsafe_stack_ptr, "v*", "Fn")
+
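Note: illustrative use of the two builtins added above; only meaningful when the translation unit is built with -fsanitize=safe-stack, otherwise the returned pointers are unspecified.

#include <cstdio>

void report_unsafe_stack() {
  void *start = __builtin___get_unsafe_stack_start();
  void *ptr = __builtin___get_unsafe_stack_ptr();
  std::printf("unsafe stack: start=%p current=%p\n", start, ptr);
}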
#undef BUILTIN
#undef LIBBUILTIN
#undef LANGBUILTIN
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsAArch64.def b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsAArch64.def
index 9d223c3..1db4c14 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -53,4 +53,12 @@ BUILTIN(__builtin_arm_isb, "vUi", "nc")
// Prefetch
BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc")
+// System Registers
+BUILTIN(__builtin_arm_rsr, "UicC*", "nc")
+BUILTIN(__builtin_arm_rsr64, "LUicC*", "nc")
+BUILTIN(__builtin_arm_rsrp, "v*cC*", "nc")
+BUILTIN(__builtin_arm_wsr, "vcC*Ui", "nc")
+BUILTIN(__builtin_arm_wsr64, "vcC*LUi", "nc")
+BUILTIN(__builtin_arm_wsrp, "vcC*vC*", "nc")
+
#undef BUILTIN
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsR600.def b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 84fc4fa..bb9931f 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsR600.def
+++ b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -1,4 +1,4 @@
-//==- BuiltinsR600.def - R600 Builtin function database ----------*- C++ -*-==//
+//==- BuiltinsAMDGPU.def - AMDGPU Builtin function database ------*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsARM.def b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsARM.def
index 98d5ab7..0610d47 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsARM.def
+++ b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsARM.def
@@ -84,6 +84,14 @@ BUILTIN(__builtin_arm_isb, "vUi", "nc")
// Prefetch
BUILTIN(__builtin_arm_prefetch, "vvC*UiUi", "nc")
+// System registers (ACLE)
+BUILTIN(__builtin_arm_rsr, "UicC*", "nc")
+BUILTIN(__builtin_arm_rsr64, "LLUicC*", "nc")
+BUILTIN(__builtin_arm_rsrp, "v*cC*", "nc")
+BUILTIN(__builtin_arm_wsr, "vcC*Ui", "nc")
+BUILTIN(__builtin_arm_wsr64, "vcC*LLUi", "nc")
+BUILTIN(__builtin_arm_wsrp, "vcC*vC*", "nc")
+
// MSVC
LANGBUILTIN(__emit, "vIUiC", "", ALL_MS_LANGUAGES)
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsPPC.def b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsPPC.def
index 57ae63e..6f3bea8 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsPPC.def
@@ -231,6 +231,9 @@ BUILTIN(__builtin_altivec_vcmpgtsd_p, "iiV2LLiV2LLi", "")
BUILTIN(__builtin_altivec_vcmpgtud_p, "iiV2ULLiV2ULLi", "")
BUILTIN(__builtin_altivec_vcmpgtfp_p, "iiV4fV4f", "")
+BUILTIN(__builtin_altivec_vgbbd, "V16UcV16Uc", "")
+BUILTIN(__builtin_altivec_vbpermq, "V2ULLiV16UcV16Uc", "")
+
// P8 Crypto built-ins.
BUILTIN(__builtin_altivec_crypto_vsbox, "V2ULLiV2ULLi", "")
BUILTIN(__builtin_altivec_crypto_vpermxor, "V16UcV16UcV16UcV16Uc", "")
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def
index 1a597b5..00c1340 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def
+++ b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def
@@ -59,6 +59,9 @@ BUILTIN(__builtin_ia32_pswapdsi, "V2iV2i", "nc")
// All MMX instructions will be generated via builtins. Any MMX vector
// types (<1 x i64>, <2 x i32>, etc.) that aren't used by these builtins will be
// expanded by the back-end.
+// FIXME: _mm_prefetch must be a built-in because it takes a compile-time constant
+// argument and our prior approach of using a #define to the current built-in
+// doesn't work in the presence of re-declaration of _mm_prefetch for windows.
BUILTIN(_mm_prefetch, "vcC*i", "nc")
BUILTIN(__builtin_ia32_emms, "v", "")
BUILTIN(__builtin_ia32_paddb, "V8cV8cV8c", "")
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticCommonKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticCommonKinds.td
index deb23f3..82718d9 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -99,6 +99,37 @@ def err_enum_template : Error<"enumeration cannot be a template">;
}
+let CategoryName = "Nullability Issue" in {
+
+def warn_nullability_duplicate : Warning<
+ "duplicate nullability specifier "
+ "'%select{__|}1%select{nonnull|nullable|null_unspecified}0'">,
+ InGroup<Nullability>;
+
+def warn_conflicting_nullability_attr_overriding_ret_types : Warning<
+ "conflicting nullability specifier on return types, "
+ "'%select{%select{__|}1nonnull|"
+ "%select{__|}1nullable|%select{__|}1null_unspecified}0' "
+ "conflicts with existing specifier '%select{%select{__|}3nonnull|"
+ "%select{__|}3nullable|%select{__|}3null_unspecified}2'">,
+ InGroup<Nullability>;
+
+def warn_conflicting_nullability_attr_overriding_param_types : Warning<
+ "conflicting nullability specifier on parameter types, "
+ "'%select{%select{__|}1nonnull|"
+ "%select{__|}1nullable|%select{__|}1null_unspecified}0' "
+ "conflicts with existing specifier '%select{%select{__|}3nonnull|"
+ "%select{__|}3nullable|%select{__|}3null_unspecified}2'">,
+ InGroup<Nullability>;
+
+def err_nullability_conflicting : Error<
+ "nullability specifier "
+ "'%select{__|}1%select{nonnull|nullable|null_unspecified}0' conflicts with "
+ "existing specifier '%select{__|}3%select{nonnull|nullable|"
+ "null_unspecified}2'">;
+
+}
+
// Sema && Lex
def ext_c99_longlong : Extension<
"'long long' is an extension when C99 mode is not enabled">,
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td
index 016c2e1..85796cc 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td
@@ -248,6 +248,10 @@ def MissingFieldInitializers : DiagGroup<"missing-field-initializers">;
def ModuleBuild : DiagGroup<"module-build">;
def ModuleConflict : DiagGroup<"module-conflict">;
def NewlineEOF : DiagGroup<"newline-eof">;
+def Nullability : DiagGroup<"nullability">;
+def NullabilityDeclSpec : DiagGroup<"nullability-declspec">;
+def NullableToNonNullConversion : DiagGroup<"nullable-to-nonnull-conversion">;
+def NullabilityCompleteness : DiagGroup<"nullability-completeness">;
def NullArithmetic : DiagGroup<"null-arithmetic">;
def NullCharacter : DiagGroup<"null-character">;
def NullDereference : DiagGroup<"null-dereference">;
@@ -598,6 +602,7 @@ def Most : DiagGroup<"most", [
DeleteNonVirtualDtor,
Format,
Implicit,
+ InfiniteRecursion,
MismatchedTags,
MissingBraces,
Move,
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticLexKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticLexKinds.td
index 3d568e8..5dd4ae0 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -646,5 +646,20 @@ def warn_header_guard : Warning<
"%0 is used as a header guard here, followed by #define of a different macro">,
InGroup<DiagGroup<"header-guard">>;
def note_header_guard : Note<
- "%0 is defined here; did you mean %1?">;
+ "%0 is defined here; did you mean %1?">;
+
+let CategoryName = "Nullability Issue" in {
+
+def err_pp_assume_nonnull_syntax : Error<"expected 'begin' or 'end'">;
+def err_pp_double_begin_of_assume_nonnull : Error<
+ "already inside '#pragma clang assume_nonnull'">;
+def err_pp_unmatched_end_of_assume_nonnull : Error<
+ "not currently inside '#pragma clang assume_nonnull'">;
+def err_pp_include_in_assume_nonnull : Error<
+ "cannot #include files inside '#pragma clang assume_nonnull'">;
+def err_pp_eof_in_assume_nonnull : Error<
+ "'#pragma clang assume_nonnull' was not ended within this file">;
+
+}
+
}
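Note: for context, the pragma these lexer diagnostics guard looks like this in a header (illustrative declarations only).

#pragma clang assume_nonnull begin

// Pointer parameters declared in this region default to __nonnull
// unless they are explicitly annotated otherwise.
void copy_name(const char *src, char *__nullable dst);

#pragma clang assume_nonnull end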
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.def b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.def
index fdd3254..f4ba6da 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.def
+++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.def
@@ -69,7 +69,10 @@ ENUM_DIAGOPT(ShowOverloads, OverloadsShown, 1,
DIAGOPT(VerifyDiagnostics, 1, 0) /// Check that diagnostics match the expected
/// diagnostics, indicated by markers in the
/// input source file.
-
+ENUM_DIAGOPT(VerifyIgnoreUnexpected, DiagnosticLevelMask, 4,
+ DiagnosticLevelMask::None) /// Ignore unexpected diagnostics of
+ /// the specified levels when using
+ /// -verify.
DIAGOPT(ElideType, 1, 0) /// Elide identical types in template diffing
DIAGOPT(ShowTemplateTree, 1, 0) /// Print a template tree when diffing
DIAGOPT(CLFallbackMode, 1, 0) /// Format for clang-cl fallback mode
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.h b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.h
index a16c774..c9b0c5d 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.h
+++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.h
@@ -13,6 +13,7 @@
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include <string>
+#include <type_traits>
#include <vector>
namespace clang {
@@ -24,6 +25,38 @@ enum OverloadsShown : unsigned {
Ovl_Best ///< Show just the "best" overload candidates.
};
+/// \brief A bitmask representing the diagnostic levels used by
+/// VerifyDiagnosticConsumer.
+enum class DiagnosticLevelMask : unsigned {
+ None = 0,
+ Note = 1 << 0,
+ Remark = 1 << 1,
+ Warning = 1 << 2,
+ Error = 1 << 3,
+ All = Note | Remark | Warning | Error
+};
+
+inline DiagnosticLevelMask operator~(DiagnosticLevelMask M) {
+ using UT = std::underlying_type<DiagnosticLevelMask>::type;
+ return static_cast<DiagnosticLevelMask>(~static_cast<UT>(M));
+}
+
+inline DiagnosticLevelMask operator|(DiagnosticLevelMask LHS,
+ DiagnosticLevelMask RHS) {
+ using UT = std::underlying_type<DiagnosticLevelMask>::type;
+ return static_cast<DiagnosticLevelMask>(
+ static_cast<UT>(LHS) | static_cast<UT>(RHS));
+}
+
+inline DiagnosticLevelMask operator&(DiagnosticLevelMask LHS,
+ DiagnosticLevelMask RHS) {
+ using UT = std::underlying_type<DiagnosticLevelMask>::type;
+ return static_cast<DiagnosticLevelMask>(
+ static_cast<UT>(LHS) & static_cast<UT>(RHS));
+}
+
+raw_ostream& operator<<(raw_ostream& Out, DiagnosticLevelMask M);
+
/// \brief Options for controlling the compiler diagnostics engine.
class DiagnosticOptions : public RefCountedBase<DiagnosticOptions>{
public:
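
DiagnosticLevelMask, added above, is a scoped-enum bitmask with its own ~, | and & overloads, so VerifyDiagnosticConsumer can be told which unexpected diagnostic levels to ignore. A hedged usage sketch, not part of the patch (variable names are illustrative):

    // Build a mask of levels to ignore and test membership; None acts as the
    // empty set, so a nonzero intersection means the level is selected.
    DiagnosticLevelMask Ignored =
        DiagnosticLevelMask::Note | DiagnosticLevelMask::Remark;
    bool IgnoreWarnings =
        (Ignored & DiagnosticLevelMask::Warning) != DiagnosticLevelMask::None;
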
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticParseKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticParseKinds.td
index f00a3b3..6a11d24 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -65,6 +65,10 @@ def ext_keyword_as_ident : ExtWarn<
"%select{here|for the remainder of the translation unit}1">,
InGroup<KeywordCompat>;
+def ext_nullability : Extension<
+ "type nullability specifier %0 is a Clang extension">,
+ InGroup<DiagGroup<"nullability-extension">>;
+
def error_empty_enum : Error<"use of empty enum">;
def err_invalid_sign_spec : Error<"'%0' cannot be signed or unsigned">;
def err_invalid_short_spec : Error<"'short %0' is invalid">;
@@ -989,17 +993,21 @@ def err_omp_expected_identifier_for_critical : Error<
// Pragma loop support.
def err_pragma_loop_missing_argument : Error<
"missing argument; expected %select{an integer value|"
- "'%select{enable|full}1' or 'disable'}0">;
+ "%select{'enable', 'assume_safety'|'full'}1 or 'disable'}0">;
def err_pragma_loop_invalid_option : Error<
"%select{invalid|missing}0 option%select{ %1|}0; expected vectorize, "
"vectorize_width, interleave, interleave_count, unroll, or unroll_count">;
def err_pragma_invalid_keyword : Error<
- "invalid argument; expected '%select{enable|full}0' or 'disable'">;
+ "invalid argument; expected %select{'enable', 'assume_safety'|'full'}0 or 'disable'">;
// Pragma unroll support.
def warn_pragma_unroll_cuda_value_in_parens : Warning<
"argument to '#pragma unroll' should not be in parentheses in CUDA C/C++">,
InGroup<CudaCompat>;
+
+def err_empty_attribute_block : Error<
+ "Microsoft attribute block cannot be empty">;
+
} // end of Parse Issue category.
let CategoryName = "Modules Issue" in {
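
The reworded err_pragma_loop_* messages above reflect that the vectorize and interleave loop-pragma options now accept 'assume_safety' alongside 'enable' and 'disable'. A sketch of the spelling those diagnostics describe, not part of the patch (loop body and variables are illustrative):

    // 'assume_safety' asks the loop transformations to skip their runtime
    // memory-safety checks for this loop (per the updated diagnostics).
    #pragma clang loop vectorize(assume_safety) interleave(assume_safety)
    for (int i = 0; i != n; ++i)
      out[i] = a[i] + b[i];
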
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
index be1911e..803203c 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -108,6 +108,7 @@ def err_ice_ambiguous_conversion : Error<
def err_ice_too_large : Error<
"integer constant expression evaluates to value %0 that cannot be "
"represented in a %1-bit %select{signed|unsigned}2 integer type">;
+def err_expr_not_string_literal : Error<"expression is not a string literal">;
// Semantic analysis of constant literals.
def ext_predef_outside_function : Warning<
@@ -424,6 +425,7 @@ def warn_redecl_library_builtin : Warning<
"incompatible redeclaration of library function %0">,
InGroup<DiagGroup<"incompatible-library-redeclaration">>;
def err_builtin_definition : Error<"definition of builtin function %0">;
+def err_arm_invalid_specialreg : Error<"invalid special register for builtin">;
def warn_builtin_unknown : Warning<"use of unknown builtin %0">,
InGroup<ImplicitFunctionDeclare>, DefaultError;
def warn_dyn_class_memaccess : Warning<
@@ -1974,8 +1976,11 @@ def err_attribute_too_few_arguments : Error<
def err_attribute_invalid_vector_type : Error<"invalid vector element type %0">;
def err_attribute_bad_neon_vector_size : Error<
"Neon vector size must be 64 or 128 bits">;
-def err_attribute_unsupported : Error<
- "%0 attribute is not supported for this target">;
+def warn_unsupported_target_attribute
+ : Warning<"Ignoring unsupported '%0' in the target attribute string">,
+ InGroup<IgnoredAttributes>;
+def err_attribute_unsupported
+ : Error<"%0 attribute is not supported for this target">;
// The err_*_attribute_argument_not_int are separate because they're used by
// VerifyIntegerConstantExpression.
def err_aligned_attribute_argument_not_int : Error<
@@ -2249,7 +2254,7 @@ def warn_attribute_dllimport_static_field_definition : Warning<
InGroup<DiagGroup<"dllimport-static-field-def">>;
def warn_attribute_dllexport_explicit_instantiation_decl : Warning<
"explicit instantiation declaration should not be 'dllexport'">,
- InGroup<DiagGroup<"dllexport-explicit-instantation-decl">>;
+ InGroup<DiagGroup<"dllexport-explicit-instantiation-decl">>;
def warn_invalid_initializer_from_system_header : Warning<
"invalid constructor form class in system header, should not be explicit">,
InGroup<DiagGroup<"invalid-initializer-from-system-header">>;
@@ -2680,6 +2685,8 @@ def err_mode_not_primitive : Error<
"mode attribute only supported for integer and floating-point types">;
def err_mode_wrong_type : Error<
"type of machine mode does not match type of base type">;
+def err_complex_mode_vector_type : Error<
+ "type of machine mode does not support base vector types">;
def err_attr_wrong_decl : Error<
"%0 attribute invalid on this declaration, requires typedef or value">;
def warn_attribute_nonnull_no_pointers : Warning<
@@ -2744,8 +2751,8 @@ def warn_ns_attribute_wrong_return_type : Warning<
"return %select{an Objective-C object|a pointer|a non-retainable pointer}2">,
InGroup<IgnoredAttributes>;
def warn_ns_attribute_wrong_parameter_type : Warning<
- "%0 attribute only applies to %select{Objective-C object|pointer}1 "
- "parameters">,
+ "%0 attribute only applies to "
+ "%select{Objective-C object|pointer|pointer-to-CF-pointer}1 parameters">,
InGroup<IgnoredAttributes>;
def warn_objc_requires_super_protocol : Warning<
"%0 attribute cannot be applied to %select{methods in protocols|dealloc}1">,
@@ -7631,10 +7638,12 @@ def err_module_private_local : Error<
def err_module_private_local_class : Error<
"local %select{struct|interface|union|class|enum}0 cannot be declared "
"__module_private__">;
-def err_module_private_declaration : Error<
- "declaration of %0 must be imported from module '%1' before it is required">;
-def err_module_private_definition : Error<
- "definition of %0 must be imported from module '%1' before it is required">;
+def err_module_unimported_use : Error<
+ "%select{declaration|definition|default argument}0 of %1 must be imported "
+ "from module '%2' before it is required">;
+def err_module_unimported_use_multiple : Error<
+ "%select{declaration|definition|default argument}0 of %1 must be imported "
+ "from one of the following modules before it is required:%2">;
def err_module_import_in_extern_c : Error<
"import of C++ module '%0' appears within extern \"C\" language linkage "
"specification">;
@@ -7665,4 +7674,55 @@ def warn_profile_data_unprofiled : Warning<
} // end of instrumentation issue category
+let CategoryName = "Nullability Issue" in {
+
+def warn_mismatched_nullability_attr : Warning<
+ "nullability specifier "
+ "'%select{__|}1%select{nonnull|nullable|null_unspecified}0' "
+ "conflicts with existing specifier "
+ "'%select{__|}3%select{nonnull|nullable|null_unspecified}2'">,
+ InGroup<Nullability>;
+
+def warn_nullability_declspec : Warning<
+ "nullability specifier "
+ "'%select{__nonnull|__nullable|__null_unspecified}0' cannot be applied "
+ "to non-pointer type %1; did you mean to apply the specifier to the "
+ "%select{pointer|block pointer|member pointer|function pointer|"
+ "member function pointer}2?">,
+ InGroup<NullabilityDeclSpec>,
+ DefaultError;
+
+def note_nullability_here : Note<
+ "'%select{__nonnull|__nullable|__null_unspecified}0' specified here">;
+
+def err_nullability_nonpointer : Error<
+ "nullability specifier "
+ "'%select{__|}1%select{nonnull|nullable|null_unspecified}0' cannot be applied "
+ "to non-pointer type %2">;
+
+def warn_nullability_lost : Warning<
+ "implicit conversion from nullable pointer %0 to non-nullable pointer "
+ "type %1">,
+ InGroup<NullableToNonNullConversion>, DefaultIgnore;
+
+def err_nullability_cs_multilevel : Error<
+ "nullability keyword "
+ "'%select{nonnull|nullable|null_unspecified}0' cannot be applied to "
+ "multi-level pointer type %1">;
+def note_nullability_type_specifier : Note<
+ "use nullability type specifier "
+ "'%select{__nonnull|__nullable|__null_unspecified}0' to affect the innermost "
+ "pointer type of %1">;
+
+def warn_null_resettable_setter : Warning<
+ "synthesized setter %0 for null_resettable property %1 does not handle nil">,
+ InGroup<Nullability>;
+
+def warn_nullability_missing : Warning<
+ "%select{pointer|block pointer|member pointer}0 is missing a nullability "
+ "type specifier (__nonnull, __nullable, or __null_unspecified)">,
+ InGroup<NullabilityCompleteness>;
+
+}
+
} // end of sema component.
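
warn_nullability_lost above fires on implicit conversions from nullable to non-nullable pointers; it lives in the -Wnullable-to-nonnull-conversion group added earlier and is off by default (DefaultIgnore). A minimal sketch, not part of the patch, using the __nonnull/__nullable keywords this import introduces:

    void consume(int *__nonnull p);
    void produce(int *__nullable q) {
      consume(q);   // nullable -> nonnull: warn_nullability_lost when enabled
    }
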
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/FileManager.h b/contrib/llvm/tools/clang/include/clang/Basic/FileManager.h
index 37e19e1..ac0d7a1 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/FileManager.h
+++ b/contrib/llvm/tools/clang/include/clang/Basic/FileManager.h
@@ -25,16 +25,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include <memory>
-// FIXME: Enhance libsystem to support inode and other fields in stat.
-#include <sys/types.h>
#include <map>
-#ifdef _MSC_VER
-typedef unsigned short mode_t;
-#endif
-
-struct stat;
-
namespace llvm {
class MemoryBuffer;
}
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/LangOptions.def b/contrib/llvm/tools/clang/include/clang/Basic/LangOptions.def
index 95d3f06..bef122f 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/LangOptions.def
+++ b/contrib/llvm/tools/clang/include/clang/Basic/LangOptions.def
@@ -127,8 +127,7 @@ LANGOPT(Modules , 1, 0, "modules extension to C")
COMPATIBLE_LANGOPT(ModulesDeclUse , 1, 0, "require declaration of module uses")
LANGOPT(ModulesSearchAll , 1, 1, "search even non-imported modules to find unresolved references")
COMPATIBLE_LANGOPT(ModulesStrictDeclUse, 1, 0, "require declaration of module uses and all headers to be in modules")
-LANGOPT(ModulesErrorRecovery, 1, 1, "automatically import modules as needed when performing error recovery")
-BENIGN_LANGOPT(ModulesImplicitMaps, 1, 1, "use files called module.modulemap implicitly as module maps")
+BENIGN_LANGOPT(ModulesErrorRecovery, 1, 1, "automatically import modules as needed when performing error recovery")
BENIGN_LANGOPT(ImplicitModules, 1, 1, "build modules that are not specified via -fmodule-file")
COMPATIBLE_LANGOPT(ModulesLocalVisibility, 1, 0, "local submodule visibility")
COMPATIBLE_LANGOPT(Optimize , 1, 0, "__OPTIMIZE__ predefined macro")
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def b/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def
index 0145db0..b09f012 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def
+++ b/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def
@@ -84,6 +84,7 @@ OPENMP_DIRECTIVE(critical)
OPENMP_DIRECTIVE(taskyield)
OPENMP_DIRECTIVE(barrier)
OPENMP_DIRECTIVE(taskwait)
+OPENMP_DIRECTIVE(taskgroup)
OPENMP_DIRECTIVE(flush)
OPENMP_DIRECTIVE(ordered)
OPENMP_DIRECTIVE(atomic)
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/Sanitizers.def b/contrib/llvm/tools/clang/include/clang/Basic/Sanitizers.def
index 65ababd..1b528c8 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/Sanitizers.def
+++ b/contrib/llvm/tools/clang/include/clang/Basic/Sanitizers.def
@@ -41,6 +41,9 @@
// AddressSanitizer
SANITIZER("address", Address)
+// Kernel AddressSanitizer (KASan)
+SANITIZER("kernel-address", KernelAddress)
+
// MemorySanitizer
SANITIZER("memory", Memory)
@@ -87,18 +90,20 @@ SANITIZER("cfi-vcall", CFIVCall)
SANITIZER_GROUP("cfi", CFI,
CFIDerivedCast | CFIUnrelatedCast | CFINVCall | CFIVCall)
-// -fsanitize=undefined-trap includes sanitizers from -fsanitize=undefined
-// that can be used without runtime support, generally by providing extra
-// -fsanitize-undefined-trap-on-error flag.
-SANITIZER_GROUP("undefined-trap", UndefinedTrap,
+// Safe Stack
+SANITIZER("safe-stack", SafeStack)
+
+// -fsanitize=undefined includes all the sanitizers which have low overhead, no
+// ABI or address space layout implications, and only catch undefined behavior.
+SANITIZER_GROUP("undefined", Undefined,
Alignment | Bool | ArrayBounds | Enum | FloatCastOverflow |
FloatDivideByZero | IntegerDivideByZero | NonnullAttribute |
Null | ObjectSize | Return | ReturnsNonnullAttribute |
- Shift | SignedIntegerOverflow | Unreachable | VLABound)
+ Shift | SignedIntegerOverflow | Unreachable | VLABound |
+ Function | Vptr)
-// -fsanitize=undefined includes all the sanitizers which have low overhead, no
-// ABI or address space layout implications, and only catch undefined behavior.
-SANITIZER_GROUP("undefined", Undefined, UndefinedTrap | Function | Vptr)
+// -fsanitize=undefined-trap is an alias for -fsanitize=undefined.
+SANITIZER_GROUP("undefined-trap", UndefinedTrap, Undefined)
SANITIZER_GROUP("integer", Integer,
SignedIntegerOverflow | UnsignedIntegerOverflow | Shift |
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/Sanitizers.h b/contrib/llvm/tools/clang/include/clang/Basic/Sanitizers.h
index 3b1797e..78c1ddb 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/Sanitizers.h
+++ b/contrib/llvm/tools/clang/include/clang/Basic/Sanitizers.h
@@ -52,6 +52,9 @@ struct SanitizerSet {
/// \brief Check if a certain (single) sanitizer is enabled.
bool has(SanitizerMask K) const;
+ /// \brief Check if one or more sanitizers are enabled.
+ bool hasOneOf(SanitizerMask K) const;
+
/// \brief Enable or disable a certain (single) sanitizer.
void set(SanitizerMask K, bool Value);
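
hasOneOf complements has(): it answers whether any member of a possibly multi-bit mask, such as a sanitizer group, is enabled. A hedged sketch, not part of the patch, using the CFI group from the Sanitizers.def hunk above:

    SanitizerSet S;
    S.set(SanitizerKind::CFIVCall, true);           // enable one CFI check
    bool AnyCFI = S.hasOneOf(SanitizerKind::CFI);   // true: group mask overlaps
    bool HasVptr = S.has(SanitizerKind::Vptr);      // false: exact single check
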
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h b/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h
index 7569c16..5ce56c0 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h
+++ b/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h
@@ -16,6 +16,9 @@
#ifndef LLVM_CLANG_BASIC_SPECIFIERS_H
#define LLVM_CLANG_BASIC_SPECIFIERS_H
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+
namespace clang {
/// \brief Specifies the width of a type, e.g., short, long, or long long.
enum TypeSpecifierWidth {
@@ -239,6 +242,22 @@ namespace clang {
SD_Static, ///< Static storage duration.
SD_Dynamic ///< Dynamic storage duration.
};
+
+ /// Describes the nullability of a particular type.
+ enum class NullabilityKind : uint8_t {
+ /// Values of this type can never be null.
+ NonNull = 0,
+ /// Values of this type can be null.
+ Nullable,
+ /// Whether values of this type can be null is (explicitly)
+ /// unspecified. This captures a (fairly rare) case where we
+ /// can't conclude anything about the nullability of the type even
+ /// though it has been considered.
+ Unspecified
+ };
+
+ /// Retrieve the spelling of the given nullability kind.
+ llvm::StringRef getNullabilitySpelling(NullabilityKind kind);
} // end namespace clang
#endif // LLVM_CLANG_BASIC_SPECIFIERS_H
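
NullabilityKind gives Sema and the printers a single three-valued description of pointer nullability, and getNullabilitySpelling maps it back to text. A small sketch, not part of the patch (assumed to return the spelling used in diagnostics):

    clang::NullabilityKind K = clang::NullabilityKind::Nullable;
    llvm::StringRef Spelling = clang::getNullabilitySpelling(K);
    // Spelling can then be interpolated into nullability diagnostics.
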
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/StmtNodes.td b/contrib/llvm/tools/clang/include/clang/Basic/StmtNodes.td
index 750108f..675e91d 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/StmtNodes.td
+++ b/contrib/llvm/tools/clang/include/clang/Basic/StmtNodes.td
@@ -77,7 +77,9 @@ def CompoundLiteralExpr : DStmt<Expr>;
def ExtVectorElementExpr : DStmt<Expr>;
def InitListExpr : DStmt<Expr>;
def DesignatedInitExpr : DStmt<Expr>;
+def DesignatedInitUpdateExpr : DStmt<Expr>;
def ImplicitValueInitExpr : DStmt<Expr>;
+def NoInitExpr : DStmt<Expr>;
def ParenListExpr : DStmt<Expr>;
def VAArgExpr : DStmt<Expr>;
def GenericSelectionExpr : DStmt<Expr>;
@@ -197,6 +199,7 @@ def OMPTaskDirective : DStmt<OMPExecutableDirective>;
def OMPTaskyieldDirective : DStmt<OMPExecutableDirective>;
def OMPBarrierDirective : DStmt<OMPExecutableDirective>;
def OMPTaskwaitDirective : DStmt<OMPExecutableDirective>;
+def OMPTaskgroupDirective : DStmt<OMPExecutableDirective>;
def OMPFlushDirective : DStmt<OMPExecutableDirective>;
def OMPOrderedDirective : DStmt<OMPExecutableDirective>;
def OMPAtomicDirective : DStmt<OMPExecutableDirective>;
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/TargetBuiltins.h b/contrib/llvm/tools/clang/include/clang/Basic/TargetBuiltins.h
index eece4e8..b4740c5 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/TargetBuiltins.h
+++ b/contrib/llvm/tools/clang/include/clang/Basic/TargetBuiltins.h
@@ -73,12 +73,12 @@ namespace clang {
};
}
- /// \brief R600 builtins
- namespace R600 {
+ /// \brief AMDGPU builtins
+ namespace AMDGPU {
enum {
LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
- #include "clang/Basic/BuiltinsR600.def"
+ #include "clang/Basic/BuiltinsAMDGPU.def"
LastTSBuiltin
};
}
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/TargetInfo.h b/contrib/llvm/tools/clang/include/clang/Basic/TargetInfo.h
index 8406205..e415733 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/TargetInfo.h
+++ b/contrib/llvm/tools/clang/include/clang/Basic/TargetInfo.h
@@ -363,6 +363,10 @@ public:
return *LongDoubleFormat;
}
+ /// \brief Return true if the 'long double' type should be mangled like
+ /// __float128.
+ virtual bool useFloat128ManglingForLongDouble() const { return false; }
+
/// \brief Return the value for the C99 FLT_EVAL_METHOD macro.
virtual unsigned getFloatEvalMethod() const { return 0; }
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/TargetOptions.h b/contrib/llvm/tools/clang/include/clang/Basic/TargetOptions.h
index 9782539..ca0cca7 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/TargetOptions.h
+++ b/contrib/llvm/tools/clang/include/clang/Basic/TargetOptions.h
@@ -45,6 +45,8 @@ public:
/// The list of target specific features to enable or disable -- this should
/// be a list of strings starting with by '+' or '-'.
std::vector<std::string> Features;
+
+ std::vector<std::string> Reciprocals;
};
} // end namespace clang
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/TokenKinds.def b/contrib/llvm/tools/clang/include/clang/Basic/TokenKinds.def
index b6d983b..67b9933 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/TokenKinds.def
+++ b/contrib/llvm/tools/clang/include/clang/Basic/TokenKinds.def
@@ -548,6 +548,11 @@ ALIAS("__typeof__" , typeof , KEYALL)
ALIAS("__volatile" , volatile , KEYALL)
ALIAS("__volatile__" , volatile , KEYALL)
+// Type nullability.
+KEYWORD(__nonnull , KEYALL)
+KEYWORD(__nullable , KEYALL)
+KEYWORD(__null_unspecified , KEYALL)
+
// Microsoft extensions which should be disabled in strict conformance mode
KEYWORD(__ptr64 , KEYMS)
KEYWORD(__ptr32 , KEYMS)
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/arm_neon.td b/contrib/llvm/tools/clang/include/clang/Basic/arm_neon.td
index 933f204..c6f8795 100644
--- a/contrib/llvm/tools/clang/include/clang/Basic/arm_neon.td
+++ b/contrib/llvm/tools/clang/include/clang/Basic/arm_neon.td
@@ -803,6 +803,13 @@ def VREINTERPRET
def VFMA : SInst<"vfma", "dddd", "fQf">;
////////////////////////////////////////////////////////////////////////////////
+// fp16 vector operations
+def SCALAR_HALF_GET_LANE : IOpInst<"vget_lane", "sdi", "h", OP_SCALAR_HALF_GET_LN>;
+def SCALAR_HALF_SET_LANE : IOpInst<"vset_lane", "dsdi", "h", OP_SCALAR_HALF_SET_LN>;
+def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "sdi", "Qh", OP_SCALAR_HALF_GET_LNQ>;
+def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", "dsdi", "Qh", OP_SCALAR_HALF_SET_LNQ>;
+
+////////////////////////////////////////////////////////////////////////////////
// AArch64 Intrinsics
let ArchGuard = "defined(__aarch64__)" in {
@@ -1594,10 +1601,4 @@ def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_
def SCALAR_VDUP_LANE : IInst<"vdup_lane", "sdi", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
-
-// FIXME: Rename so it is obvious this only applies to halfs.
-def SCALAR_HALF_GET_LANE : IOpInst<"vget_lane", "sdi", "h", OP_SCALAR_HALF_GET_LN>;
-def SCALAR_HALF_SET_LANE : IOpInst<"vset_lane", "dsdi", "h", OP_SCALAR_HALF_SET_LN>;
-def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "sdi", "Qh", OP_SCALAR_HALF_GET_LNQ>;
-def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", "dsdi", "Qh", OP_SCALAR_HALF_SET_LNQ>;
}
diff --git a/contrib/llvm/tools/clang/include/clang/Driver/CC1Options.td b/contrib/llvm/tools/clang/include/clang/Driver/CC1Options.td
index 1622c41..f2ef71e 100644
--- a/contrib/llvm/tools/clang/include/clang/Driver/CC1Options.td
+++ b/contrib/llvm/tools/clang/include/clang/Driver/CC1Options.td
@@ -301,6 +301,10 @@ def fmessage_length : Separate<["-"], "fmessage-length">, MetaVarName<"<N>">,
HelpText<"Format message diagnostics so that they fit within N columns or fewer, when possible.">;
def verify : Flag<["-"], "verify">,
HelpText<"Verify diagnostic output using comment directives">;
+def verify_ignore_unexpected : Flag<["-"], "verify-ignore-unexpected">,
+ HelpText<"Ignore unexpected diagnostic messages">;
+def verify_ignore_unexpected_EQ : CommaJoined<["-"], "verify-ignore-unexpected=">,
+ HelpText<"Ignore unexpected diagnostic messages">;
def Wno_rewrite_macros : Flag<["-"], "Wno-rewrite-macros">,
HelpText<"Silence ObjC rewriting warnings">;
diff --git a/contrib/llvm/tools/clang/include/clang/Driver/CLCompatOptions.td b/contrib/llvm/tools/clang/include/clang/Driver/CLCompatOptions.td
index e217cb7..01913be 100644
--- a/contrib/llvm/tools/clang/include/clang/Driver/CLCompatOptions.td
+++ b/contrib/llvm/tools/clang/include/clang/Driver/CLCompatOptions.td
@@ -139,6 +139,8 @@ def _SLASH_wd4005 : CLFlag<"wd4005">, Alias<W_Joined>,
AliasArgs<["no-macro-redefined"]>;
def _SLASH_wd4996 : CLFlag<"wd4996">, Alias<W_Joined>,
AliasArgs<["no-deprecated-declarations"]>;
+def _SLASH_wd4910 : CLFlag<"wd4910">, Alias<W_Joined>,
+ AliasArgs<["no-dllexport-explicit-instantiation-decl"]>;
def _SLASH_vd : CLJoined<"vd">, HelpText<"Control vtordisp placement">,
Alias<vtordisp_mode_EQ>;
def _SLASH_Zc_sizedDealloc : CLFlag<"Zc:sizedDealloc">,
@@ -202,7 +204,6 @@ def _SLASH_Fi : CLCompileJoined<"Fi">,
def _SLASH_Fo : CLCompileJoined<"Fo">,
HelpText<"Set output object file, or directory (ends in / or \\)">,
MetaVarName<"<file or directory>">;
-def _SLASH_GL : CLFlag<"GL">, Alias<flto>;
def _SLASH_LD : CLFlag<"LD">, HelpText<"Create DLL">;
def _SLASH_LDd : CLFlag<"LDd">, HelpText<"Create debug DLL">;
def _SLASH_link : CLRemainingArgs<"link">,
@@ -287,6 +288,7 @@ def _SLASH_G2 : CLFlag<"G2">;
def _SLASH_Ge : CLFlag<"Ge">;
def _SLASH_Gh : CLFlag<"Gh">;
def _SLASH_GH : CLFlag<"GH">;
+def _SLASH_GL : CLFlag<"GL">;
def _SLASH_GL_ : CLFlag<"GL-">;
def _SLASH_Gm : CLFlag<"Gm">;
def _SLASH_Gm_ : CLFlag<"Gm-">;
diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Driver.h b/contrib/llvm/tools/clang/include/clang/Driver/Driver.h
index 09521c2..d7bb1d2 100644
--- a/contrib/llvm/tools/clang/include/clang/Driver/Driver.h
+++ b/contrib/llvm/tools/clang/include/clang/Driver/Driver.h
@@ -91,7 +91,7 @@ public:
/// The path to the compiler resource directory.
std::string ResourceDir;
- /// A prefix directory used to emulated a limited subset of GCC's '-Bprefix'
+ /// A prefix directory used to emulate a limited subset of GCC's '-Bprefix'
/// functionality.
/// FIXME: This type of customization should be removed in favor of the
/// universal driver when it is ready.
@@ -402,7 +402,7 @@ public:
/// handle this action.
bool ShouldUseClangCompiler(const JobAction &JA) const;
- bool IsUsingLTO(const ToolChain &TC, const llvm::opt::ArgList &Args) const;
+ bool IsUsingLTO(const llvm::opt::ArgList &Args) const;
private:
/// \brief Retrieves a ToolChain for a particular target triple.
diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Options.td b/contrib/llvm/tools/clang/include/clang/Driver/Options.td
index 7e39a9a..aae3776 100644
--- a/contrib/llvm/tools/clang/include/clang/Driver/Options.td
+++ b/contrib/llvm/tools/clang/include/clang/Driver/Options.td
@@ -561,8 +561,13 @@ def fno_sanitize_recover_EQ
: CommaJoined<["-"], "fno-sanitize-recover=">,
Group<f_clang_Group>,
HelpText<"Disable recovery for specified sanitizers">;
+def fsanitize_trap_EQ : CommaJoined<["-"], "fsanitize-trap=">, Group<f_clang_Group>,
+ Flags<[CC1Option, CoreOption]>,
+ HelpText<"Enable trapping for specified sanitizers">;
+def fno_sanitize_trap_EQ : CommaJoined<["-"], "fno-sanitize-trap=">, Group<f_clang_Group>,
+ HelpText<"Disable trapping for specified sanitizers">;
def fsanitize_undefined_trap_on_error : Flag<["-"], "fsanitize-undefined-trap-on-error">,
- Group<f_clang_Group>, Flags<[CC1Option]>;
+ Group<f_clang_Group>;
def fno_sanitize_undefined_trap_on_error : Flag<["-"], "fno-sanitize-undefined-trap-on-error">,
Group<f_clang_Group>;
def fsanitize_link_cxx_runtime : Flag<["-"], "fsanitize-link-c++-runtime">,
@@ -691,9 +696,10 @@ def fmodules_validate_system_headers : Flag<["-"], "fmodules-validate-system-hea
def fmodules : Flag <["-"], "fmodules">, Group<f_Group>,
Flags<[DriverOption, CC1Option]>,
HelpText<"Enable the 'modules' language feature">;
-def fmodule_maps : Flag <["-"], "fmodule-maps">, Group<f_Group>,
- Flags<[DriverOption,CC1Option]>,
- HelpText<"Read module maps to understand the structure of library headers">;
+def fimplicit_module_maps : Flag <["-"], "fimplicit-module-maps">, Group<f_Group>,
+ Flags<[DriverOption, CC1Option]>,
+ HelpText<"Implicitly search the file system for module map files.">;
+def fmodule_maps : Flag <["-"], "fmodule-maps">, Alias<fimplicit_module_maps>;
def fmodule_name : JoinedOrSeparate<["-"], "fmodule-name=">, Group<f_Group>,
Flags<[DriverOption,CC1Option]>, MetaVarName<"<name>">,
HelpText<"Specify the name of the module to build">;
@@ -713,12 +719,6 @@ def fmodules_strict_decluse : Flag <["-"], "fmodules-strict-decluse">, Group<f_G
HelpText<"Like -fmodules-decluse but requires all headers to be in modules">;
def fno_modules_search_all : Flag <["-"], "fno-modules-search-all">, Group<f_Group>,
Flags<[DriverOption, CC1Option]>;
-def fmodules_implicit_maps :
- Flag <["-"], "fmodules-implicit-maps">,
- Group<f_Group>, Flags<[DriverOption, CC1Option]>;
-def fno_modules_implicit_maps :
- Flag <["-"], "fno-modules-implicit-maps">,
- Group<f_Group>, Flags<[DriverOption, CC1Option]>;
def fno_implicit_modules :
Flag <["-"], "fno-implicit-modules">,
Group<f_Group>, Flags<[DriverOption, CC1Option]>;
@@ -780,8 +780,9 @@ def fno_merge_all_constants : Flag<["-"], "fno-merge-all-constants">, Group<f_Gr
Flags<[CC1Option]>, HelpText<"Disallow merging of constants">;
def fno_modules : Flag <["-"], "fno-modules">, Group<f_Group>,
Flags<[DriverOption]>;
-def fno_module_maps : Flag <["-"], "fno-module-maps">, Group<f_Group>,
+def fno_implicit_module_maps : Flag <["-"], "fno-implicit-module-maps">, Group<f_Group>,
Flags<[DriverOption]>;
+def fno_module_maps : Flag <["-"], "fno-module-maps">, Alias<fno_implicit_module_maps>;
def fno_modules_decluse : Flag <["-"], "fno-modules-decluse">, Group<f_Group>,
Flags<[DriverOption]>;
def fno_modules_strict_decluse : Flag <["-"], "fno-strict-modules-decluse">, Group<f_Group>,
@@ -1338,6 +1339,8 @@ def msoft_float : Flag<["-"], "msoft-float">, Group<m_Group>, Flags<[CC1Option]>
def mno_implicit_float : Flag<["-"], "mno-implicit-float">, Group<m_Group>,
HelpText<"Don't generate implicit floating point instructions">;
def mimplicit_float : Flag<["-"], "mimplicit-float">, Group<m_Group>;
+def mrecip : Flag<["-"], "mrecip">, Group<m_Group>;
+def mrecip_EQ : CommaJoined<["-"], "mrecip=">, Group<m_Group>, Flags<[CC1Option]>;
def msse2 : Flag<["-"], "msse2">, Group<m_x86_Features_Group>;
def msse3 : Flag<["-"], "msse3">, Group<m_x86_Features_Group>;
def msse4a : Flag<["-"], "msse4a">, Group<m_x86_Features_Group>;
diff --git a/contrib/llvm/tools/clang/include/clang/Driver/SanitizerArgs.h b/contrib/llvm/tools/clang/include/clang/Driver/SanitizerArgs.h
index bfa63e7..ceba912 100644
--- a/contrib/llvm/tools/clang/include/clang/Driver/SanitizerArgs.h
+++ b/contrib/llvm/tools/clang/include/clang/Driver/SanitizerArgs.h
@@ -23,13 +23,13 @@ class ToolChain;
class SanitizerArgs {
SanitizerSet Sanitizers;
SanitizerSet RecoverableSanitizers;
+ SanitizerSet TrapSanitizers;
std::vector<std::string> BlacklistFiles;
int CoverageFeatures;
int MsanTrackOrigins;
int AsanFieldPadding;
bool AsanZeroBaseShadow;
- bool UbsanTrapOnError;
bool AsanSharedRuntime;
bool LinkCXXRuntimes;
@@ -47,10 +47,12 @@ class SanitizerArgs {
}
bool needsUbsanRt() const;
bool needsDfsanRt() const { return Sanitizers.has(SanitizerKind::DataFlow); }
+ bool needsSafeStackRt() const {
+ return Sanitizers.has(SanitizerKind::SafeStack);
+ }
bool requiresPIE() const;
bool needsUnwindTables() const;
- bool needsLTO() const;
bool linkCXXRuntimes() const { return LinkCXXRuntimes; }
void addArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
diff --git a/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h b/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h
index 560df19..aba18c9 100644
--- a/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h
+++ b/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h
@@ -10,6 +10,7 @@
#ifndef LLVM_CLANG_DRIVER_TOOLCHAIN_H
#define LLVM_CLANG_DRIVER_TOOLCHAIN_H
+#include "clang/Basic/Sanitizers.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Multilib.h"
#include "clang/Driver/Types.h"
@@ -164,7 +165,10 @@ public:
}
/// Choose a tool to use to handle the action \p JA.
- Tool *SelectTool(const JobAction &JA) const;
+ ///
+ /// This can be overridden when a particular ToolChain needs to use
+ /// a C compiler other than Clang.
+ virtual Tool *SelectTool(const JobAction &JA) const;
// Helper methods
@@ -345,6 +349,9 @@ public:
virtual bool
AddFastMathRuntimeIfAvailable(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
+
+ /// \brief Return sanitizers which are available in this toolchain.
+ virtual SanitizerMask getSupportedSanitizers() const;
};
} // end namespace driver
diff --git a/contrib/llvm/tools/clang/include/clang/Format/Format.h b/contrib/llvm/tools/clang/include/clang/Format/Format.h
index 3fdee99..ad87a05 100644
--- a/contrib/llvm/tools/clang/include/clang/Format/Format.h
+++ b/contrib/llvm/tools/clang/include/clang/Format/Format.h
@@ -210,10 +210,10 @@ struct FormatStyle {
enum ShortFunctionStyle {
/// \brief Never merge functions into a single line.
SFS_None,
- /// \brief Only merge functions defined inside a class.
- SFS_Inline,
/// \brief Only merge empty functions.
SFS_Empty,
+ /// \brief Only merge functions defined inside a class. Implies "empty".
+ SFS_Inline,
/// \brief Merge all functions fitting on a single line.
SFS_All,
};
@@ -287,6 +287,11 @@ struct FormatStyle {
bool AlwaysBreakTemplateDeclarations;
/// \brief If \c true, always break before multiline string literals.
+ ///
+  /// This flag is meant to make cases where there are multiple multiline strings
+ /// in a file look more consistent. Thus, it will only take effect if wrapping
+ /// the string at that point leads to it being indented
+ /// \c ContinuationIndentWidth spaces from the start of the line.
bool AlwaysBreakBeforeMultilineStrings;
/// \brief Different ways to use tab in formatting.
diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/ASTUnit.h b/contrib/llvm/tools/clang/include/clang/Frontend/ASTUnit.h
index 405774b..2d38352 100644
--- a/contrib/llvm/tools/clang/include/clang/Frontend/ASTUnit.h
+++ b/contrib/llvm/tools/clang/include/clang/Frontend/ASTUnit.h
@@ -56,6 +56,7 @@ class FileEntry;
class FileManager;
class HeaderSearch;
class Preprocessor;
+class PCHContainerOperations;
class SourceManager;
class TargetInfo;
class ASTFrontendAction;
@@ -422,7 +423,8 @@ private:
explicit ASTUnit(bool MainFileIsAST);
void CleanTemporaryFiles();
- bool Parse(std::unique_ptr<llvm::MemoryBuffer> OverrideMainBuffer);
+ bool Parse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ std::unique_ptr<llvm::MemoryBuffer> OverrideMainBuffer);
struct ComputedPreamble {
llvm::MemoryBuffer *Buffer;
@@ -442,6 +444,7 @@ private:
unsigned MaxLines);
std::unique_ptr<llvm::MemoryBuffer> getMainBufferWithPrecompiledPreamble(
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
const CompilerInvocation &PreambleInvocationIn, bool AllowRebuild = true,
unsigned MaxLines = 0);
void RealizeTopLevelDeclsFromPreamble();
@@ -715,12 +718,16 @@ public:
///
/// \param Filename - The AST file to load.
///
+ /// \param PCHContainerOps - The PCHContainerOperations to use for loading and
+ /// creating modules.
/// \param Diags - The diagnostics engine to use for reporting errors; its
/// lifetime is expected to extend past that of the returned ASTUnit.
///
/// \returns - The initialized ASTUnit or null if the AST failed to load.
static std::unique_ptr<ASTUnit> LoadFromASTFile(
- const std::string &Filename, IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
+ const std::string &Filename,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
const FileSystemOptions &FileSystemOpts, bool OnlyLocalDecls = false,
ArrayRef<RemappedFile> RemappedFiles = None,
bool CaptureDiagnostics = false, bool AllowPCHWithCompilerErrors = false,
@@ -735,8 +742,10 @@ private:
///
/// \returns \c true if a catastrophic failure occurred (which means that the
/// \c ASTUnit itself is invalid), or \c false otherwise.
- bool LoadFromCompilerInvocation(bool PrecompilePreamble);
-
+ bool LoadFromCompilerInvocation(
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ bool PrecompilePreamble);
+
public:
/// \brief Create an ASTUnit from a source file, via a CompilerInvocation
@@ -745,6 +754,9 @@ public:
/// \param CI - The compiler invocation to use; it must have exactly one input
/// source file. The ASTUnit takes ownership of the CompilerInvocation object.
///
+ /// \param PCHContainerOps - The PCHContainerOperations to use for loading and
+ /// creating modules.
+ ///
/// \param Diags - The diagnostics engine to use for reporting errors; its
/// lifetime is expected to extend past that of the returned ASTUnit.
///
@@ -765,7 +777,9 @@ public:
/// created ASTUnit was passed in \p Unit then the caller can check that.
///
static ASTUnit *LoadFromCompilerInvocationAction(
- CompilerInvocation *CI, IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
+ CompilerInvocation *CI,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
ASTFrontendAction *Action = nullptr, ASTUnit *Unit = nullptr,
bool Persistent = true, StringRef ResourceFilesPath = StringRef(),
bool OnlyLocalDecls = false, bool CaptureDiagnostics = false,
@@ -780,15 +794,20 @@ public:
/// \param CI - The compiler invocation to use; it must have exactly one input
/// source file. The ASTUnit takes ownership of the CompilerInvocation object.
///
+ /// \param PCHContainerOps - The PCHContainerOperations to use for loading and
+ /// creating modules.
+ ///
/// \param Diags - The diagnostics engine to use for reporting errors; its
/// lifetime is expected to extend past that of the returned ASTUnit.
//
// FIXME: Move OnlyLocalDecls, UseBumpAllocator to setters on the ASTUnit, we
// shouldn't need to specify them at construction time.
static std::unique_ptr<ASTUnit> LoadFromCompilerInvocation(
- CompilerInvocation *CI, IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
- bool OnlyLocalDecls = false, bool CaptureDiagnostics = false,
- bool PrecompilePreamble = false, TranslationUnitKind TUKind = TU_Complete,
+ CompilerInvocation *CI,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ IntrusiveRefCntPtr<DiagnosticsEngine> Diags, bool OnlyLocalDecls = false,
+ bool CaptureDiagnostics = false, bool PrecompilePreamble = false,
+ TranslationUnitKind TUKind = TU_Complete,
bool CacheCodeCompletionResults = false,
bool IncludeBriefCommentsInCodeCompletion = false,
bool UserFilesAreVolatile = false);
@@ -800,6 +819,9 @@ public:
///
/// \param ArgEnd - The end of the argument vector.
///
+ /// \param PCHContainerOps - The PCHContainerOperations to use for loading and
+ /// creating modules.
+ ///
/// \param Diags - The diagnostics engine to use for reporting errors; its
/// lifetime is expected to extend past that of the returned ASTUnit.
///
@@ -813,6 +835,7 @@ public:
// shouldn't need to specify them at construction time.
static ASTUnit *LoadFromCommandLine(
const char **ArgBegin, const char **ArgEnd,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
IntrusiveRefCntPtr<DiagnosticsEngine> Diags, StringRef ResourceFilesPath,
bool OnlyLocalDecls = false, bool CaptureDiagnostics = false,
ArrayRef<RemappedFile> RemappedFiles = None,
@@ -828,8 +851,9 @@ public:
/// were originally used to produce this translation unit.
///
/// \returns True if a failure occurred that causes the ASTUnit not to
- /// contain any translation-unit information, false otherwise.
- bool Reparse(ArrayRef<RemappedFile> RemappedFiles = None);
+ /// contain any translation-unit information, false otherwise.
+ bool Reparse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ ArrayRef<RemappedFile> RemappedFiles = None);
/// \brief Perform code completion at the given file, line, and
/// column within this translation unit.
@@ -852,14 +876,14 @@ public:
/// FIXME: The Diag, LangOpts, SourceMgr, FileMgr, StoredDiagnostics, and
/// OwnedBuffers parameters are all disgusting hacks. They will go away.
void CodeComplete(StringRef File, unsigned Line, unsigned Column,
- ArrayRef<RemappedFile> RemappedFiles,
- bool IncludeMacros, bool IncludeCodePatterns,
- bool IncludeBriefComments,
+ ArrayRef<RemappedFile> RemappedFiles, bool IncludeMacros,
+ bool IncludeCodePatterns, bool IncludeBriefComments,
CodeCompleteConsumer &Consumer,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
DiagnosticsEngine &Diag, LangOptions &LangOpts,
SourceManager &SourceMgr, FileManager &FileMgr,
SmallVectorImpl<StoredDiagnostic> &StoredDiagnostics,
- SmallVectorImpl<const llvm::MemoryBuffer *> &OwnedBuffers);
+ SmallVectorImpl<const llvm::MemoryBuffer *> &OwnedBuffers);
/// \brief Save this translation unit to a file with the given name.
///
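
Throughout ASTUnit the entry points now take a shared PCHContainerOperations so callers choose how serialized ASTs are wrapped. A hedged calling sketch, not part of the patch (Args, Diags and ResourceDir are assumed to have been set up elsewhere):

    std::unique_ptr<clang::ASTUnit> AST(clang::ASTUnit::LoadFromCommandLine(
        Args.data(), Args.data() + Args.size(),
        std::make_shared<clang::RawPCHContainerOperations>(),  // container impl
        Diags, ResourceDir));
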
diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def b/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def
index adf1c87..d34cf0c 100644
--- a/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def
+++ b/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def
@@ -120,8 +120,6 @@ CODEGENOPT(SanitizeCoverageTraceCmp, 1, 0) ///< Enable cmp instruction tracing
///< in sanitizer coverage.
CODEGENOPT(SanitizeCoverage8bitCounters, 1, 0) ///< Use 8-bit frequency counters
///< in sanitizer coverage.
-CODEGENOPT(SanitizeUndefinedTrapOnError, 1, 0) ///< Set on
- /// -fsanitize-undefined-trap-on-error
CODEGENOPT(SimplifyLibCalls , 1, 1) ///< Set when -fbuiltin is enabled.
CODEGENOPT(SoftFloat , 1, 0) ///< -soft-float.
CODEGENOPT(StrictEnums , 1, 0) ///< Optimize based on strict enum definition.
diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.h b/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.h
index 0c5ce58..66597bd 100644
--- a/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.h
+++ b/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.h
@@ -197,6 +197,9 @@ public:
/// continued when possible).
SanitizerSet SanitizeRecover;
+ /// Set of sanitizer checks that trap rather than diagnose.
+ SanitizerSet SanitizeTrap;
+
public:
// Define accessors/mutators for code generation options of enumeration type.
#define CODEGENOPT(Name, Bits, Default)
diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/CompilerInstance.h b/contrib/llvm/tools/clang/include/clang/Frontend/CompilerInstance.h
index 8d0d939..9cd806c 100644
--- a/contrib/llvm/tools/clang/include/clang/Frontend/CompilerInstance.h
+++ b/contrib/llvm/tools/clang/include/clang/Frontend/CompilerInstance.h
@@ -11,6 +11,7 @@
#define LLVM_CLANG_FRONTEND_COMPILERINSTANCE_H_
#include "clang/AST/ASTConsumer.h"
+#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/CompilerInvocation.h"
@@ -109,6 +110,9 @@ class CompilerInstance : public ModuleLoader {
/// \brief The module dependency collector for crashdumps
std::shared_ptr<ModuleDependencyCollector> ModuleDepCollector;
+ /// \brief The module provider.
+ std::shared_ptr<PCHContainerOperations> ThePCHContainerOperations;
+
/// \brief The dependency file generator.
std::unique_ptr<DependencyFileGenerator> TheDependencyFileGenerator;
@@ -172,7 +176,10 @@ class CompilerInstance : public ModuleLoader {
CompilerInstance(const CompilerInstance &) = delete;
void operator=(const CompilerInstance &) = delete;
public:
- explicit CompilerInstance(bool BuildingModule = false);
+ explicit CompilerInstance(
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps =
+ std::make_shared<RawPCHContainerOperations>(),
+ bool BuildingModule = false);
~CompilerInstance() override;
/// @name High-Level Operations
@@ -492,6 +499,10 @@ public:
void setModuleDepCollector(
std::shared_ptr<ModuleDependencyCollector> Collector);
+ std::shared_ptr<PCHContainerOperations> getPCHContainerOperations() const {
+ return ThePCHContainerOperations;
+ }
+
/// }
/// @name Code Completion
/// {
@@ -605,6 +616,7 @@ public:
static IntrusiveRefCntPtr<ASTReader> createPCHExternalASTSource(
StringRef Path, const std::string &Sysroot, bool DisablePCHValidation,
bool AllowPCHWithCompilerErrors, Preprocessor &PP, ASTContext &Context,
+ const PCHContainerOperations &PCHContainerOps,
void *DeserializationListener, bool OwnDeserializationListener,
bool Preamble, bool UseGlobalModuleIndex);
diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/FrontendActions.h b/contrib/llvm/tools/clang/include/clang/Frontend/FrontendActions.h
index b150828..f61775f 100644
--- a/contrib/llvm/tools/clang/include/clang/Frontend/FrontendActions.h
+++ b/contrib/llvm/tools/clang/include/clang/Frontend/FrontendActions.h
@@ -85,10 +85,9 @@ public:
/// create the PCHGenerator instance returned by CreateASTConsumer.
///
/// \returns true if an error occurred, false otherwise.
- static raw_ostream *ComputeASTConsumerArguments(CompilerInstance &CI,
- StringRef InFile,
- std::string &Sysroot,
- std::string &OutputFile);
+ static raw_pwrite_stream *
+ ComputeASTConsumerArguments(CompilerInstance &CI, StringRef InFile,
+ std::string &Sysroot, std::string &OutputFile);
};
class GenerateModuleAction : public ASTFrontendAction {
@@ -118,10 +117,10 @@ public:
/// create the PCHGenerator instance returned by CreateASTConsumer.
///
/// \returns true if an error occurred, false otherwise.
- raw_ostream *ComputeASTConsumerArguments(CompilerInstance &CI,
- StringRef InFile,
- std::string &Sysroot,
- std::string &OutputFile);
+ raw_pwrite_stream *ComputeASTConsumerArguments(CompilerInstance &CI,
+ StringRef InFile,
+ std::string &Sysroot,
+ std::string &OutputFile);
};
class SyntaxOnlyAction : public ASTFrontendAction {
diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/PCHContainerOperations.h b/contrib/llvm/tools/clang/include/clang/Frontend/PCHContainerOperations.h
new file mode 100644
index 0000000..949ee63
--- /dev/null
+++ b/contrib/llvm/tools/clang/include/clang/Frontend/PCHContainerOperations.h
@@ -0,0 +1,76 @@
+//===--- Frontend/PCHContainerOperations.h - PCH Containers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_PCH_CONTAINER_OPERATIONS_H
+#define LLVM_CLANG_PCH_CONTAINER_OPERATIONS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <memory>
+
+namespace llvm {
+class raw_pwrite_stream;
+class BitstreamReader;
+}
+
+namespace clang {
+
+class ASTConsumer;
+class CodeGenOptions;
+class DiagnosticsEngine;
+class HeaderSearchOptions;
+class LangOptions;
+class PreprocessorOptions;
+class TargetOptions;
+
+struct PCHBuffer {
+ bool IsComplete;
+ llvm::SmallVector<char, 0> Data;
+};
+
+/// \brief This abstract interface provides operations for creating
+/// and unwrapping containers for serialized ASTs (precompiled headers
+/// and clang modules).
+class PCHContainerOperations {
+public:
+ virtual ~PCHContainerOperations();
+ /// \brief Return an ASTConsumer that can be chained with a
+ /// PCHGenerator that produces a wrapper file format containing a
+ /// serialized AST bitstream.
+ virtual std::unique_ptr<ASTConsumer> CreatePCHContainerGenerator(
+ DiagnosticsEngine &Diags, const HeaderSearchOptions &HSO,
+ const PreprocessorOptions &PPO, const TargetOptions &TO,
+ const LangOptions &LO, const std::string &MainFileName,
+ const std::string &OutputFileName, llvm::raw_pwrite_stream *OS,
+ std::shared_ptr<PCHBuffer> Buffer) const = 0;
+
+ /// \brief Initialize an llvm::BitstreamReader with the serialized AST inside
+ /// the PCH container Buffer.
+ virtual void ExtractPCH(llvm::MemoryBufferRef Buffer,
+ llvm::BitstreamReader &StreamFile) const = 0;
+};
+
+/// \brief Implements a raw pass-through PCH container.
+class RawPCHContainerOperations : public PCHContainerOperations {
+ /// \brief Return an ASTConsumer that can be chained with a
+ /// PCHGenerator that writes the module to a flat file.
+ std::unique_ptr<ASTConsumer> CreatePCHContainerGenerator(
+ DiagnosticsEngine &Diags, const HeaderSearchOptions &HSO,
+ const PreprocessorOptions &PPO, const TargetOptions &TO,
+ const LangOptions &LO, const std::string &MainFileName,
+ const std::string &OutputFileName, llvm::raw_pwrite_stream *OS,
+ std::shared_ptr<PCHBuffer> Buffer) const override;
+
+ /// \brief Initialize an llvm::BitstreamReader with Buffer.
+ void ExtractPCH(llvm::MemoryBufferRef Buffer,
+ llvm::BitstreamReader &StreamFile) const override;
+};
+}
+
+#endif
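
PCHContainerOperations is the new seam for wrapping and unwrapping serialized ASTs; RawPCHContainerOperations is the pass-through default that CompilerInstance installs when no other implementation is supplied. A brief sketch of passing one in explicitly, not part of the patch (mirrors the CompilerInstance constructor change earlier in this diff):

    auto PCHOps = std::make_shared<clang::RawPCHContainerOperations>();
    clang::CompilerInstance CI(PCHOps);   // same as the default argument
    // A custom subclass could instead emit the AST inside another container.
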
diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/Utils.h b/contrib/llvm/tools/clang/include/clang/Frontend/Utils.h
index cd0ebf6..6399653 100644
--- a/contrib/llvm/tools/clang/include/clang/Frontend/Utils.h
+++ b/contrib/llvm/tools/clang/include/clang/Frontend/Utils.h
@@ -45,6 +45,7 @@ class HeaderSearch;
class HeaderSearchOptions;
class IdentifierTable;
class LangOptions;
+class PCHContainerOperations;
class Preprocessor;
class PreprocessorOptions;
class PreprocessorOutputOptions;
@@ -61,8 +62,8 @@ void ApplyHeaderSearchOptions(HeaderSearch &HS,
/// InitializePreprocessor - Initialize the preprocessor getting it and the
/// environment ready to process a single file.
-void InitializePreprocessor(Preprocessor &PP,
- const PreprocessorOptions &PPOpts,
+void InitializePreprocessor(Preprocessor &PP, const PreprocessorOptions &PPOpts,
+ const PCHContainerOperations &PCHContainerOps,
const FrontendOptions &FEOpts);
/// DoPrintPreprocessedInput - Implement -E mode.
diff --git a/contrib/llvm/tools/clang/include/clang/Lex/HeaderSearchOptions.h b/contrib/llvm/tools/clang/include/clang/Lex/HeaderSearchOptions.h
index 316134c..c9c3260 100644
--- a/contrib/llvm/tools/clang/include/clang/Lex/HeaderSearchOptions.h
+++ b/contrib/llvm/tools/clang/include/clang/Lex/HeaderSearchOptions.h
@@ -98,8 +98,9 @@ public:
/// Note: Only used for testing!
unsigned DisableModuleHash : 1;
- /// \brief Interpret module maps. This option is implied by full modules.
- unsigned ModuleMaps : 1;
+  /// \brief Implicit module maps.  This option is enabled by default when
+  /// modules are enabled.
+ unsigned ImplicitModuleMaps : 1;
/// \brief Set the 'home directory' of a module map file to the current
/// working directory (or the home directory of the module map file that
@@ -166,7 +167,7 @@ public:
public:
HeaderSearchOptions(StringRef _Sysroot = "/")
- : Sysroot(_Sysroot), DisableModuleHash(0), ModuleMaps(0),
+ : Sysroot(_Sysroot), DisableModuleHash(0), ImplicitModuleMaps(0),
ModuleMapFileHomeIsCwd(0),
ModuleCachePruneInterval(7*24*60*60),
ModuleCachePruneAfter(31*24*60*60),
diff --git a/contrib/llvm/tools/clang/include/clang/Lex/ModuleMap.h b/contrib/llvm/tools/clang/include/clang/Lex/ModuleMap.h
index e41efc5..0bbcfac 100644
--- a/contrib/llvm/tools/clang/include/clang/Lex/ModuleMap.h
+++ b/contrib/llvm/tools/clang/include/clang/Lex/ModuleMap.h
@@ -268,15 +268,10 @@ public:
///
/// \param File The header file that is likely to be included.
///
- /// \param RequestingModule Specifies the module the header is intended to be
- /// used from. Used to disambiguate if a header is present in multiple
- /// modules.
- ///
/// \returns The module KnownHeader, which provides the module that owns the
/// given header file. The KnownHeader is default constructed to indicate
/// that no module owns this header file.
- KnownHeader findModuleForHeader(const FileEntry *File,
- Module *RequestingModule = nullptr);
+ KnownHeader findModuleForHeader(const FileEntry *File);
/// \brief Reports errors if a module must not include a specific file.
///
diff --git a/contrib/llvm/tools/clang/include/clang/Lex/Preprocessor.h b/contrib/llvm/tools/clang/include/clang/Lex/Preprocessor.h
index f6e61c0..439a280 100644
--- a/contrib/llvm/tools/clang/include/clang/Lex/Preprocessor.h
+++ b/contrib/llvm/tools/clang/include/clang/Lex/Preprocessor.h
@@ -256,6 +256,10 @@ class Preprocessor : public RefCountedBase<Preprocessor> {
/// \#pragma clang arc_cf_code_audited begin.
SourceLocation PragmaARCCFCodeAuditedLoc;
+ /// \brief The source location of the currently-active
+ /// \#pragma clang assume_nonnull begin.
+ SourceLocation PragmaAssumeNonNullLoc;
+
/// \brief True if we hit the code-completion point.
bool CodeCompletionReached;
@@ -455,8 +459,9 @@ class Preprocessor : public RefCountedBase<Preprocessor> {
void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
if (auto *Info = getModuleInfo(PP, II)) {
- for (auto *Active : Info->ActiveModuleMacros)
- Info->OverriddenMacros.push_back(Active);
+ Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
+ Info->ActiveModuleMacros.begin(),
+ Info->ActiveModuleMacros.end());
Info->ActiveModuleMacros.clear();
Info->IsAmbiguous = false;
}
@@ -1249,6 +1254,20 @@ public:
PragmaARCCFCodeAuditedLoc = Loc;
}
+ /// \brief The location of the currently-active \#pragma clang
+ /// assume_nonnull begin.
+ ///
+ /// Returns an invalid location if there is no such pragma active.
+ SourceLocation getPragmaAssumeNonNullLoc() const {
+ return PragmaAssumeNonNullLoc;
+ }
+
+ /// \brief Set the location of the currently-active \#pragma clang
+ /// assume_nonnull begin. An invalid location ends the pragma.
+ void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
+ PragmaAssumeNonNullLoc = Loc;
+ }
+
/// \brief Set the directory in which the main file should be considered
/// to have been found, if it is not a real file.
void setMainFileDir(const DirectoryEntry *Dir) {
diff --git a/contrib/llvm/tools/clang/include/clang/Lex/Token.h b/contrib/llvm/tools/clang/include/clang/Lex/Token.h
index e087809..7ba22b2 100644
--- a/contrib/llvm/tools/clang/include/clang/Lex/Token.h
+++ b/contrib/llvm/tools/clang/include/clang/Lex/Token.h
@@ -94,6 +94,13 @@ public:
/// "if (Tok.is(tok::l_brace)) {...}".
bool is(tok::TokenKind K) const { return Kind == K; }
bool isNot(tok::TokenKind K) const { return Kind != K; }
+ bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
+ return is(K1) || is(K2);
+ }
+ template <typename... Ts>
+ bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts... Ks) const {
+ return is(K1) || isOneOf(K2, Ks...);
+ }
/// \brief Return true if this is a raw identifier (when lexing
/// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
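
The variadic isOneOf added above simply folds into a chain of is() comparisons, which tidies up multi-kind token checks in the parser. A hedged sketch, not part of the patch (the call site is illustrative):

    if (Tok.isOneOf(tok::kw___nonnull, tok::kw___nullable,
                    tok::kw___null_unspecified))
      ParseNullabilityTypeSpecifiers(Attrs);   // hypothetical call site
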
diff --git a/contrib/llvm/tools/clang/include/clang/Parse/Parser.h b/contrib/llvm/tools/clang/include/clang/Parse/Parser.h
index caba77b..7042a1e 100644
--- a/contrib/llvm/tools/clang/include/clang/Parse/Parser.h
+++ b/contrib/llvm/tools/clang/include/clang/Parse/Parser.h
@@ -303,6 +303,12 @@ public:
return true;
}
+ /// Retrieve the underscored keyword (__nonnull, __nullable) that corresponds
+ /// to the given nullability kind.
+ IdentifierInfo *getNullabilityKeyword(NullabilityKind nullability) {
+ return Actions.getNullabilityKeyword(nullability);
+ }
+
private:
//===--------------------------------------------------------------------===//
// Low-Level token peeking and consumption methods.
@@ -620,6 +626,16 @@ private:
const char *&PrevSpec, unsigned &DiagID,
bool &isInvalid);
+ /// Returns true if the current token is the identifier 'instancetype'.
+ ///
+ /// Should only be used in Objective-C language modes.
+ bool isObjCInstancetype() {
+ assert(getLangOpts().ObjC1);
+ if (!Ident_instancetype)
+ Ident_instancetype = PP.getIdentifierInfo("instancetype");
+ return Tok.getIdentifierInfo() == Ident_instancetype;
+ }
+
/// TryKeywordIdentFallback - For compatibility with system headers using
/// keywords as identifiers, attempt to convert the current token to an
/// identifier and optionally disable the keyword for the remainder of the
@@ -1282,6 +1298,7 @@ private:
// Definitions for Objective-c context sensitive keywords recognition.
enum ObjCTypeQual {
objc_in=0, objc_out, objc_inout, objc_oneway, objc_bycopy, objc_byref,
+ objc_nonnull, objc_nullable, objc_null_unspecified,
objc_NumQuals
};
IdentifierInfo *ObjCTypeQuals[objc_NumQuals];
@@ -1685,7 +1702,8 @@ private:
DSC_trailing, // C++11 trailing-type-specifier in a trailing return type
DSC_alias_declaration, // C++11 type-specifier-seq in an alias-declaration
DSC_top_level, // top-level/namespace declaration context
- DSC_template_type_arg // template type argument context
+ DSC_template_type_arg, // template type argument context
+ DSC_objc_method_result, // ObjC method result context, enables 'instancetype'
};
/// Is this a context in which we are parsing just a type-specifier (or
@@ -1695,6 +1713,7 @@ private:
case DSC_normal:
case DSC_class:
case DSC_top_level:
+ case DSC_objc_method_result:
return false;
case DSC_template_type_arg:
@@ -2106,6 +2125,7 @@ private:
void ParseBorlandTypeAttributes(ParsedAttributes &attrs);
void ParseOpenCLAttributes(ParsedAttributes &attrs);
void ParseOpenCLQualifiers(ParsedAttributes &Attrs);
+ void ParseNullabilityTypeSpecifiers(ParsedAttributes &attrs);
VersionTuple ParseVersionTuple(SourceRange &Range);
void ParseAvailabilityAttribute(IdentifierInfo &Availability,
diff --git a/contrib/llvm/tools/clang/include/clang/Rewrite/Frontend/FixItRewriter.h b/contrib/llvm/tools/clang/include/clang/Rewrite/Frontend/FixItRewriter.h
index ad828e5..3b1b31e 100644
--- a/contrib/llvm/tools/clang/include/clang/Rewrite/Frontend/FixItRewriter.h
+++ b/contrib/llvm/tools/clang/include/clang/Rewrite/Frontend/FixItRewriter.h
@@ -27,7 +27,7 @@ class FileEntry;
class FixItOptions {
public:
- FixItOptions() : FixWhatYouCan(false),
+ FixItOptions() : InPlace(false), FixWhatYouCan(false),
FixOnlyWarnings(false), Silent(false) { }
virtual ~FixItOptions();
@@ -41,6 +41,10 @@ public:
///
virtual std::string RewriteFilename(const std::string &Filename, int &fd) = 0;
+ /// True if files should be updated in place. RewriteFilename is only called
+ /// if this is false.
+ bool InPlace;
+
/// \brief Whether to abort fixing a file when not all errors could be fixed.
bool FixWhatYouCan;
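Editor's note: with the InPlace flag added above, a consumer of FixItRewriter either rewrites files in place or derives a new name via RewriteFilename, which is only consulted when InPlace is false. A hedged sketch of two FixItOptions subclasses built against the interface shown in this hunk (the ".fixed" suffix policy and the fd = -1 "open by name" convention are assumptions for illustration, not anything this header mandates):

    #include "clang/Rewrite/Frontend/FixItRewriter.h"
    #include <string>

    namespace {
    // Rewrites files in place; RewriteFilename is never consulted because
    // InPlace is true.
    class InPlaceFixItOptions : public clang::FixItOptions {
    public:
      InPlaceFixItOptions() { InPlace = true; }
      std::string RewriteFilename(const std::string &Filename, int &fd) override {
        fd = -1;
        return Filename; // unused when InPlace is true
      }
    };

    // Writes fixed output next to the input with a ".fixed" suffix.
    class SuffixFixItOptions : public clang::FixItOptions {
    public:
      SuffixFixItOptions() { InPlace = false; }
      std::string RewriteFilename(const std::string &Filename, int &fd) override {
        fd = -1;                     // let the rewriter open the returned path itself
        return Filename + ".fixed";
      }
    };
    } // namespace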
diff --git a/contrib/llvm/tools/clang/include/clang/Sema/AttributeList.h b/contrib/llvm/tools/clang/include/clang/Sema/AttributeList.h
index 58b1b9e..4d18633 100644
--- a/contrib/llvm/tools/clang/include/clang/Sema/AttributeList.h
+++ b/contrib/llvm/tools/clang/include/clang/Sema/AttributeList.h
@@ -81,6 +81,8 @@ public:
AS_Declspec,
/// __ptr16, alignas(...), etc.
AS_Keyword,
+ /// Context-sensitive version of a keyword attribute.
+ AS_ContextSensitiveKeyword,
/// #pragma ...
AS_Pragma
};
@@ -343,14 +345,20 @@ public:
bool isAlignasAttribute() const {
// FIXME: Use a better mechanism to determine this.
- return getKind() == AT_Aligned && SyntaxUsed == AS_Keyword;
+ return getKind() == AT_Aligned && isKeywordAttribute();
}
bool isDeclspecAttribute() const { return SyntaxUsed == AS_Declspec; }
bool isCXX11Attribute() const {
return SyntaxUsed == AS_CXX11 || isAlignasAttribute();
}
- bool isKeywordAttribute() const { return SyntaxUsed == AS_Keyword; }
+ bool isKeywordAttribute() const {
+ return SyntaxUsed == AS_Keyword || SyntaxUsed == AS_ContextSensitiveKeyword;
+ }
+
+ bool isContextSensitiveKeywordAttribute() const {
+ return SyntaxUsed == AS_ContextSensitiveKeyword;
+ }
bool isInvalid() const { return Invalid; }
void setInvalid(bool b = true) const { Invalid = b; }
diff --git a/contrib/llvm/tools/clang/include/clang/Sema/DeclSpec.h b/contrib/llvm/tools/clang/include/clang/Sema/DeclSpec.h
index de39d83..2ec3286 100644
--- a/contrib/llvm/tools/clang/include/clang/Sema/DeclSpec.h
+++ b/contrib/llvm/tools/clang/include/clang/Sema/DeclSpec.h
@@ -31,6 +31,7 @@
#include "clang/Lex/Token.h"
#include "clang/Sema/AttributeList.h"
#include "clang/Sema/Ownership.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -785,7 +786,8 @@ public:
DQ_Out = 0x4,
DQ_Bycopy = 0x8,
DQ_Byref = 0x10,
- DQ_Oneway = 0x20
+ DQ_Oneway = 0x20,
+ DQ_CSNullability = 0x40
};
/// PropertyAttributeKind - list of property attributes.
@@ -802,17 +804,22 @@ public:
DQ_PR_atomic = 0x100,
DQ_PR_weak = 0x200,
DQ_PR_strong = 0x400,
- DQ_PR_unsafe_unretained = 0x800
+ DQ_PR_unsafe_unretained = 0x800,
+ DQ_PR_nullability = 0x1000,
+ DQ_PR_null_resettable = 0x2000
};
-
ObjCDeclSpec()
: objcDeclQualifier(DQ_None), PropertyAttributes(DQ_PR_noattr),
- GetterName(nullptr), SetterName(nullptr) { }
+ Nullability(0), GetterName(nullptr), SetterName(nullptr) { }
+
ObjCDeclQualifier getObjCDeclQualifier() const { return objcDeclQualifier; }
void setObjCDeclQualifier(ObjCDeclQualifier DQVal) {
objcDeclQualifier = (ObjCDeclQualifier) (objcDeclQualifier | DQVal);
}
+ void clearObjCDeclQualifier(ObjCDeclQualifier DQVal) {
+ objcDeclQualifier = (ObjCDeclQualifier) (objcDeclQualifier & ~DQVal);
+ }
ObjCPropertyAttributeKind getPropertyAttributes() const {
return ObjCPropertyAttributeKind(PropertyAttributes);
@@ -822,6 +829,28 @@ public:
(ObjCPropertyAttributeKind)(PropertyAttributes | PRVal);
}
+ NullabilityKind getNullability() const {
+ assert(((getObjCDeclQualifier() & DQ_CSNullability) ||
+ (getPropertyAttributes() & DQ_PR_nullability)) &&
+ "Objective-C declspec doesn't have nullability");
+ return static_cast<NullabilityKind>(Nullability);
+ }
+
+ SourceLocation getNullabilityLoc() const {
+ assert(((getObjCDeclQualifier() & DQ_CSNullability) ||
+ (getPropertyAttributes() & DQ_PR_nullability)) &&
+ "Objective-C declspec doesn't have nullability");
+ return NullabilityLoc;
+ }
+
+ void setNullability(SourceLocation loc, NullabilityKind kind) {
+ assert(((getObjCDeclQualifier() & DQ_CSNullability) ||
+ (getPropertyAttributes() & DQ_PR_nullability)) &&
+ "Set the nullability declspec or property attribute first");
+ Nullability = static_cast<unsigned>(kind);
+ NullabilityLoc = loc;
+ }
+
const IdentifierInfo *getGetterName() const { return GetterName; }
IdentifierInfo *getGetterName() { return GetterName; }
void setGetterName(IdentifierInfo *name) { GetterName = name; }
@@ -834,10 +863,15 @@ private:
// FIXME: These two are unrelated and mutually exclusive. So perhaps
// we can put them in a union to reflect their mutual exclusivity
// (space saving is negligible).
- ObjCDeclQualifier objcDeclQualifier : 6;
+ ObjCDeclQualifier objcDeclQualifier : 7;
// NOTE: VC++ treats enums as signed, avoid using ObjCPropertyAttributeKind
- unsigned PropertyAttributes : 12;
+ unsigned PropertyAttributes : 14;
+
+ unsigned Nullability : 2;
+
+ SourceLocation NullabilityLoc;
+
IdentifierInfo *GetterName; // getter name or NULL if no getter
IdentifierInfo *SetterName; // setter name or NULL if no setter
};
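Editor's note: the hunks above widen objcDeclQualifier and PropertyAttributes and add a 2-bit Nullability field, with the getters asserting that DQ_CSNullability or DQ_PR_nullability has been recorded before the packed value is read. A standalone sketch of that "flag guards a packed value" pattern (toy enums, not the Clang ones):

    #include <cassert>
    #include <cstdint>

    enum Qual : unsigned { DQ_None = 0, DQ_In = 0x1, DQ_CSNullability = 0x40 };
    enum class Nullability : uint8_t { NonNull, Nullable, Unspecified };

    struct DeclSpecSketch {
      unsigned Quals : 7;            // widened so the new flag fits
      unsigned NullabilityBits : 2;  // only meaningful when the flag is set

      DeclSpecSketch() : Quals(DQ_None), NullabilityBits(0) {}

      void setQual(Qual Q) { Quals |= Q; }
      void clearQual(Qual Q) { Quals &= ~unsigned(Q); }

      void setNullability(Nullability N) {
        setQual(DQ_CSNullability);   // record that the packed value is valid
        NullabilityBits = unsigned(N);
      }
      Nullability getNullability() const {
        assert((Quals & DQ_CSNullability) && "nullability was never set");
        return Nullability(NullabilityBits);
      }
    };

    int main() {
      DeclSpecSketch DS;
      DS.setNullability(Nullability::Nullable);
      assert(DS.getNullability() == Nullability::Nullable);
    }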
@@ -1616,7 +1650,13 @@ private:
bool InlineParamsUsed;
/// \brief true if the declaration is preceded by \c __extension__.
- bool Extension : 1;
+ unsigned Extension : 1;
+
+ /// Indicates whether this is an Objective-C instance variable.
+ unsigned ObjCIvar : 1;
+
+ /// Indicates whether this is an Objective-C 'weak' property.
+ unsigned ObjCWeakProperty : 1;
/// \brief If this is the second or subsequent declarator in this declaration,
/// the location of the comma before this declarator.
@@ -1635,7 +1675,8 @@ public:
GroupingParens(false), FunctionDefinition(FDK_Declaration),
Redeclaration(false),
Attrs(ds.getAttributePool().getFactory()), AsmLabel(nullptr),
- InlineParamsUsed(false), Extension(false) {
+ InlineParamsUsed(false), Extension(false), ObjCIvar(false),
+ ObjCWeakProperty(false) {
}
~Declarator() {
@@ -1713,6 +1754,8 @@ public:
Attrs.clear();
AsmLabel = nullptr;
InlineParamsUsed = false;
+ ObjCIvar = false;
+ ObjCWeakProperty = false;
CommaLoc = SourceLocation();
EllipsisLoc = SourceLocation();
}
@@ -2121,6 +2164,12 @@ public:
void setExtension(bool Val = true) { Extension = Val; }
bool getExtension() const { return Extension; }
+ void setObjCIvar(bool Val = true) { ObjCIvar = Val; }
+ bool isObjCIvar() const { return ObjCIvar; }
+
+ void setObjCWeakProperty(bool Val = true) { ObjCWeakProperty = Val; }
+ bool isObjCWeakProperty() const { return ObjCWeakProperty; }
+
void setInvalidType(bool Val = true) { InvalidType = Val; }
bool isInvalidType() const {
return InvalidType || DS.getTypeSpecType() == DeclSpec::TST_error;
diff --git a/contrib/llvm/tools/clang/include/clang/Sema/Lookup.h b/contrib/llvm/tools/clang/include/clang/Sema/Lookup.h
index 97192b5..5bfee8b 100644
--- a/contrib/llvm/tools/clang/include/clang/Sema/Lookup.h
+++ b/contrib/llvm/tools/clang/include/clang/Sema/Lookup.h
@@ -302,14 +302,10 @@ public:
if (!D->isInIdentifierNamespace(IDNS))
return nullptr;
- if (isVisible(getSema(), D))
+ if (isHiddenDeclarationVisible() || isVisible(getSema(), D))
return D;
- if (auto *Visible = getAcceptableDeclSlow(D))
- return Visible;
-
- // Even if hidden declarations are visible, prefer a visible declaration.
- return isHiddenDeclarationVisible() ? D : nullptr;
+ return getAcceptableDeclSlow(D);
}
private:
diff --git a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h
index 60664c5..cb75b96 100644
--- a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h
+++ b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h
@@ -210,6 +210,50 @@ namespace threadSafety {
typedef std::pair<llvm::PointerUnion<const TemplateTypeParmType*, NamedDecl*>,
SourceLocation> UnexpandedParameterPack;
+/// Describes whether we've seen any nullability information for the given
+/// file.
+struct FileNullability {
+ /// The first pointer declarator (of any pointer kind) in the file that does
+ /// not have a corresponding nullability annotation.
+ SourceLocation PointerLoc;
+
+ /// Which kind of pointer declarator we saw.
+ uint8_t PointerKind;
+
+ /// Whether we saw any type nullability annotations in the given file.
+ bool SawTypeNullability = false;
+};
+
+/// A mapping from file IDs to a record of whether we've seen nullability
+/// information in that file.
+class FileNullabilityMap {
+ /// A mapping from file IDs to the nullability information for each file ID.
+ llvm::DenseMap<FileID, FileNullability> Map;
+
+ /// A single-element cache based on the file ID.
+ struct {
+ FileID File;
+ FileNullability Nullability;
+ } Cache;
+
+public:
+ FileNullability &operator[](FileID file) {
+ // Check the single-element cache.
+ if (file == Cache.File)
+ return Cache.Nullability;
+
+ // It's not in the single-element cache; flush the cache if we have one.
+ if (!Cache.File.isInvalid()) {
+ Map[Cache.File] = Cache.Nullability;
+ }
+
+ // Pull this entry into the cache.
+ Cache.File = file;
+ Cache.Nullability = Map[file];
+ return Cache.Nullability;
+ }
+};
+
/// Sema - This implements semantic analysis and AST building for C.
class Sema {
Sema(const Sema &) = delete;
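Editor's note: FileNullabilityMap above fronts a DenseMap with a single-entry cache keyed by the last FileID asked for, writing the cached entry back before switching to a new file. A self-contained sketch of the same write-back cache over std::unordered_map (std::string keys stand in for FileID; a Valid flag replaces the FileID::isInvalid check; not Clang code):

    #include <cassert>
    #include <string>
    #include <unordered_map>

    struct Entry { bool SawAnnotation = false; };

    class CachedMap {
      std::unordered_map<std::string, Entry> Map;
      struct { std::string Key; Entry Value; bool Valid = false; } Cache;

    public:
      Entry &operator[](const std::string &Key) {
        if (Cache.Valid && Cache.Key == Key)   // hit: no hash lookup at all
          return Cache.Value;
        if (Cache.Valid)                       // miss: write the old entry back
          Map[Cache.Key] = Cache.Value;
        Cache.Key = Key;                       // pull the new entry into the cache
        Cache.Value = Map[Key];
        Cache.Valid = true;
        return Cache.Value;
      }
    };

    int main() {
      CachedMap M;
      M["a.h"].SawAnnotation = true;   // stored in the cache slot
      M["b.h"];                        // flushes "a.h" back into the map
      assert(M["a.h"].SawAnnotation);  // reloaded from the map, still true
    }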
@@ -341,6 +385,9 @@ public:
PragmaStack<StringLiteral *> ConstSegStack;
PragmaStack<StringLiteral *> CodeSegStack;
+ /// A mapping that describes the nullability we've seen in each header file.
+ FileNullabilityMap NullabilityMap;
+
/// Last section used with #pragma init_seg.
StringLiteral *CurInitSeg;
SourceLocation CurInitSegLoc;
@@ -1157,6 +1204,16 @@ public:
bool CheckFunctionReturnType(QualType T, SourceLocation Loc);
+ unsigned deduceWeakPropertyFromType(QualType T) {
+ if ((getLangOpts().getGC() != LangOptions::NonGC &&
+ T.isObjCGCWeak()) ||
+ (getLangOpts().ObjCAutoRefCount &&
+ T.getObjCLifetime() == Qualifiers::OCL_Weak))
+ return ObjCDeclSpec::DQ_PR_weak;
+ return 0;
+ }
+
+
/// \brief Build a function type.
///
/// This routine checks the function type according to C++ rules and
@@ -1325,6 +1382,11 @@ public:
return hasVisibleDefinition(const_cast<NamedDecl*>(D), &Hidden);
}
+ /// Determine if the template parameter \p D has a visible default argument.
+ bool
+ hasVisibleDefaultArgument(const NamedDecl *D,
+ llvm::SmallVectorImpl<Module *> *Modules = nullptr);
+
bool RequireCompleteType(SourceLocation Loc, QualType T,
TypeDiagnoser &Diagnoser);
bool RequireCompleteType(SourceLocation Loc, QualType T,
@@ -1727,6 +1789,22 @@ public:
void createImplicitModuleImportForErrorRecovery(SourceLocation Loc,
Module *Mod);
+ /// Kinds of missing import. Note, the values of these enumerators correspond
+ /// to %select values in diagnostics.
+ enum class MissingImportKind {
+ Declaration,
+ Definition,
+ DefaultArgument
+ };
+
+ /// \brief Diagnose that the specified declaration needs to be visible but
+ /// isn't, and suggest a module import that would resolve the problem.
+ void diagnoseMissingImport(SourceLocation Loc, NamedDecl *Decl,
+ bool NeedDefinition, bool Recover = true);
+ void diagnoseMissingImport(SourceLocation Loc, NamedDecl *Decl,
+ SourceLocation DeclLoc, ArrayRef<Module *> Modules,
+ MissingImportKind MIK, bool Recover);
+
/// \brief Retrieve a suitable printing policy.
PrintingPolicy getPrintingPolicy() const {
return getPrintingPolicy(Context, PP);
@@ -1847,10 +1925,10 @@ public:
/// struct, or union).
void ActOnTagStartDefinition(Scope *S, Decl *TagDecl);
+ typedef void *SkippedDefinitionContext;
+
/// \brief Invoked when we enter a tag definition that we're skipping.
- void ActOnTagStartSkippedDefinition(Scope *S, Decl *TD) {
- PushDeclContext(S, cast<DeclContext>(TD));
- }
+ SkippedDefinitionContext ActOnTagStartSkippedDefinition(Scope *S, Decl *TD);
Decl *ActOnObjCContainerStartDefinition(Decl *IDecl);
@@ -1867,9 +1945,7 @@ public:
void ActOnTagFinishDefinition(Scope *S, Decl *TagDecl,
SourceLocation RBraceLoc);
- void ActOnTagFinishSkippedDefinition() {
- PopDeclContext();
- }
+ void ActOnTagFinishSkippedDefinition(SkippedDefinitionContext Context);
void ActOnObjCContainerFinishDefinition();
@@ -2819,6 +2895,7 @@ public:
unsigned ArgNum, StringRef &Str,
SourceLocation *ArgLocation = nullptr);
bool checkSectionName(SourceLocation LiteralLoc, StringRef Str);
+ void checkTargetAttr(SourceLocation LiteralLoc, StringRef Str);
bool checkMSInheritanceAttrOnDefinition(
CXXRecordDecl *RD, SourceRange Range, bool BestCase,
MSInheritanceAttr::Spelling SemanticSpelling);
@@ -2839,6 +2916,26 @@ public:
/// Valid types should not have multiple attributes with different CCs.
const AttributedType *getCallingConvAttributedType(QualType T) const;
+ /// Check whether a nullability type specifier can be added to the given
+ /// type.
+ ///
+ /// \param type The type to which the nullability specifier will be
+ /// added. On success, this type will be updated appropriately.
+ ///
+ /// \param nullability The nullability specifier to add.
+ ///
+ /// \param nullabilityLoc The location of the nullability specifier.
+ ///
+ /// \param isContextSensitive Whether this nullability specifier was
+ /// written as a context-sensitive keyword (in an Objective-C
+ /// method) or an Objective-C property attribute, rather than as an
+ /// underscored type specifier.
+ ///
+ /// \returns true if nullability cannot be applied, false otherwise.
+ bool checkNullabilityTypeSpecifier(QualType &type, NullabilityKind nullability,
+ SourceLocation nullabilityLoc,
+ bool isContextSensitive);
+
/// \brief Stmt attributes - this routine is the top level dispatcher.
StmtResult ProcessStmtAttributes(Stmt *Stmt, AttributeList *Attrs,
SourceRange Range);
@@ -2878,6 +2975,9 @@ public:
ObjCContainerDecl *CDecl,
bool SynthesizeProperties);
+ /// Diagnose any null-resettable synthesized setters.
+ void diagnoseNullResettableSynthesizedSetters(ObjCImplDecl *impDecl);
+
/// DefaultSynthesizeProperties - This routine default synthesizes all
/// properties which must be synthesized in the class's \@implementation.
void DefaultSynthesizeProperties (Scope *S, ObjCImplDecl* IMPDecl,
@@ -2914,7 +3014,8 @@ public:
const unsigned Attributes,
const unsigned AttributesAsWritten,
bool *isOverridingProperty,
- TypeSourceInfo *T,
+ QualType T,
+ TypeSourceInfo *TSI,
tok::ObjCKeywordKind MethodImplKind);
/// Called by ActOnProperty and HandlePropertyInClassExtension to
@@ -2930,7 +3031,8 @@ public:
const bool isReadWrite,
const unsigned Attributes,
const unsigned AttributesAsWritten,
- TypeSourceInfo *T,
+ QualType T,
+ TypeSourceInfo *TSI,
tok::ObjCKeywordKind MethodImplKind,
DeclContext *lexicalDC = nullptr);
@@ -7629,6 +7731,9 @@ public:
/// \brief Called on well-formed '\#pragma omp taskwait'.
StmtResult ActOnOpenMPTaskwaitDirective(SourceLocation StartLoc,
SourceLocation EndLoc);
+ /// \brief Called on well-formed '\#pragma omp taskgroup'.
+ StmtResult ActOnOpenMPTaskgroupDirective(Stmt *AStmt, SourceLocation StartLoc,
+ SourceLocation EndLoc);
/// \brief Called on well-formed '\#pragma omp flush'.
StmtResult ActOnOpenMPFlushDirective(ArrayRef<OMPClause *> Clauses,
SourceLocation StartLoc,
@@ -8557,9 +8662,10 @@ private:
const FunctionProtoType *Proto,
SourceLocation Loc);
- void checkCall(NamedDecl *FDecl, ArrayRef<const Expr *> Args,
- unsigned NumParams, bool IsMemberFunction, SourceLocation Loc,
- SourceRange Range, VariadicCallType CallType);
+ void checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto,
+ ArrayRef<const Expr *> Args, bool IsMemberFunction,
+ SourceLocation Loc, SourceRange Range,
+ VariadicCallType CallType);
bool CheckObjCString(Expr *Arg);
@@ -8602,7 +8708,9 @@ private:
llvm::APSInt &Result);
bool SemaBuiltinConstantArgRange(CallExpr *TheCall, int ArgNum,
int Low, int High);
-
+ bool SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
+ int ArgNum, unsigned ExpectedFieldNum,
+ bool AllowName);
public:
enum FormatStringType {
FST_Scanf,
@@ -8731,6 +8839,13 @@ private:
mutable IdentifierInfo *Ident_super;
mutable IdentifierInfo *Ident___float128;
+ /// Nullability type specifiers.
+ IdentifierInfo *Ident___nonnull = nullptr;
+ IdentifierInfo *Ident___nullable = nullptr;
+ IdentifierInfo *Ident___null_unspecified = nullptr;
+
+ IdentifierInfo *Ident_NSError = nullptr;
+
protected:
friend class Parser;
friend class InitializationSequence;
@@ -8739,6 +8854,15 @@ protected:
friend class ASTWriter;
public:
+ /// Retrieve the underscored keyword (__nonnull, __nullable, __null_unspecified)
+ /// associated with the given nullability kind.
+ IdentifierInfo *getNullabilityKeyword(NullabilityKind nullability);
+
+ /// The struct behind the CFErrorRef pointer.
+ RecordDecl *CFError = nullptr;
+
+ /// Retrieve the identifier "NSError".
+ IdentifierInfo *getNSErrorIdent();
+
/// \brief Retrieve the parser's current scope.
///
/// This routine must only be used when it is certain that semantic analysis
diff --git a/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h b/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h
index e0f01c8..83185a8 100644
--- a/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1211,8 +1211,12 @@ namespace clang {
EXPR_INIT_LIST,
/// \brief A DesignatedInitExpr record.
EXPR_DESIGNATED_INIT,
+ /// \brief A DesignatedInitUpdateExpr record.
+ EXPR_DESIGNATED_INIT_UPDATE,
/// \brief An ImplicitValueInitExpr record.
EXPR_IMPLICIT_VALUE_INIT,
+ /// \brief A NoInitExpr record.
+ EXPR_NO_INIT,
/// \brief A VAArgExpr record.
EXPR_VA_ARG,
/// \brief An AddrLabelExpr record.
@@ -1392,6 +1396,7 @@ namespace clang {
STMT_OMP_ATOMIC_DIRECTIVE,
STMT_OMP_TARGET_DIRECTIVE,
STMT_OMP_TEAMS_DIRECTIVE,
+ STMT_OMP_TASKGROUP_DIRECTIVE,
// ARC
EXPR_OBJC_BRIDGED_CAST, // ObjCBridgedCastExpr
diff --git a/contrib/llvm/tools/clang/include/clang/Serialization/ASTReader.h b/contrib/llvm/tools/clang/include/clang/Serialization/ASTReader.h
index c7cc1be..429f00f 100644
--- a/contrib/llvm/tools/clang/include/clang/Serialization/ASTReader.h
+++ b/contrib/llvm/tools/clang/include/clang/Serialization/ASTReader.h
@@ -362,6 +362,7 @@ private:
SourceManager &SourceMgr;
FileManager &FileMgr;
+ const PCHContainerOperations &PCHContainerOps;
DiagnosticsEngine &Diags;
/// \brief The semantic analysis object that will be processing the
@@ -1237,6 +1238,9 @@ public:
/// \param Context the AST context that this precompiled header will be
/// loaded into.
///
+ /// \param PCHContainerOps the PCHContainerOperations to use for loading and
+ /// creating modules.
+ ///
/// \param isysroot If non-NULL, the system include path specified by the
/// user. This is only used with relocatable PCH files. If non-NULL,
/// a relocatable PCH file will use the default path "/".
@@ -1258,12 +1262,12 @@ public:
///
/// \param UseGlobalIndex If true, the AST reader will try to load and use
/// the global module index.
- ASTReader(Preprocessor &PP, ASTContext &Context, StringRef isysroot = "",
- bool DisableValidation = false,
+ ASTReader(Preprocessor &PP, ASTContext &Context,
+ const PCHContainerOperations &PCHContainerOps,
+ StringRef isysroot = "", bool DisableValidation = false,
bool AllowASTWithCompilerErrors = false,
bool AllowConfigurationMismatch = false,
- bool ValidateSystemInputs = false,
- bool UseGlobalIndex = true);
+ bool ValidateSystemInputs = false, bool UseGlobalIndex = true);
~ASTReader() override;
@@ -1425,21 +1429,23 @@ public:
/// \brief Retrieve the name of the original source file name directly from
/// the AST file, without actually loading the AST file.
- static std::string getOriginalSourceFile(const std::string &ASTFileName,
- FileManager &FileMgr,
- DiagnosticsEngine &Diags);
+ static std::string
+ getOriginalSourceFile(const std::string &ASTFileName, FileManager &FileMgr,
+ const PCHContainerOperations &PCHContainerOps,
+ DiagnosticsEngine &Diags);
/// \brief Read the control block for the named AST file.
///
/// \returns true if an error occurred, false otherwise.
- static bool readASTFileControlBlock(StringRef Filename,
- FileManager &FileMgr,
- ASTReaderListener &Listener);
+ static bool
+ readASTFileControlBlock(StringRef Filename, FileManager &FileMgr,
+ const PCHContainerOperations &PCHContainerOps,
+ ASTReaderListener &Listener);
/// \brief Determine whether the given AST file is acceptable to load into a
/// translation unit with the given language and target options.
- static bool isAcceptableASTFile(StringRef Filename,
- FileManager &FileMgr,
+ static bool isAcceptableASTFile(StringRef Filename, FileManager &FileMgr,
+ const PCHContainerOperations &PCHContainerOps,
const LangOptions &LangOpts,
const TargetOptions &TargetOpts,
const PreprocessorOptions &PPOpts,
diff --git a/contrib/llvm/tools/clang/include/clang/Serialization/ASTWriter.h b/contrib/llvm/tools/clang/include/clang/Serialization/ASTWriter.h
index 297ee22..decd07a 100644
--- a/contrib/llvm/tools/clang/include/clang/Serialization/ASTWriter.h
+++ b/contrib/llvm/tools/clang/include/clang/Serialization/ASTWriter.h
@@ -17,6 +17,7 @@
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclarationName.h"
+#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/AST/TemplateBase.h"
#include "clang/Sema/SemaConsumer.h"
#include "clang/Serialization/ASTBitCodes.h"
@@ -868,30 +869,28 @@ class PCHGenerator : public SemaConsumer {
std::string OutputFile;
clang::Module *Module;
std::string isysroot;
- raw_ostream *Out;
Sema *SemaPtr;
- SmallVector<char, 128> Buffer;
+ std::shared_ptr<PCHBuffer> Buffer;
llvm::BitstreamWriter Stream;
ASTWriter Writer;
bool AllowASTWithErrors;
- bool HasEmittedPCH;
protected:
ASTWriter &getWriter() { return Writer; }
const ASTWriter &getWriter() const { return Writer; }
+ SmallVectorImpl<char> &getPCH() const { return Buffer->Data; }
public:
PCHGenerator(const Preprocessor &PP, StringRef OutputFile,
- clang::Module *Module,
- StringRef isysroot, raw_ostream *Out,
+ clang::Module *Module, StringRef isysroot,
+ std::shared_ptr<PCHBuffer> Buffer,
bool AllowASTWithErrors = false);
~PCHGenerator() override;
void InitializeSema(Sema &S) override { SemaPtr = &S; }
void HandleTranslationUnit(ASTContext &Ctx) override;
ASTMutationListener *GetASTMutationListener() override;
ASTDeserializationListener *GetASTDeserializationListener() override;
-
- bool hasEmittedPCH() const { return HasEmittedPCH; }
+ bool hasEmittedPCH() const { return Buffer->IsComplete; }
};
} // end namespace clang
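Editor's note: the PCHGenerator changes above replace the raw output stream and local SmallVector with a shared PCHBuffer whose IsComplete flag takes over the role of the removed HasEmittedPCH member, so the generator and the container writer share one allocation. A rough sketch of that shared-buffer hand-off in plain C++ (type and member names invented for illustration):

    #include <cassert>
    #include <memory>
    #include <string>
    #include <vector>

    struct Buffer {
      std::vector<char> Data;
      bool IsComplete = false;   // set once the producer has finished writing
    };

    class Generator {
      std::shared_ptr<Buffer> Buf;
    public:
      explicit Generator(std::shared_ptr<Buffer> B) : Buf(std::move(B)) {}
      void handleTranslationUnit(const std::string &Serialized) {
        Buf->Data.assign(Serialized.begin(), Serialized.end());
        Buf->IsComplete = true;
      }
      bool hasEmitted() const { return Buf->IsComplete; }
    };

    int main() {
      auto Buf = std::make_shared<Buffer>();
      Generator Gen(Buf);                       // producer holds one reference
      Gen.handleTranslationUnit("serialized AST bytes");
      // The consumer (the code that writes the container file) holds the other
      // reference and only looks at the data once IsComplete is true.
      assert(Buf->IsComplete && Buf->Data.size() == 20);
    }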
diff --git a/contrib/llvm/tools/clang/include/clang/Serialization/GlobalModuleIndex.h b/contrib/llvm/tools/clang/include/clang/Serialization/GlobalModuleIndex.h
index 640c7bb..7e20510 100644
--- a/contrib/llvm/tools/clang/include/clang/Serialization/GlobalModuleIndex.h
+++ b/contrib/llvm/tools/clang/include/clang/Serialization/GlobalModuleIndex.h
@@ -35,6 +35,7 @@ class DirectoryEntry;
class FileEntry;
class FileManager;
class IdentifierIterator;
+class PCHContainerOperations;
namespace serialization {
class ModuleFile;
@@ -192,10 +193,13 @@ public:
/// \brief Write a global index into the given directory.
///
/// \param FileMgr The file manager to use to load module files.
- ///
+ /// \param PCHContainerOps The PCHContainerOperations to use for loading and
+ /// creating modules.
/// \param Path The path to the directory containing module files, into
/// which the global index will be written.
- static ErrorCode writeIndex(FileManager &FileMgr, StringRef Path);
+ static ErrorCode writeIndex(FileManager &FileMgr,
+ const PCHContainerOperations &PCHContainerOps,
+ StringRef Path);
};
}
diff --git a/contrib/llvm/tools/clang/include/clang/Serialization/ModuleManager.h b/contrib/llvm/tools/clang/include/clang/Serialization/ModuleManager.h
index 3de86fe..ea4b57f 100644
--- a/contrib/llvm/tools/clang/include/clang/Serialization/ModuleManager.h
+++ b/contrib/llvm/tools/clang/include/clang/Serialization/ModuleManager.h
@@ -24,6 +24,7 @@ namespace clang {
class GlobalModuleIndex;
class ModuleMap;
+class PCHContainerOperations;
namespace serialization {
@@ -49,7 +50,10 @@ class ModuleManager {
/// \brief FileManager that handles translating between filenames and
/// FileEntry *.
FileManager &FileMgr;
-
+
+ /// \brief Knows how to unwrap module containers.
+ const PCHContainerOperations &PCHContainerOps;
+
/// \brief A lookup of in-memory (virtual file) buffers
llvm::DenseMap<const FileEntry *, std::unique_ptr<llvm::MemoryBuffer>>
InMemoryBuffers;
@@ -112,8 +116,9 @@ public:
typedef SmallVectorImpl<ModuleFile*>::const_iterator ModuleConstIterator;
typedef SmallVectorImpl<ModuleFile*>::reverse_iterator ModuleReverseIterator;
typedef std::pair<uint32_t, StringRef> ModuleOffset;
-
- explicit ModuleManager(FileManager &FileMgr);
+
+ explicit ModuleManager(FileManager &FileMgr,
+ const PCHContainerOperations &PCHContainerOps);
~ModuleManager();
/// \brief Forward iterator to traverse all loaded modules. This is reverse
diff --git a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Checkers/ObjCRetainCount.h b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Checkers/ObjCRetainCount.h
index ab92a24..5850656 100644
--- a/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Checkers/ObjCRetainCount.h
+++ b/contrib/llvm/tools/clang/include/clang/StaticAnalyzer/Checkers/ObjCRetainCount.h
@@ -69,6 +69,14 @@ enum ArgEffect {
/// transfers the object to the Garbage Collector under GC.
MakeCollectable,
+ /// The argument is a pointer to a retain-counted object; on exit, the new
+ /// value of the pointer is a +0 value or NULL.
+ UnretainedOutParameter,
+
+ /// The argument is a pointer to a retain-counted object; on exit, the new
+ /// value of the pointer is a +1 value or NULL.
+ RetainedOutParameter,
+
/// The argument is treated as potentially escaping, meaning that
/// even when its reference count hits 0 it should be treated as still
/// possibly being alive as someone else *may* be holding onto the object.
diff --git a/contrib/llvm/tools/clang/include/clang/Tooling/Refactoring.h b/contrib/llvm/tools/clang/include/clang/Tooling/Refactoring.h
index e3e7f83..944fd41 100644
--- a/contrib/llvm/tools/clang/include/clang/Tooling/Refactoring.h
+++ b/contrib/llvm/tools/clang/include/clang/Tooling/Refactoring.h
@@ -38,7 +38,9 @@ class RefactoringTool : public ClangTool {
public:
/// \see ClangTool::ClangTool.
RefactoringTool(const CompilationDatabase &Compilations,
- ArrayRef<std::string> SourcePaths);
+ ArrayRef<std::string> SourcePaths,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps =
+ std::make_shared<RawPCHContainerOperations>());
/// \brief Returns the set of replacements to which replacements should
/// be added during the run of the tool.
diff --git a/contrib/llvm/tools/clang/include/clang/Tooling/Tooling.h b/contrib/llvm/tools/clang/include/clang/Tooling/Tooling.h
index ca187b1..fb887e1 100644
--- a/contrib/llvm/tools/clang/include/clang/Tooling/Tooling.h
+++ b/contrib/llvm/tools/clang/include/clang/Tooling/Tooling.h
@@ -31,11 +31,13 @@
#define LLVM_CLANG_TOOLING_TOOLING_H
#include "clang/AST/ASTConsumer.h"
+#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/LLVM.h"
#include "clang/Driver/Util.h"
#include "clang/Frontend/FrontendAction.h"
+#include "clang/Lex/ModuleLoader.h"
#include "clang/Tooling/ArgumentsAdjusters.h"
#include "clang/Tooling/CompilationDatabase.h"
#include "llvm/ADT/StringMap.h"
@@ -66,9 +68,10 @@ public:
virtual ~ToolAction();
/// \brief Perform an action for an invocation.
- virtual bool runInvocation(clang::CompilerInvocation *Invocation,
- FileManager *Files,
- DiagnosticConsumer *DiagConsumer) = 0;
+ virtual bool
+ runInvocation(clang::CompilerInvocation *Invocation, FileManager *Files,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ DiagnosticConsumer *DiagConsumer) = 0;
};
/// \brief Interface to generate clang::FrontendActions.
@@ -83,6 +86,7 @@ public:
/// \brief Invokes the compiler with a FrontendAction created by create().
bool runInvocation(clang::CompilerInvocation *Invocation, FileManager *Files,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
DiagnosticConsumer *DiagConsumer) override;
/// \brief Returns a new clang::FrontendAction.
@@ -139,10 +143,14 @@ inline std::unique_ptr<FrontendActionFactory> newFrontendActionFactory(
/// \param ToolAction The action to run over the code.
/// \param Code C++ code.
/// \param FileName The file name which 'Code' will be mapped as.
+/// \param PCHContainerOps The PCHContainerOperations for loading and creating
+/// clang modules.
///
/// \return - True if 'ToolAction' was successfully executed.
bool runToolOnCode(clang::FrontendAction *ToolAction, const Twine &Code,
- const Twine &FileName = "input.cc");
+ const Twine &FileName = "input.cc",
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps =
+ std::make_shared<RawPCHContainerOperations>());
/// The first part of the pair is the filename, the second part the
/// file-content.
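Editor's note: with the signature above, existing callers of runToolOnCode keep working because the new PCHContainerOps parameter defaults to a RawPCHContainerOperations instance. A hedged usage sketch written against the declarations shown in this header (SyntaxOnlyAction comes from clang/Frontend/FrontendActions.h; treat the exact spellings as assumptions to verify against your Clang checkout):

    #include "clang/Frontend/FrontendActions.h"
    #include "clang/Tooling/Tooling.h"
    #include <memory>

    int main() {
      // Default container operations are supplied for us.
      bool Ok = clang::tooling::runToolOnCode(new clang::SyntaxOnlyAction,
                                              "struct S { int X; };");

      // Equivalent call spelling the defaulted arguments out explicitly.
      bool Ok2 = clang::tooling::runToolOnCode(
          new clang::SyntaxOnlyAction, "struct S { int X; };", "input.cc",
          std::make_shared<clang::RawPCHContainerOperations>());

      return (Ok && Ok2) ? 0 : 1;
    }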
@@ -155,37 +163,48 @@ typedef std::vector<std::pair<std::string, std::string>> FileContentMappings;
/// \param Code C++ code.
/// \param Args Additional flags to pass on.
/// \param FileName The file name which 'Code' will be mapped as.
+/// \param PCHContainerOps The PCHContainerOperations for loading and creating
+/// clang modules.
///
/// \return - True if 'ToolAction' was successfully executed.
bool runToolOnCodeWithArgs(
clang::FrontendAction *ToolAction, const Twine &Code,
const std::vector<std::string> &Args, const Twine &FileName = "input.cc",
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps =
+ std::make_shared<RawPCHContainerOperations>(),
const FileContentMappings &VirtualMappedFiles = FileContentMappings());
/// \brief Builds an AST for 'Code'.
///
/// \param Code C++ code.
/// \param FileName The file name which 'Code' will be mapped as.
+/// \param PCHContainerOps The PCHContainerOperations for loading and creating
+/// clang modules.
///
/// \return The resulting AST or null if an error occurred.
-std::unique_ptr<ASTUnit> buildASTFromCode(const Twine &Code,
- const Twine &FileName = "input.cc");
+std::unique_ptr<ASTUnit>
+buildASTFromCode(const Twine &Code, const Twine &FileName = "input.cc",
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps =
+ std::make_shared<RawPCHContainerOperations>());
/// \brief Builds an AST for 'Code' with additional flags.
///
/// \param Code C++ code.
/// \param Args Additional flags to pass on.
/// \param FileName The file name which 'Code' will be mapped as.
+/// \param PCHContainerOps The PCHContainerOperations for loading and creating
+/// clang modules.
///
/// \return The resulting AST or null if an error occurred.
-std::unique_ptr<ASTUnit>
-buildASTFromCodeWithArgs(const Twine &Code,
- const std::vector<std::string> &Args,
- const Twine &FileName = "input.cc");
+std::unique_ptr<ASTUnit> buildASTFromCodeWithArgs(
+ const Twine &Code, const std::vector<std::string> &Args,
+ const Twine &FileName = "input.cc",
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps =
+ std::make_shared<RawPCHContainerOperations>());
/// \brief Utility to run a FrontendAction in a single clang invocation.
class ToolInvocation {
- public:
+public:
/// \brief Create a tool invocation.
///
/// \param CommandLine The command line arguments to clang. Note that clang
@@ -195,16 +214,23 @@ class ToolInvocation {
/// \param FAction The action to be executed. Class takes ownership.
/// \param Files The FileManager used for the execution. Class does not take
/// ownership.
+ /// \param PCHContainerOps The PCHContainerOperations for loading and creating
+ /// clang modules.
ToolInvocation(std::vector<std::string> CommandLine, FrontendAction *FAction,
- FileManager *Files);
+ FileManager *Files,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps =
+ std::make_shared<RawPCHContainerOperations>());
/// \brief Create a tool invocation.
///
/// \param CommandLine The command line arguments to clang.
/// \param Action The action to be executed.
/// \param Files The FileManager used for the execution.
+ /// \param PCHContainerOps The PCHContainerOperations for loading and creating
+ /// clang modules.
ToolInvocation(std::vector<std::string> CommandLine, ToolAction *Action,
- FileManager *Files);
+ FileManager *Files,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps);
~ToolInvocation();
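Editor's note: ToolInvocation now threads the same PCHContainerOperations through to the CompilerInstance it builds. A hedged sketch of driving it directly, modelled on how runToolOnCodeWithArgs uses it internally (the FileManager and mapVirtualFile plumbing here is an assumption to double-check against lib/Tooling/Tooling.cpp):

    #include "clang/Basic/FileManager.h"
    #include "clang/Frontend/FrontendActions.h"
    #include "clang/Frontend/PCHContainerOperations.h"
    #include "clang/Tooling/Tooling.h"
    #include <memory>
    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> Args = {"clang-tool", "-fsyntax-only", "input.cc"};
      llvm::IntrusiveRefCntPtr<clang::FileManager> Files(
          new clang::FileManager(clang::FileSystemOptions()));
      clang::tooling::ToolInvocation Invocation(
          Args, new clang::SyntaxOnlyAction, Files.get(),
          std::make_shared<clang::RawPCHContainerOperations>());
      // Back the file named on the command line with in-memory contents.
      Invocation.mapVirtualFile("input.cc", "int f() { return 42; }");
      return Invocation.run() ? 0 : 1;
    }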
@@ -229,12 +255,14 @@ class ToolInvocation {
bool runInvocation(const char *BinaryName,
clang::driver::Compilation *Compilation,
- clang::CompilerInvocation *Invocation);
+ clang::CompilerInvocation *Invocation,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps);
std::vector<std::string> CommandLine;
ToolAction *Action;
bool OwnsAction;
FileManager *Files;
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps;
// Maps <file name> -> <file content>.
llvm::StringMap<StringRef> MappedFileContents;
DiagnosticConsumer *DiagConsumer;
@@ -255,8 +283,12 @@ class ClangTool {
/// command lines for the given source paths.
/// \param SourcePaths The source files to run over. If a source file is
/// not found in Compilations, it is skipped.
+ /// \param PCHContainerOps The PCHContainerOperations for loading and creating
+ /// clang modules.
ClangTool(const CompilationDatabase &Compilations,
- ArrayRef<std::string> SourcePaths);
+ ArrayRef<std::string> SourcePaths,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps =
+ std::make_shared<RawPCHContainerOperations>());
~ClangTool();
@@ -297,6 +329,7 @@ class ClangTool {
private:
const CompilationDatabase &Compilations;
std::vector<std::string> SourcePaths;
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps;
llvm::IntrusiveRefCntPtr<FileManager> Files;
// Contains a list of pairs (<file name>, <file content>).
diff --git a/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMT.cpp b/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMT.cpp
index f266eaf..f33ad21 100644
--- a/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMT.cpp
+++ b/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMT.cpp
@@ -166,7 +166,8 @@ static bool HasARCRuntime(CompilerInvocation &origCI) {
}
static CompilerInvocation *
-createInvocationForMigration(CompilerInvocation &origCI) {
+createInvocationForMigration(CompilerInvocation &origCI,
+ const PCHContainerOperations &PCHContainerOps) {
std::unique_ptr<CompilerInvocation> CInvok;
CInvok.reset(new CompilerInvocation(origCI));
PreprocessorOptions &PPOpts = CInvok->getPreprocessorOpts();
@@ -178,9 +179,8 @@ createInvocationForMigration(CompilerInvocation &origCI) {
IntrusiveRefCntPtr<DiagnosticsEngine> Diags(
new DiagnosticsEngine(DiagID, &origCI.getDiagnosticOpts(),
new IgnoringDiagConsumer()));
- std::string OriginalFile =
- ASTReader::getOriginalSourceFile(PPOpts.ImplicitPCHInclude,
- FileMgr, *Diags);
+ std::string OriginalFile = ASTReader::getOriginalSourceFile(
+ PPOpts.ImplicitPCHInclude, FileMgr, PCHContainerOps, *Diags);
if (!OriginalFile.empty())
PPOpts.Includes.insert(PPOpts.Includes.begin(), OriginalFile);
PPOpts.ImplicitPCHInclude.clear();
@@ -230,11 +230,11 @@ static void emitPremigrationErrors(const CapturedDiagList &arcDiags,
// checkForManualIssues.
//===----------------------------------------------------------------------===//
-bool arcmt::checkForManualIssues(CompilerInvocation &origCI,
- const FrontendInputFile &Input,
- DiagnosticConsumer *DiagClient,
- bool emitPremigrationARCErrors,
- StringRef plistOut) {
+bool arcmt::checkForManualIssues(
+ CompilerInvocation &origCI, const FrontendInputFile &Input,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ DiagnosticConsumer *DiagClient, bool emitPremigrationARCErrors,
+ StringRef plistOut) {
if (!origCI.getLangOpts()->ObjC1)
return false;
@@ -247,7 +247,7 @@ bool arcmt::checkForManualIssues(CompilerInvocation &origCI,
assert(!transforms.empty());
std::unique_ptr<CompilerInvocation> CInvok;
- CInvok.reset(createInvocationForMigration(origCI));
+ CInvok.reset(createInvocationForMigration(origCI, *PCHContainerOps));
CInvok->getFrontendOpts().Inputs.clear();
CInvok->getFrontendOpts().Inputs.push_back(Input);
@@ -263,8 +263,8 @@ bool arcmt::checkForManualIssues(CompilerInvocation &origCI,
CaptureDiagnosticConsumer errRec(*Diags, *DiagClient, capturedDiags);
Diags->setClient(&errRec, /*ShouldOwnClient=*/false);
- std::unique_ptr<ASTUnit> Unit(
- ASTUnit::LoadFromCompilerInvocationAction(CInvok.release(), Diags));
+ std::unique_ptr<ASTUnit> Unit(ASTUnit::LoadFromCompilerInvocationAction(
+ CInvok.release(), PCHContainerOps, Diags));
if (!Unit) {
errRec.FinishCapture();
return true;
@@ -330,12 +330,11 @@ bool arcmt::checkForManualIssues(CompilerInvocation &origCI,
// applyTransformations.
//===----------------------------------------------------------------------===//
-static bool applyTransforms(CompilerInvocation &origCI,
- const FrontendInputFile &Input,
- DiagnosticConsumer *DiagClient,
- StringRef outputDir,
- bool emitPremigrationARCErrors,
- StringRef plistOut) {
+static bool
+applyTransforms(CompilerInvocation &origCI, const FrontendInputFile &Input,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ DiagnosticConsumer *DiagClient, StringRef outputDir,
+ bool emitPremigrationARCErrors, StringRef plistOut) {
if (!origCI.getLangOpts()->ObjC1)
return false;
@@ -343,15 +342,16 @@ static bool applyTransforms(CompilerInvocation &origCI,
// Make sure checking is successful first.
CompilerInvocation CInvokForCheck(origCI);
- if (arcmt::checkForManualIssues(CInvokForCheck, Input, DiagClient,
- emitPremigrationARCErrors, plistOut))
+ if (arcmt::checkForManualIssues(CInvokForCheck, Input, PCHContainerOps,
+ DiagClient, emitPremigrationARCErrors,
+ plistOut))
return true;
CompilerInvocation CInvok(origCI);
CInvok.getFrontendOpts().Inputs.clear();
CInvok.getFrontendOpts().Inputs.push_back(Input);
-
- MigrationProcess migration(CInvok, DiagClient, outputDir);
+
+ MigrationProcess migration(CInvok, PCHContainerOps, DiagClient, outputDir);
bool NoFinalizeRemoval = origCI.getMigratorOpts().NoFinalizeRemoval;
std::vector<TransformFn> transforms = arcmt::getAllTransformations(OrigGCMode,
@@ -376,22 +376,22 @@ static bool applyTransforms(CompilerInvocation &origCI,
}
}
-bool arcmt::applyTransformations(CompilerInvocation &origCI,
- const FrontendInputFile &Input,
- DiagnosticConsumer *DiagClient) {
- return applyTransforms(origCI, Input, DiagClient,
+bool arcmt::applyTransformations(
+ CompilerInvocation &origCI, const FrontendInputFile &Input,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ DiagnosticConsumer *DiagClient) {
+ return applyTransforms(origCI, Input, PCHContainerOps, DiagClient,
StringRef(), false, StringRef());
}
-bool arcmt::migrateWithTemporaryFiles(CompilerInvocation &origCI,
- const FrontendInputFile &Input,
- DiagnosticConsumer *DiagClient,
- StringRef outputDir,
- bool emitPremigrationARCErrors,
- StringRef plistOut) {
+bool arcmt::migrateWithTemporaryFiles(
+ CompilerInvocation &origCI, const FrontendInputFile &Input,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ DiagnosticConsumer *DiagClient, StringRef outputDir,
+ bool emitPremigrationARCErrors, StringRef plistOut) {
assert(!outputDir.empty() && "Expected output directory path");
- return applyTransforms(origCI, Input, DiagClient,
- outputDir, emitPremigrationARCErrors, plistOut);
+ return applyTransforms(origCI, Input, PCHContainerOps, DiagClient, outputDir,
+ emitPremigrationARCErrors, plistOut);
}
bool arcmt::getFileRemappings(std::vector<std::pair<std::string,std::string> > &
@@ -499,10 +499,12 @@ public:
/// \brief Anchor for VTable.
MigrationProcess::RewriteListener::~RewriteListener() { }
-MigrationProcess::MigrationProcess(const CompilerInvocation &CI,
- DiagnosticConsumer *diagClient,
- StringRef outputDir)
- : OrigCI(CI), DiagClient(diagClient), HadARCErrors(false) {
+MigrationProcess::MigrationProcess(
+ const CompilerInvocation &CI,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ DiagnosticConsumer *diagClient, StringRef outputDir)
+ : OrigCI(CI), PCHContainerOps(PCHContainerOps), DiagClient(diagClient),
+ HadARCErrors(false) {
if (!outputDir.empty()) {
IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
IntrusiveRefCntPtr<DiagnosticsEngine> Diags(
@@ -515,7 +517,7 @@ MigrationProcess::MigrationProcess(const CompilerInvocation &CI,
bool MigrationProcess::applyTransform(TransformFn trans,
RewriteListener *listener) {
std::unique_ptr<CompilerInvocation> CInvok;
- CInvok.reset(createInvocationForMigration(OrigCI));
+ CInvok.reset(createInvocationForMigration(OrigCI, *PCHContainerOps));
CInvok->getDiagnosticOpts().IgnoreWarnings = true;
Remapper.applyMappings(CInvok->getPreprocessorOpts());
@@ -537,7 +539,7 @@ bool MigrationProcess::applyTransform(TransformFn trans,
ASTAction.reset(new ARCMTMacroTrackerAction(ARCMTMacroLocs));
std::unique_ptr<ASTUnit> Unit(ASTUnit::LoadFromCompilerInvocationAction(
- CInvok.release(), Diags, ASTAction.get()));
+ CInvok.release(), PCHContainerOps, Diags, ASTAction.get()));
if (!Unit) {
errRec.FinishCapture();
return true;
diff --git a/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMTActions.cpp b/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMTActions.cpp
index 0ed36dd..39a922f 100644
--- a/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMTActions.cpp
+++ b/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMTActions.cpp
@@ -16,6 +16,7 @@ using namespace arcmt;
bool CheckAction::BeginInvocation(CompilerInstance &CI) {
if (arcmt::checkForManualIssues(CI.getInvocation(), getCurrentInput(),
+ CI.getPCHContainerOperations(),
CI.getDiagnostics().getClient()))
return false; // errors, stop the action.
@@ -29,6 +30,7 @@ CheckAction::CheckAction(FrontendAction *WrappedAction)
bool ModifyAction::BeginInvocation(CompilerInstance &CI) {
return !arcmt::applyTransformations(CI.getInvocation(), getCurrentInput(),
+ CI.getPCHContainerOperations(),
CI.getDiagnostics().getClient());
}
@@ -36,12 +38,10 @@ ModifyAction::ModifyAction(FrontendAction *WrappedAction)
: WrapperFrontendAction(WrappedAction) {}
bool MigrateAction::BeginInvocation(CompilerInstance &CI) {
- if (arcmt::migrateWithTemporaryFiles(CI.getInvocation(),
- getCurrentInput(),
- CI.getDiagnostics().getClient(),
- MigrateDir,
- EmitPremigrationARCErros,
- PlistOut))
+ if (arcmt::migrateWithTemporaryFiles(
+ CI.getInvocation(), getCurrentInput(), CI.getPCHContainerOperations(),
+ CI.getDiagnostics().getClient(), MigrateDir, EmitPremigrationARCErros,
+ PlistOut))
return false; // errors, stop the action.
// We only want to see diagnostics emitted by migrateWithTemporaryFiles.
diff --git a/contrib/llvm/tools/clang/lib/AST/ASTConsumer.cpp b/contrib/llvm/tools/clang/lib/AST/ASTConsumer.cpp
index 55033b2..cff82e9 100644
--- a/contrib/llvm/tools/clang/lib/AST/ASTConsumer.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ASTConsumer.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTConsumer.h"
+#include "llvm/Bitcode/BitstreamReader.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclGroup.h"
using namespace clang;
diff --git a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp
index 4a831d9..049eebd 100644
--- a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp
@@ -5610,8 +5610,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string& S,
// @encode(class_name)
ObjCInterfaceDecl *OI = OIT->getDecl();
S += '{';
- const IdentifierInfo *II = OI->getIdentifier();
- S += II->getName();
+ S += OI->getObjCRuntimeNameAsString();
S += '=';
SmallVector<const ObjCIvarDecl*, 32> Ivars;
DeepCollectObjCIvars(OI, true, Ivars);
@@ -5654,7 +5653,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string& S,
S += '"';
for (const auto *I : OPT->quals()) {
S += '<';
- S += I->getNameAsString();
+ S += I->getObjCRuntimeNameAsString();
S += '>';
}
S += '"';
@@ -5678,7 +5677,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string& S,
for (unsigned i = 0, e = Ivars.size(); i != e; ++i) {
if (cast<FieldDecl>(Ivars[i]) == FD) {
S += '{';
- S += OI->getIdentifier()->getName();
+ S += OI->getObjCRuntimeNameAsString();
S += '}';
return;
}
@@ -5696,10 +5695,10 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string& S,
if (OPT->getInterfaceDecl() &&
(FD || EncodingProperty || EncodeClassNames)) {
S += '"';
- S += OPT->getInterfaceDecl()->getIdentifier()->getName();
+ S += OPT->getInterfaceDecl()->getObjCRuntimeNameAsString();
for (const auto *I : OPT->quals()) {
S += '<';
- S += I->getNameAsString();
+ S += I->getObjCRuntimeNameAsString();
S += '>';
}
S += '"';
diff --git a/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp b/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp
index 911f168..76e4e11 100644
--- a/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp
@@ -3922,8 +3922,8 @@ Decl *ASTNodeImporter::VisitObjCPropertyDecl(ObjCPropertyDecl *D) {
}
// Import the type.
- TypeSourceInfo *T = Importer.Import(D->getTypeSourceInfo());
- if (!T)
+ TypeSourceInfo *TSI = Importer.Import(D->getTypeSourceInfo());
+ if (!TSI)
return nullptr;
// Create the new property.
@@ -3932,7 +3932,8 @@ Decl *ASTNodeImporter::VisitObjCPropertyDecl(ObjCPropertyDecl *D) {
Name.getAsIdentifierInfo(),
Importer.Import(D->getAtLoc()),
Importer.Import(D->getLParenLoc()),
- T,
+ Importer.Import(D->getType()),
+ TSI,
D->getPropertyImplementation());
Importer.Imported(D, ToProperty);
ToProperty->setLexicalDeclContext(LexicalDC);
diff --git a/contrib/llvm/tools/clang/lib/AST/Decl.cpp b/contrib/llvm/tools/clang/lib/AST/Decl.cpp
index 8eff4c4..d9a3389 100644
--- a/contrib/llvm/tools/clang/lib/AST/Decl.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/Decl.cpp
@@ -3681,7 +3681,8 @@ void RecordDecl::LoadFieldsFromExternalStorage() const {
bool RecordDecl::mayInsertExtraPadding(bool EmitRemark) const {
ASTContext &Context = getASTContext();
- if (!Context.getLangOpts().Sanitize.has(SanitizerKind::Address) ||
+ if (!Context.getLangOpts().Sanitize.hasOneOf(
+ SanitizerKind::Address | SanitizerKind::KernelAddress) ||
!Context.getLangOpts().SanitizeAddressFieldPadding)
return false;
const auto &Blacklist = Context.getSanitizerBlacklist();
diff --git a/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp b/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp
index a63ba7e..a1600cb 100644
--- a/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp
@@ -1862,16 +1862,18 @@ ObjCPropertyDecl *ObjCPropertyDecl::Create(ASTContext &C, DeclContext *DC,
IdentifierInfo *Id,
SourceLocation AtLoc,
SourceLocation LParenLoc,
- TypeSourceInfo *T,
+ QualType T,
+ TypeSourceInfo *TSI,
PropertyControl propControl) {
- return new (C, DC) ObjCPropertyDecl(DC, L, Id, AtLoc, LParenLoc, T);
+ return new (C, DC) ObjCPropertyDecl(DC, L, Id, AtLoc, LParenLoc, T, TSI,
+ propControl);
}
ObjCPropertyDecl *ObjCPropertyDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) ObjCPropertyDecl(nullptr, SourceLocation(), nullptr,
SourceLocation(), SourceLocation(),
- nullptr);
+ QualType(), nullptr, None);
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/tools/clang/lib/AST/DeclPrinter.cpp b/contrib/llvm/tools/clang/lib/AST/DeclPrinter.cpp
index d8cd40e..c3ce476 100644
--- a/contrib/llvm/tools/clang/lib/AST/DeclPrinter.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/DeclPrinter.cpp
@@ -37,6 +37,13 @@ namespace {
void Print(AccessSpecifier AS);
+ /// Print an Objective-C method type in parentheses.
+ ///
+ /// \param Quals The Objective-C declaration qualifiers.
+ /// \param T The type to print.
+ void PrintObjCMethodType(ASTContext &Ctx, Decl::ObjCDeclQualifier Quals,
+ QualType T);
+
public:
DeclPrinter(raw_ostream &Out, const PrintingPolicy &Policy,
unsigned Indentation = 0, bool PrintInstantiation = false)
@@ -930,24 +937,52 @@ void DeclPrinter::VisitClassTemplateDecl(ClassTemplateDecl *D) {
// Objective-C declarations
//----------------------------------------------------------------------------
+void DeclPrinter::PrintObjCMethodType(ASTContext &Ctx,
+ Decl::ObjCDeclQualifier Quals,
+ QualType T) {
+ Out << '(';
+ if (Quals & Decl::ObjCDeclQualifier::OBJC_TQ_In)
+ Out << "in ";
+ if (Quals & Decl::ObjCDeclQualifier::OBJC_TQ_Inout)
+ Out << "inout ";
+ if (Quals & Decl::ObjCDeclQualifier::OBJC_TQ_Out)
+ Out << "out ";
+ if (Quals & Decl::ObjCDeclQualifier::OBJC_TQ_Bycopy)
+ Out << "bycopy ";
+ if (Quals & Decl::ObjCDeclQualifier::OBJC_TQ_Byref)
+ Out << "byref ";
+ if (Quals & Decl::ObjCDeclQualifier::OBJC_TQ_Oneway)
+ Out << "oneway ";
+ if (Quals & Decl::ObjCDeclQualifier::OBJC_TQ_CSNullability) {
+ if (auto nullability = AttributedType::stripOuterNullability(T)) {
+ Out << getNullabilitySpelling(*nullability).substr(2) << ' ';
+ }
+ }
+
+ Out << Ctx.getUnqualifiedObjCPointerType(T).getAsString(Policy);
+ Out << ')';
+}
+
void DeclPrinter::VisitObjCMethodDecl(ObjCMethodDecl *OMD) {
if (OMD->isInstanceMethod())
Out << "- ";
else
Out << "+ ";
- if (!OMD->getReturnType().isNull())
- Out << '(' << OMD->getASTContext()
- .getUnqualifiedObjCPointerType(OMD->getReturnType())
- .getAsString(Policy) << ")";
+ if (!OMD->getReturnType().isNull()) {
+ PrintObjCMethodType(OMD->getASTContext(), OMD->getObjCDeclQualifier(),
+ OMD->getReturnType());
+ }
std::string name = OMD->getSelector().getAsString();
std::string::size_type pos, lastPos = 0;
for (const auto *PI : OMD->params()) {
// FIXME: selector is missing here!
pos = name.find_first_of(':', lastPos);
- Out << " " << name.substr(lastPos, pos - lastPos);
- Out << ":(" << PI->getASTContext().getUnqualifiedObjCPointerType(PI->getType()).
- getAsString(Policy) << ')' << *PI;
+ Out << " " << name.substr(lastPos, pos - lastPos) << ':';
+ PrintObjCMethodType(OMD->getASTContext(),
+ PI->getObjCDeclQualifier(),
+ PI->getType());
+ Out << *PI;
lastPos = pos + 1;
}
@@ -1103,6 +1138,8 @@ void DeclPrinter::VisitObjCPropertyDecl(ObjCPropertyDecl *PDecl) {
else if (PDecl->getPropertyImplementation() == ObjCPropertyDecl::Optional)
Out << "@optional\n";
+ QualType T = PDecl->getType();
+
Out << "@property";
if (PDecl->getPropertyAttributes() != ObjCPropertyDecl::OBJC_PR_noattr) {
bool first = true;
@@ -1161,10 +1198,25 @@ void DeclPrinter::VisitObjCPropertyDecl(ObjCPropertyDecl *PDecl) {
first = false;
}
+ if (PDecl->getPropertyAttributes() &
+ ObjCPropertyDecl::OBJC_PR_nullability) {
+ if (auto nullability = AttributedType::stripOuterNullability(T)) {
+ if (*nullability == NullabilityKind::Unspecified &&
+ (PDecl->getPropertyAttributes() &
+ ObjCPropertyDecl::OBJC_PR_null_resettable)) {
+ Out << (first ? ' ' : ',') << "null_resettable";
+ } else {
+ Out << (first ? ' ' : ',')
+ << getNullabilitySpelling(*nullability).substr(2);
+ }
+ first = false;
+ }
+ }
+
(void) first; // Silence dead store warning due to idiomatic code.
Out << " )";
}
- Out << ' ' << PDecl->getASTContext().getUnqualifiedObjCPointerType(PDecl->getType()).
+ Out << ' ' << PDecl->getASTContext().getUnqualifiedObjCPointerType(T).
getAsString(Policy) << ' ' << *PDecl;
if (Policy.PolishForDeclaration)
Out << ';';
diff --git a/contrib/llvm/tools/clang/lib/AST/DeclTemplate.cpp b/contrib/llvm/tools/clang/lib/AST/DeclTemplate.cpp
index 6374a92..cde497b 100644
--- a/contrib/llvm/tools/clang/lib/AST/DeclTemplate.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/DeclTemplate.cpp
@@ -124,6 +124,12 @@ static void AdoptTemplateParameterList(TemplateParameterList *Params,
}
}
+namespace clang {
+void *allocateDefaultArgStorageChain(const ASTContext &C) {
+ return new (C) char[sizeof(void*) * 2];
+}
+}
+
//===----------------------------------------------------------------------===//
// RedeclarableTemplateDecl Implementation
//===----------------------------------------------------------------------===//
@@ -504,14 +510,14 @@ TemplateTypeParmDecl::CreateDeserialized(const ASTContext &C, unsigned ID) {
SourceLocation TemplateTypeParmDecl::getDefaultArgumentLoc() const {
return hasDefaultArgument()
- ? DefaultArgument->getTypeLoc().getBeginLoc()
- : SourceLocation();
+ ? getDefaultArgumentInfo()->getTypeLoc().getBeginLoc()
+ : SourceLocation();
}
SourceRange TemplateTypeParmDecl::getSourceRange() const {
if (hasDefaultArgument() && !defaultArgumentWasInherited())
return SourceRange(getLocStart(),
- DefaultArgument->getTypeLoc().getEndLoc());
+ getDefaultArgumentInfo()->getTypeLoc().getEndLoc());
else
return TypeDecl::getSourceRange();
}
@@ -543,10 +549,8 @@ NonTypeTemplateParmDecl::NonTypeTemplateParmDecl(DeclContext *DC,
unsigned NumExpandedTypes,
TypeSourceInfo **ExpandedTInfos)
: DeclaratorDecl(NonTypeTemplateParm, DC, IdLoc, Id, T, TInfo, StartLoc),
- TemplateParmPosition(D, P), DefaultArgumentAndInherited(nullptr, false),
- ParameterPack(true), ExpandedParameterPack(true),
- NumExpandedTypes(NumExpandedTypes)
-{
+ TemplateParmPosition(D, P), ParameterPack(true),
+ ExpandedParameterPack(true), NumExpandedTypes(NumExpandedTypes) {
if (ExpandedTypes && ExpandedTInfos) {
void **TypesAndInfos = reinterpret_cast<void **>(this + 1);
for (unsigned I = 0; I != NumExpandedTypes; ++I) {
@@ -621,8 +625,7 @@ TemplateTemplateParmDecl::TemplateTemplateParmDecl(
IdentifierInfo *Id, TemplateParameterList *Params,
unsigned NumExpansions, TemplateParameterList * const *Expansions)
: TemplateDecl(TemplateTemplateParm, DC, L, Id, Params),
- TemplateParmPosition(D, P), DefaultArgument(),
- DefaultArgumentWasInherited(false), ParameterPack(true),
+ TemplateParmPosition(D, P), ParameterPack(true),
ExpandedParameterPack(true), NumExpandedParams(NumExpansions) {
if (Expansions)
std::memcpy(reinterpret_cast<void*>(this + 1), Expansions,
@@ -663,6 +666,19 @@ TemplateTemplateParmDecl::CreateDeserialized(ASTContext &C, unsigned ID,
nullptr, NumExpansions, nullptr);
}
+SourceLocation TemplateTemplateParmDecl::getDefaultArgumentLoc() const {
+ return hasDefaultArgument() ? getDefaultArgument().getLocation()
+ : SourceLocation();
+}
+
+void TemplateTemplateParmDecl::setDefaultArgument(
+ const ASTContext &C, const TemplateArgumentLoc &DefArg) {
+ if (DefArg.getArgument().isNull())
+ DefaultArgument.set(nullptr);
+ else
+ DefaultArgument.set(new (C) TemplateArgumentLoc(DefArg));
+}
+
//===----------------------------------------------------------------------===//
// TemplateArgumentList Implementation
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/tools/clang/lib/AST/Expr.cpp b/contrib/llvm/tools/clang/lib/AST/Expr.cpp
index 76a4da2..36f4139 100644
--- a/contrib/llvm/tools/clang/lib/AST/Expr.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/Expr.cpp
@@ -1238,7 +1238,7 @@ unsigned CallExpr::getBuiltinCallee() const {
return FDecl->getBuiltinID();
}
-bool CallExpr::isUnevaluatedBuiltinCall(ASTContext &Ctx) const {
+bool CallExpr::isUnevaluatedBuiltinCall(const ASTContext &Ctx) const {
if (unsigned BI = getBuiltinCallee())
return Ctx.BuiltinInfo.isUnevaluated(BI);
return false;
@@ -2772,6 +2772,11 @@ bool Expr::isConstantInitializer(ASTContext &Ctx, bool IsForRef,
const Expr *Exp = cast<CompoundLiteralExpr>(this)->getInitializer();
return Exp->isConstantInitializer(Ctx, false, Culprit);
}
+ case DesignatedInitUpdateExprClass: {
+ const DesignatedInitUpdateExpr *DIUE = cast<DesignatedInitUpdateExpr>(this);
+ return DIUE->getBase()->isConstantInitializer(Ctx, false, Culprit) &&
+ DIUE->getUpdater()->isConstantInitializer(Ctx, false, Culprit);
+ }
case InitListExprClass: {
const InitListExpr *ILE = cast<InitListExpr>(this);
if (ILE->getType()->isArrayType()) {
@@ -2818,6 +2823,7 @@ bool Expr::isConstantInitializer(ASTContext &Ctx, bool IsForRef,
break;
}
case ImplicitValueInitExprClass:
+ case NoInitExprClass:
return true;
case ParenExprClass:
return cast<ParenExpr>(this)->getSubExpr()
@@ -2881,6 +2887,28 @@ bool Expr::isConstantInitializer(ASTContext &Ctx, bool IsForRef,
return false;
}
+namespace {
+ /// \brief Look for any side effects within a Stmt.
+ class SideEffectFinder : public ConstEvaluatedExprVisitor<SideEffectFinder> {
+ typedef ConstEvaluatedExprVisitor<SideEffectFinder> Inherited;
+ const bool IncludePossibleEffects;
+ bool HasSideEffects;
+
+ public:
+ explicit SideEffectFinder(const ASTContext &Context, bool IncludePossible)
+ : Inherited(Context),
+ IncludePossibleEffects(IncludePossible), HasSideEffects(false) { }
+
+ bool hasSideEffects() const { return HasSideEffects; }
+
+ void VisitExpr(const Expr *E) {
+ if (!HasSideEffects &&
+ E->HasSideEffects(Context, IncludePossibleEffects))
+ HasSideEffects = true;
+ }
+ };
+}
+
bool Expr::HasSideEffects(const ASTContext &Ctx,
bool IncludePossibleEffects) const {
// In circumstances where we care about definite side effects instead of
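Editor's note: SideEffectFinder above is the usual accumulate-a-flag visitor: it walks the statement tree and latches HasSideEffects the first time any subexpression reports one, which is what the new StmtExprClass case further below relies on. A toy standalone version of the pattern over a miniature expression tree (nothing here is Clang API):

    #include <cassert>
    #include <memory>
    #include <vector>

    struct Expr {
      bool SideEffect = false;                    // does this node itself have one?
      std::vector<std::unique_ptr<Expr>> Children;
    };

    class SideEffectFinder {
      bool HasSideEffects = false;
    public:
      void visit(const Expr &E) {
        if (HasSideEffects)
          return;                                 // already latched, stop early
        if (E.SideEffect) {
          HasSideEffects = true;
          return;
        }
        for (const auto &C : E.Children)          // otherwise recurse into children
          visit(*C);
      }
      bool hasSideEffects() const { return HasSideEffects; }
    };

    int main() {
      Expr Root;
      Root.Children.push_back(std::make_unique<Expr>());
      Root.Children.back()->SideEffect = true;    // e.g. an assignment inside ({ ... })
      SideEffectFinder Finder;
      Finder.visit(Root);
      assert(Finder.hasSideEffects());
    }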
@@ -2925,6 +2953,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
case UnaryExprOrTypeTraitExprClass:
case AddrLabelExprClass:
case GNUNullExprClass:
+ case NoInitExprClass:
case CXXBoolLiteralExprClass:
case CXXNullPtrLiteralExprClass:
case CXXThisExprClass:
@@ -2967,7 +2996,6 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
case CompoundAssignOperatorClass:
case VAArgExprClass:
case AtomicExprClass:
- case StmtExprClass:
case CXXThrowExprClass:
case CXXNewExprClass:
case CXXDeleteExprClass:
@@ -2975,6 +3003,13 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
// These always have a side-effect.
return true;
+ case StmtExprClass: {
+ // StmtExprs have a side-effect if any substatement does.
+ SideEffectFinder Finder(Ctx, IncludePossibleEffects);
+ Finder.Visit(cast<StmtExpr>(this)->getSubStmt());
+ return Finder.hasSideEffects();
+ }
+
case ParenExprClass:
case ArraySubscriptExprClass:
case MemberExprClass:
@@ -2983,6 +3018,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
case CompoundLiteralExprClass:
case ExtVectorElementExprClass:
case DesignatedInitExprClass:
+ case DesignatedInitUpdateExprClass:
case ParenListExprClass:
case CXXPseudoDestructorExprClass:
case CXXStdInitializerListExprClass:
@@ -3128,21 +3164,21 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
namespace {
/// \brief Look for a call to a non-trivial function within an expression.
- class NonTrivialCallFinder : public EvaluatedExprVisitor<NonTrivialCallFinder>
+ class NonTrivialCallFinder : public ConstEvaluatedExprVisitor<NonTrivialCallFinder>
{
- typedef EvaluatedExprVisitor<NonTrivialCallFinder> Inherited;
-
+ typedef ConstEvaluatedExprVisitor<NonTrivialCallFinder> Inherited;
+
bool NonTrivial;
public:
- explicit NonTrivialCallFinder(ASTContext &Context)
+ explicit NonTrivialCallFinder(const ASTContext &Context)
: Inherited(Context), NonTrivial(false) { }
bool hasNonTrivialCall() const { return NonTrivial; }
-
- void VisitCallExpr(CallExpr *E) {
- if (CXXMethodDecl *Method
- = dyn_cast_or_null<CXXMethodDecl>(E->getCalleeDecl())) {
+
+ void VisitCallExpr(const CallExpr *E) {
+ if (const CXXMethodDecl *Method
+ = dyn_cast_or_null<const CXXMethodDecl>(E->getCalleeDecl())) {
if (Method->isTrivial()) {
// Recurse to children of the call.
Inherited::VisitStmt(E);
@@ -3152,8 +3188,8 @@ namespace {
NonTrivial = true;
}
-
- void VisitCXXConstructExpr(CXXConstructExpr *E) {
+
+ void VisitCXXConstructExpr(const CXXConstructExpr *E) {
if (E->getConstructor()->isTrivial()) {
// Recurse to children of the call.
Inherited::VisitStmt(E);
@@ -3162,8 +3198,8 @@ namespace {
NonTrivial = true;
}
-
- void VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) {
+
+ void VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *E) {
if (E->getTemporary()->getDestructor()->isTrivial()) {
Inherited::VisitStmt(E);
return;
@@ -3174,7 +3210,7 @@ namespace {
};
}
-bool Expr::hasNonTrivialCall(ASTContext &Ctx) {
+bool Expr::hasNonTrivialCall(const ASTContext &Ctx) const {
NonTrivialCallFinder Finder(Ctx);
Finder.Visit(this);
return Finder.hasNonTrivialCall();
@@ -3989,6 +4025,25 @@ void DesignatedInitExpr::ExpandDesignator(const ASTContext &C, unsigned Idx,
NumDesignators = NumDesignators - 1 + NumNewDesignators;
}
+DesignatedInitUpdateExpr::DesignatedInitUpdateExpr(const ASTContext &C,
+ SourceLocation lBraceLoc, Expr *baseExpr, SourceLocation rBraceLoc)
+ : Expr(DesignatedInitUpdateExprClass, baseExpr->getType(), VK_RValue,
+ OK_Ordinary, false, false, false, false) {
+ BaseAndUpdaterExprs[0] = baseExpr;
+
+ InitListExpr *ILE = new (C) InitListExpr(C, lBraceLoc, None, rBraceLoc);
+ ILE->setType(baseExpr->getType());
+ BaseAndUpdaterExprs[1] = ILE;
+}
+
+SourceLocation DesignatedInitUpdateExpr::getLocStart() const {
+ return getBase()->getLocStart();
+}
+
+SourceLocation DesignatedInitUpdateExpr::getLocEnd() const {
+ return getBase()->getLocEnd();
+}
+
ParenListExpr::ParenListExpr(const ASTContext& C, SourceLocation lparenloc,
ArrayRef<Expr*> exprs,
SourceLocation rparenloc)
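
The Expr.cpp changes thread the new DesignatedInitUpdateExpr and NoInitExpr nodes through constant-initializer checking, side-effect analysis and source ranges, make StmtExpr side effects depend on its substatements, and const-qualify a few query helpers. Roughly, a DesignatedInitUpdateExpr pairs a non-constant base initializer with an updater InitListExpr whose untouched slots are NoInitExpr placeholders; a small C99-style sketch of an initializer that produces them (identifiers invented):

    struct Point { int x, y; };
    struct Line  { struct Point a, b; };
    struct Point make_point(void);

    /* '.a' is initialized from a call, then '.a.y' overwrites one field.  The */
    /* semantic form keeps the call as the base of a DesignatedInitUpdateExpr; */
    /* the untouched '.a.x' slot of its updater list becomes a NoInitExpr.     */
    struct Line l = { .a = make_point(), .a.y = 42, .b = { 1, 2 } };
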
diff --git a/contrib/llvm/tools/clang/lib/AST/ExprClassification.cpp b/contrib/llvm/tools/clang/lib/AST/ExprClassification.cpp
index 5b320c2..9cc612e 100644
--- a/contrib/llvm/tools/clang/lib/AST/ExprClassification.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ExprClassification.cpp
@@ -183,6 +183,8 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) {
case Expr::ObjCIndirectCopyRestoreExprClass:
case Expr::AtomicExprClass:
case Expr::CXXFoldExprClass:
+ case Expr::NoInitExprClass:
+ case Expr::DesignatedInitUpdateExprClass:
return Cl::CL_PRValue;
// Next come the complicated cases.
diff --git a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp
index be24a2a..8e472f1 100644
--- a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp
@@ -8675,6 +8675,8 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
case Expr::CompoundLiteralExprClass:
case Expr::ExtVectorElementExprClass:
case Expr::DesignatedInitExprClass:
+ case Expr::NoInitExprClass:
+ case Expr::DesignatedInitUpdateExprClass:
case Expr::ImplicitValueInitExprClass:
case Expr::ParenListExprClass:
case Expr::VAArgExprClass:
diff --git a/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp b/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp
index d07efae..e5a31f8 100644
--- a/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp
@@ -2010,7 +2010,11 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
case BuiltinType::Half: Out << "Dh"; break;
case BuiltinType::Float: Out << 'f'; break;
case BuiltinType::Double: Out << 'd'; break;
- case BuiltinType::LongDouble: Out << 'e'; break;
+ case BuiltinType::LongDouble:
+ Out << (getASTContext().getTargetInfo().useFloat128ManglingForLongDouble()
+ ? 'g'
+ : 'e');
+ break;
case BuiltinType::NullPtr: Out << "Dn"; break;
#define BUILTIN_TYPE(Id, SingletonId)
@@ -2676,7 +2680,9 @@ recurse:
// These all can only appear in local or variable-initialization
// contexts and so should never appear in a mangling.
case Expr::AddrLabelExprClass:
+ case Expr::DesignatedInitUpdateExprClass:
case Expr::ImplicitValueInitExprClass:
+ case Expr::NoInitExprClass:
case Expr::ParenListExprClass:
case Expr::LambdaExprClass:
case Expr::MSPropertyRefExprClass:
@@ -4060,8 +4066,7 @@ void ItaniumMangleContextImpl::mangleTypeName(QualType Ty, raw_ostream &Out) {
void ItaniumMangleContextImpl::mangleCXXVTableBitSet(const CXXRecordDecl *RD,
raw_ostream &Out) {
- Linkage L = RD->getLinkageInternal();
- if (L == InternalLinkage || L == UniqueExternalLinkage) {
+ if (!RD->isExternallyVisible()) {
// This part of the identifier needs to be unique across all translation
// units in the linked program. The scheme fails if multiple translation
// units are compiled using the same relative source file path, or if
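
The ItaniumMangle.cpp change switches the mangling of 'long double' from 'e' to 'g' (the Itanium code otherwise used for __float128) when the target opts in via useFloat128ManglingForLongDouble(); the Targets.cpp hunks further down enable that for 128-bit IBM long double on PowerPC ELF and for SystemZ. Sketch of the effect on a declaration:

    void take(long double);
    //   usual Itanium mangling:               _Z4takee   ('e')
    //   targets opting into 128-bit mangling: _Z4takeg   ('g')
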
diff --git a/contrib/llvm/tools/clang/lib/AST/MicrosoftCXXABI.cpp b/contrib/llvm/tools/clang/lib/AST/MicrosoftCXXABI.cpp
index 93ff77a..aba6796 100644
--- a/contrib/llvm/tools/clang/lib/AST/MicrosoftCXXABI.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/MicrosoftCXXABI.cpp
@@ -179,8 +179,9 @@ MSVtorDispAttr::Mode CXXRecordDecl::getMSVtorDispMode() const {
// // slot.
// void *FunctionPointerOrVirtualThunk;
//
-// // An offset to add to the address of the vbtable pointer after (possibly)
-// // selecting the virtual base but before resolving and calling the function.
+// // An offset to add to the address of the vbtable pointer after
+// // (possibly) selecting the virtual base but before resolving and calling
+// // the function.
// // Only needed if the class has any virtual bases or bases at a non-zero
// // offset.
// int NonVirtualBaseAdjustment;
diff --git a/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp b/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp
index db5b48e..29a95a5 100644
--- a/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp
@@ -2761,7 +2761,17 @@ void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL,
void MicrosoftMangleContextImpl::mangleCXXVTableBitSet(const CXXRecordDecl *RD,
raw_ostream &Out) {
- llvm::report_fatal_error("Cannot mangle bitsets yet");
+ if (!RD->isExternallyVisible()) {
+ // This part of the identifier needs to be unique across all translation
+ // units in the linked program. The scheme fails if multiple translation
+ // units are compiled using the same relative source file path, or if
+ // multiple translation units are built from the same source file.
+ SourceManager &SM = getASTContext().getSourceManager();
+ Out << "[" << SM.getFileEntryForID(SM.getMainFileID())->getName() << "]";
+ }
+
+ MicrosoftCXXNameMangler mangler(*this, Out);
+ mangler.mangleName(RD);
}
MicrosoftMangleContext *
diff --git a/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp b/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp
index 2101a55..388c91c 100644
--- a/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp
@@ -1467,7 +1467,7 @@ void RecordLayoutBuilder::LayoutBitField(const FieldDecl *D) {
// ms_struct basically requests a complete replacement of the
// platform ABI's struct-layout algorithm, with the high-level goal
// of duplicating MSVC's layout. For non-bitfields, this follows
- // the the standard algorithm. The basic bitfield layout rule is to
+ // the standard algorithm. The basic bitfield layout rule is to
// allocate an entire unit of the bitfield's declared type
// (e.g. 'unsigned long'), then parcel it up among successive
// bitfields whose declared types have the same size, making a new
diff --git a/contrib/llvm/tools/clang/lib/AST/Stmt.cpp b/contrib/llvm/tools/clang/lib/AST/Stmt.cpp
index 09bb17b..6f4a89f 100644
--- a/contrib/llvm/tools/clang/lib/AST/Stmt.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/Stmt.cpp
@@ -1636,7 +1636,7 @@ OMPSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
Dir->setLastIteration(Exprs.LastIteration);
Dir->setCalcLastIteration(Exprs.CalcLastIteration);
Dir->setPreCond(Exprs.PreCond);
- Dir->setCond(Exprs.Cond, Exprs.SeparatedCond);
+ Dir->setCond(Exprs.Cond);
Dir->setInit(Exprs.Init);
Dir->setInc(Exprs.Inc);
Dir->setCounters(Exprs.Counters);
@@ -1675,7 +1675,7 @@ OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc,
Dir->setLastIteration(Exprs.LastIteration);
Dir->setCalcLastIteration(Exprs.CalcLastIteration);
Dir->setPreCond(Exprs.PreCond);
- Dir->setCond(Exprs.Cond, Exprs.SeparatedCond);
+ Dir->setCond(Exprs.Cond);
Dir->setInit(Exprs.Init);
Dir->setInc(Exprs.Inc);
Dir->setIsLastIterVariable(Exprs.IL);
@@ -1721,7 +1721,7 @@ OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
Dir->setLastIteration(Exprs.LastIteration);
Dir->setCalcLastIteration(Exprs.CalcLastIteration);
Dir->setPreCond(Exprs.PreCond);
- Dir->setCond(Exprs.Cond, Exprs.SeparatedCond);
+ Dir->setCond(Exprs.Cond);
Dir->setInit(Exprs.Init);
Dir->setInc(Exprs.Inc);
Dir->setIsLastIterVariable(Exprs.IL);
@@ -1777,7 +1777,7 @@ OMPSectionDirective *OMPSectionDirective::Create(const ASTContext &C,
SourceLocation StartLoc,
SourceLocation EndLoc,
Stmt *AssociatedStmt) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPSectionsDirective),
+ unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPSectionDirective),
llvm::alignOf<Stmt *>());
void *Mem = C.Allocate(Size + sizeof(Stmt *));
OMPSectionDirective *Dir = new (Mem) OMPSectionDirective(StartLoc, EndLoc);
@@ -1876,7 +1876,7 @@ OMPParallelForDirective *OMPParallelForDirective::Create(
Dir->setLastIteration(Exprs.LastIteration);
Dir->setCalcLastIteration(Exprs.CalcLastIteration);
Dir->setPreCond(Exprs.PreCond);
- Dir->setCond(Exprs.Cond, Exprs.SeparatedCond);
+ Dir->setCond(Exprs.Cond);
Dir->setInit(Exprs.Init);
Dir->setInc(Exprs.Inc);
Dir->setIsLastIterVariable(Exprs.IL);
@@ -1920,7 +1920,7 @@ OMPParallelForSimdDirective *OMPParallelForSimdDirective::Create(
Dir->setLastIteration(Exprs.LastIteration);
Dir->setCalcLastIteration(Exprs.CalcLastIteration);
Dir->setPreCond(Exprs.PreCond);
- Dir->setCond(Exprs.Cond, Exprs.SeparatedCond);
+ Dir->setCond(Exprs.Cond);
Dir->setInit(Exprs.Init);
Dir->setInc(Exprs.Inc);
Dir->setIsLastIterVariable(Exprs.IL);
@@ -2041,6 +2041,27 @@ OMPTaskwaitDirective *OMPTaskwaitDirective::CreateEmpty(const ASTContext &C,
return new (Mem) OMPTaskwaitDirective();
}
+OMPTaskgroupDirective *OMPTaskgroupDirective::Create(const ASTContext &C,
+ SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ Stmt *AssociatedStmt) {
+ unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPTaskgroupDirective),
+ llvm::alignOf<Stmt *>());
+ void *Mem = C.Allocate(Size + sizeof(Stmt *));
+ OMPTaskgroupDirective *Dir =
+ new (Mem) OMPTaskgroupDirective(StartLoc, EndLoc);
+ Dir->setAssociatedStmt(AssociatedStmt);
+ return Dir;
+}
+
+OMPTaskgroupDirective *OMPTaskgroupDirective::CreateEmpty(const ASTContext &C,
+ EmptyShell) {
+ unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPTaskgroupDirective),
+ llvm::alignOf<Stmt *>());
+ void *Mem = C.Allocate(Size + sizeof(Stmt *));
+ return new (Mem) OMPTaskgroupDirective();
+}
+
OMPFlushDirective *OMPFlushDirective::Create(const ASTContext &C,
SourceLocation StartLoc,
SourceLocation EndLoc,
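
OMPTaskgroupDirective::Create and CreateEmpty above follow the allocation pattern shared by the other directives in this file: round the node size up to the alignment of Stmt* and reserve one trailing slot for the associated statement inside a single ASTContext allocation. Condensed sketch of the idiom, with a placeholder node name:

    unsigned Size = llvm::RoundUpToAlignment(sizeof(SomeDirective),
                                             llvm::alignOf<Stmt *>());
    // One allocation: the node itself plus one trailing Stmt* slot.
    void *Mem = C.Allocate(Size + sizeof(Stmt *));
    SomeDirective *Dir = new (Mem) SomeDirective(StartLoc, EndLoc);
    Dir->setAssociatedStmt(AssociatedStmt);   // stored in the trailing slot
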
diff --git a/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp b/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp
index db6d8c2..658e3df 100644
--- a/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp
@@ -910,6 +910,11 @@ void StmtPrinter::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *Node) {
PrintOMPExecutableDirective(Node);
}
+void StmtPrinter::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *Node) {
+ Indent() << "#pragma omp taskgroup";
+ PrintOMPExecutableDirective(Node);
+}
+
void StmtPrinter::VisitOMPFlushDirective(OMPFlushDirective *Node) {
Indent() << "#pragma omp flush ";
PrintOMPExecutableDirective(Node);
@@ -1430,6 +1435,22 @@ void StmtPrinter::VisitDesignatedInitExpr(DesignatedInitExpr *Node) {
PrintExpr(Node->getInit());
}
+void StmtPrinter::VisitDesignatedInitUpdateExpr(
+ DesignatedInitUpdateExpr *Node) {
+ OS << "{";
+ OS << "/*base*/";
+ PrintExpr(Node->getBase());
+ OS << ", ";
+
+ OS << "/*updater*/";
+ PrintExpr(Node->getUpdater());
+ OS << "}";
+}
+
+void StmtPrinter::VisitNoInitExpr(NoInitExpr *Node) {
+ OS << "/*no init*/";
+}
+
void StmtPrinter::VisitImplicitValueInitExpr(ImplicitValueInitExpr *Node) {
if (Policy.LangOpts.CPlusPlus) {
OS << "/*implicit*/";
diff --git a/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp b/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp
index c66b153..23f8d0c 100644
--- a/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp
@@ -510,6 +510,10 @@ void StmtProfiler::VisitOMPTaskwaitDirective(const OMPTaskwaitDirective *S) {
VisitOMPExecutableDirective(S);
}
+void StmtProfiler::VisitOMPTaskgroupDirective(const OMPTaskgroupDirective *S) {
+ VisitOMPExecutableDirective(S);
+}
+
void StmtProfiler::VisitOMPFlushDirective(const OMPFlushDirective *S) {
VisitOMPExecutableDirective(S);
}
@@ -744,6 +748,18 @@ void StmtProfiler::VisitDesignatedInitExpr(const DesignatedInitExpr *S) {
}
}
+// Seems that if VisitInitListExpr() only works on the syntactic form of an
+// InitListExpr, then a DesignatedInitUpdateExpr is not encountered.
+void StmtProfiler::VisitDesignatedInitUpdateExpr(
+ const DesignatedInitUpdateExpr *S) {
+ llvm_unreachable("Unexpected DesignatedInitUpdateExpr in syntactic form of "
+ "initializer");
+}
+
+void StmtProfiler::VisitNoInitExpr(const NoInitExpr *S) {
+ llvm_unreachable("Unexpected NoInitExpr in syntactic form of initializer");
+}
+
void StmtProfiler::VisitImplicitValueInitExpr(const ImplicitValueInitExpr *S) {
VisitExpr(S);
}
diff --git a/contrib/llvm/tools/clang/lib/AST/Type.cpp b/contrib/llvm/tools/clang/lib/AST/Type.cpp
index 09bb769..3ac1171 100644
--- a/contrib/llvm/tools/clang/lib/AST/Type.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/Type.cpp
@@ -1919,7 +1919,10 @@ bool AttributedType::isCallingConv() const {
case attr_objc_gc:
case attr_objc_ownership:
case attr_noreturn:
- return false;
+ case attr_nonnull:
+ case attr_nullable:
+ case attr_null_unspecified:
+ return false;
case attr_pcs:
case attr_pcs_vfp:
case attr_cdecl:
@@ -2340,6 +2343,153 @@ LinkageInfo Type::getLinkageAndVisibility() const {
return LV;
}
+Optional<NullabilityKind> Type::getNullability(const ASTContext &context) const {
+ QualType type(this, 0);
+ do {
+ // Check whether this is an attributed type with nullability
+ // information.
+ if (auto attributed = dyn_cast<AttributedType>(type.getTypePtr())) {
+ if (auto nullability = attributed->getImmediateNullability())
+ return nullability;
+ }
+
+ // Desugar the type. If desugaring does nothing, we're done.
+ QualType desugared = type.getSingleStepDesugaredType(context);
+ if (desugared.getTypePtr() == type.getTypePtr())
+ return None;
+
+ type = desugared;
+ } while (true);
+}
+
+bool Type::canHaveNullability() const {
+ QualType type = getCanonicalTypeInternal();
+
+ switch (type->getTypeClass()) {
+ // We'll only see canonical types here.
+#define NON_CANONICAL_TYPE(Class, Parent) \
+ case Type::Class: \
+ llvm_unreachable("non-canonical type");
+#define TYPE(Class, Parent)
+#include "clang/AST/TypeNodes.def"
+
+ // Pointer types.
+ case Type::Pointer:
+ case Type::BlockPointer:
+ case Type::MemberPointer:
+ case Type::ObjCObjectPointer:
+ return true;
+
+ // Dependent types that could instantiate to pointer types.
+ case Type::UnresolvedUsing:
+ case Type::TypeOfExpr:
+ case Type::TypeOf:
+ case Type::Decltype:
+ case Type::UnaryTransform:
+ case Type::TemplateTypeParm:
+ case Type::SubstTemplateTypeParmPack:
+ case Type::DependentName:
+ case Type::DependentTemplateSpecialization:
+ return true;
+
+ // Dependent template specializations can instantiate to pointer
+ // types unless they're known to be specializations of a class
+ // template.
+ case Type::TemplateSpecialization:
+ if (TemplateDecl *templateDecl
+ = cast<TemplateSpecializationType>(type.getTypePtr())
+ ->getTemplateName().getAsTemplateDecl()) {
+ if (isa<ClassTemplateDecl>(templateDecl))
+ return false;
+ }
+ return true;
+
+ // auto is considered dependent when it isn't deduced.
+ case Type::Auto:
+ return !cast<AutoType>(type.getTypePtr())->isDeduced();
+
+ case Type::Builtin:
+ switch (cast<BuiltinType>(type.getTypePtr())->getKind()) {
+ // Signed, unsigned, and floating-point types cannot have nullability.
+#define SIGNED_TYPE(Id, SingletonId) case BuiltinType::Id:
+#define UNSIGNED_TYPE(Id, SingletonId) case BuiltinType::Id:
+#define FLOATING_TYPE(Id, SingletonId) case BuiltinType::Id:
+#define BUILTIN_TYPE(Id, SingletonId)
+#include "clang/AST/BuiltinTypes.def"
+ return false;
+
+ // Dependent types that could instantiate to a pointer type.
+ case BuiltinType::Dependent:
+ case BuiltinType::Overload:
+ case BuiltinType::BoundMember:
+ case BuiltinType::PseudoObject:
+ case BuiltinType::UnknownAny:
+ case BuiltinType::ARCUnbridgedCast:
+ return true;
+
+ case BuiltinType::Void:
+ case BuiltinType::ObjCId:
+ case BuiltinType::ObjCClass:
+ case BuiltinType::ObjCSel:
+ case BuiltinType::OCLImage1d:
+ case BuiltinType::OCLImage1dArray:
+ case BuiltinType::OCLImage1dBuffer:
+ case BuiltinType::OCLImage2d:
+ case BuiltinType::OCLImage2dArray:
+ case BuiltinType::OCLImage3d:
+ case BuiltinType::OCLSampler:
+ case BuiltinType::OCLEvent:
+ case BuiltinType::BuiltinFn:
+ case BuiltinType::NullPtr:
+ return false;
+ }
+
+ // Non-pointer types.
+ case Type::Complex:
+ case Type::LValueReference:
+ case Type::RValueReference:
+ case Type::ConstantArray:
+ case Type::IncompleteArray:
+ case Type::VariableArray:
+ case Type::DependentSizedArray:
+ case Type::DependentSizedExtVector:
+ case Type::Vector:
+ case Type::ExtVector:
+ case Type::FunctionProto:
+ case Type::FunctionNoProto:
+ case Type::Record:
+ case Type::Enum:
+ case Type::InjectedClassName:
+ case Type::PackExpansion:
+ case Type::ObjCObject:
+ case Type::ObjCInterface:
+ case Type::Atomic:
+ return false;
+ }
+ llvm_unreachable("bad type kind!");
+}
+
+llvm::Optional<NullabilityKind> AttributedType::getImmediateNullability() const {
+ if (getAttrKind() == AttributedType::attr_nonnull)
+ return NullabilityKind::NonNull;
+ if (getAttrKind() == AttributedType::attr_nullable)
+ return NullabilityKind::Nullable;
+ if (getAttrKind() == AttributedType::attr_null_unspecified)
+ return NullabilityKind::Unspecified;
+ return None;
+}
+
+Optional<NullabilityKind> AttributedType::stripOuterNullability(QualType &T) {
+ if (auto attributed = dyn_cast<AttributedType>(T.getTypePtr())) {
+ if (auto nullability = attributed->getImmediateNullability()) {
+ T = attributed->getModifiedType();
+ return nullability;
+ }
+ }
+
+ return None;
+}
+
Qualifiers::ObjCLifetime Type::getObjCARCImplicitLifetime() const {
if (isObjCARCImplicitlyUnretainedType())
return Qualifiers::OCL_ExplicitNone;
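
Type::getNullability above walks single-step desugaring until it finds an AttributedType carrying one of the three nullability attributes, and AttributedType::stripOuterNullability peels such a wrapper off a QualType in place. A hedged usage sketch (variable names invented):

    // T is some QualType, e.g. the type written 'int * __nonnull':
    if (Optional<NullabilityKind> K = T->getNullability(Ctx))
      ;  // K is NonNull / Nullable / Unspecified

    // Drop the outermost nullability sugar, if present, and report what it was:
    QualType Stripped = T;
    Optional<NullabilityKind> Outer =
        AttributedType::stripOuterNullability(Stripped);
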
diff --git a/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp b/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp
index 3928fe8..ebe09d8 100644
--- a/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp
@@ -1141,6 +1141,21 @@ void TypePrinter::printAttributedBefore(const AttributedType *T,
}
spaceBeforePlaceHolder(OS);
}
+
+ // Print nullability type specifiers.
+ if (T->getAttrKind() == AttributedType::attr_nonnull ||
+ T->getAttrKind() == AttributedType::attr_nullable ||
+ T->getAttrKind() == AttributedType::attr_null_unspecified) {
+ if (T->getAttrKind() == AttributedType::attr_nonnull)
+ OS << " __nonnull";
+ else if (T->getAttrKind() == AttributedType::attr_nullable)
+ OS << " __nullable";
+ else if (T->getAttrKind() == AttributedType::attr_null_unspecified)
+ OS << " __null_unspecified";
+ else
+ llvm_unreachable("unhandled nullability");
+ spaceBeforePlaceHolder(OS);
+ }
}
void TypePrinter::printAttributedAfter(const AttributedType *T,
@@ -1154,12 +1169,34 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
if (T->isMSTypeSpec())
return;
+ // Nothing to print after.
+ if (T->getAttrKind() == AttributedType::attr_nonnull ||
+ T->getAttrKind() == AttributedType::attr_nullable ||
+ T->getAttrKind() == AttributedType::attr_null_unspecified)
+ return printAfter(T->getModifiedType(), OS);
+
// If this is a calling convention attribute, don't print the implicit CC from
// the modified type.
SaveAndRestore<bool> MaybeSuppressCC(InsideCCAttribute, T->isCallingConv());
printAfter(T->getModifiedType(), OS);
+ // Print nullability type specifiers that occur after
+ if (T->getAttrKind() == AttributedType::attr_nonnull ||
+ T->getAttrKind() == AttributedType::attr_nullable ||
+ T->getAttrKind() == AttributedType::attr_null_unspecified) {
+ if (T->getAttrKind() == AttributedType::attr_nonnull)
+ OS << " __nonnull";
+ else if (T->getAttrKind() == AttributedType::attr_nullable)
+ OS << " __nullable";
+ else if (T->getAttrKind() == AttributedType::attr_null_unspecified)
+ OS << " __null_unspecified";
+ else
+ llvm_unreachable("unhandled nullability");
+
+ return;
+ }
+
OS << " __attribute__((";
switch (T->getAttrKind()) {
default: llvm_unreachable("This attribute should have been handled already");
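
The TypePrinter changes render the nullability attributes as type-specifier suffixes instead of a generic __attribute__((...)) spelling. In source the specifier follows the pointer declarator it qualifies, much like a cv-qualifier (sketch):

    int * __nonnull  p;   // p itself may never be null
    int * __nullable q;   // q may be null
    // Diagnostics now print these types as "int * __nonnull" / "int * __nullable".
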
diff --git a/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp b/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp
index b2fdd27..19b3f5a 100644
--- a/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp
+++ b/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp
@@ -454,7 +454,7 @@ private:
TerminatorExpr(nullptr) {}
/// Returns whether we need to start a new branch for a temporary destructor
- /// call. This is the case when the the temporary destructor is
+ /// call. This is the case when the temporary destructor is
/// conditionally executed, and it is the first one we encounter while
/// visiting a subexpression - other temporary destructors at the same level
/// will be added to the same block and are executed under the same
diff --git a/contrib/llvm/tools/clang/lib/Analysis/CocoaConventions.cpp b/contrib/llvm/tools/clang/lib/Analysis/CocoaConventions.cpp
index 0db3cac..be1262d 100644
--- a/contrib/llvm/tools/clang/lib/Analysis/CocoaConventions.cpp
+++ b/contrib/llvm/tools/clang/lib/Analysis/CocoaConventions.cpp
@@ -25,7 +25,7 @@ using namespace ento;
bool cocoa::isRefType(QualType RetTy, StringRef Prefix,
StringRef Name) {
// Recursively walk the typedef stack, allowing typedefs of reference types.
- while (const TypedefType *TD = dyn_cast<TypedefType>(RetTy.getTypePtr())) {
+ while (const TypedefType *TD = RetTy->getAs<TypedefType>()) {
StringRef TDName = TD->getDecl()->getIdentifier()->getName();
if (TDName.startswith(Prefix) && TDName.endswith("Ref"))
return true;
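
The CocoaConventions.cpp change replaces a direct dyn_cast of the type pointer with RetTy->getAs<TypedefType>(), which looks through sugar (elaborated, paren and attributed types) before testing for a typedef, presumably so that the nullability AttributedType sugar introduced elsewhere in this update does not hide the typedef. Illustrative difference (hypothetical typedef):

    typedef struct __CFString *CFStringRef;

    // For a return type spelled with extra sugar, e.g. 'CFStringRef __nonnull':
    //   dyn_cast<TypedefType>(RetTy.getTypePtr())  -> null (outer node is an
    //                                                 AttributedType)
    //   RetTy->getAs<TypedefType>()                -> the CFStringRef typedef
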
diff --git a/contrib/llvm/tools/clang/lib/Basic/Diagnostic.cpp b/contrib/llvm/tools/clang/lib/Basic/Diagnostic.cpp
index 1992804..7f5a15d 100644
--- a/contrib/llvm/tools/clang/lib/Basic/Diagnostic.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/Diagnostic.cpp
@@ -321,18 +321,10 @@ void DiagnosticsEngine::Report(const StoredDiagnostic &storedDiag) {
NumDiagArgs = 0;
DiagRanges.clear();
- DiagRanges.reserve(storedDiag.range_size());
- for (StoredDiagnostic::range_iterator
- RI = storedDiag.range_begin(),
- RE = storedDiag.range_end(); RI != RE; ++RI)
- DiagRanges.push_back(*RI);
+ DiagRanges.append(storedDiag.range_begin(), storedDiag.range_end());
DiagFixItHints.clear();
- DiagFixItHints.reserve(storedDiag.fixit_size());
- for (StoredDiagnostic::fixit_iterator
- FI = storedDiag.fixit_begin(),
- FE = storedDiag.fixit_end(); FI != FE; ++FI)
- DiagFixItHints.push_back(*FI);
+ DiagFixItHints.append(storedDiag.fixit_begin(), storedDiag.fixit_end());
assert(Client && "DiagnosticConsumer not set!");
Level DiagLevel = storedDiag.getLevel();
diff --git a/contrib/llvm/tools/clang/lib/Basic/DiagnosticOptions.cpp b/contrib/llvm/tools/clang/lib/Basic/DiagnosticOptions.cpp
new file mode 100644
index 0000000..f54a0ef
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Basic/DiagnosticOptions.cpp
@@ -0,0 +1,24 @@
+//===--- DiagnosticOptions.cpp - C Language Family Diagnostic Handling ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DiagnosticOptions related interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/DiagnosticOptions.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang {
+
+raw_ostream& operator<<(raw_ostream& Out, DiagnosticLevelMask M) {
+ using UT = std::underlying_type<DiagnosticLevelMask>::type;
+ return Out << static_cast<UT>(M);
+}
+
+} // end namespace clang
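
The new DiagnosticOptions.cpp just streams a DiagnosticLevelMask as its underlying integer so option values can be printed. Assuming a mask value in hand (a sketch, not tied to a particular field):

    DiagnosticLevelMask M = /* some combination of levels */;
    llvm::errs() << "ignored diagnostic levels: " << M << "\n";  // prints the raw bits
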
diff --git a/contrib/llvm/tools/clang/lib/Basic/FileSystemStatCache.cpp b/contrib/llvm/tools/clang/lib/Basic/FileSystemStatCache.cpp
index 83e42bd..187ea37 100644
--- a/contrib/llvm/tools/clang/lib/Basic/FileSystemStatCache.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/FileSystemStatCache.cpp
@@ -15,19 +15,8 @@
#include "clang/Basic/VirtualFileSystem.h"
#include "llvm/Support/Path.h"
-// FIXME: This is terrible, we need this for ::close.
-#if !defined(_MSC_VER) && !defined(__MINGW32__)
-#include <unistd.h>
-#include <sys/uio.h>
-#else
-#include <io.h>
-#endif
using namespace clang;
-#if defined(_MSC_VER)
-#define S_ISDIR(s) ((_S_IFDIR & s) !=0)
-#endif
-
void FileSystemStatCache::anchor() { }
static void copyStatusToFileData(const vfs::Status &Status,
diff --git a/contrib/llvm/tools/clang/lib/Basic/IdentifierTable.cpp b/contrib/llvm/tools/clang/lib/Basic/IdentifierTable.cpp
index 4e06352..b295008 100644
--- a/contrib/llvm/tools/clang/lib/Basic/IdentifierTable.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/IdentifierTable.cpp
@@ -16,6 +16,7 @@
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/OperatorKinds.h"
+#include "clang/Basic/Specifiers.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallString.h"
@@ -645,3 +646,17 @@ const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) {
llvm_unreachable("Invalid OverloadedOperatorKind!");
}
+
+StringRef clang::getNullabilitySpelling(NullabilityKind kind) {
+ switch (kind) {
+ case NullabilityKind::NonNull:
+ return "__nonnull";
+
+ case NullabilityKind::Nullable:
+ return "__nullable";
+
+ case NullabilityKind::Unspecified:
+ return "__null_unspecified";
+ }
+ llvm_unreachable("Unknown nullability kind.");
+}
diff --git a/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp b/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp
index b83a069..b2798b7 100644
--- a/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp
@@ -332,6 +332,7 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind,
case OMPD_taskyield:
case OMPD_barrier:
case OMPD_taskwait:
+ case OMPD_taskgroup:
case OMPD_ordered:
break;
}
diff --git a/contrib/llvm/tools/clang/lib/Basic/Sanitizers.cpp b/contrib/llvm/tools/clang/lib/Basic/Sanitizers.cpp
index 8c4884b..d3676b6 100644
--- a/contrib/llvm/tools/clang/lib/Basic/Sanitizers.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/Sanitizers.cpp
@@ -25,6 +25,10 @@ bool SanitizerSet::has(SanitizerMask K) const {
return Mask & K;
}
+bool SanitizerSet::hasOneOf(SanitizerMask K) const {
+ return Mask & K;
+}
+
void SanitizerSet::set(SanitizerMask K, bool Value) {
assert(llvm::countPopulation(K) == 1);
Mask = Value ? (Mask | K) : (Mask & ~K);
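
SanitizerSet::hasOneOf is the mask-query counterpart to has(): set() asserts that exactly one bit is passed, whereas hasOneOf takes an OR of several SanitizerKind bits and reports whether any of them is enabled (CGDeclCXX.cpp below uses it to treat Address and KernelAddress alike). Sketch:

    SanitizerSet S;
    S.set(SanitizerKind::KernelAddress, true);
    bool AnyASan = S.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress);
    // AnyASan == true
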
diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets.cpp b/contrib/llvm/tools/clang/lib/Basic/Targets.cpp
index c0c6924..076b04b 100644
--- a/contrib/llvm/tools/clang/lib/Basic/Targets.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/Targets.cpp
@@ -992,6 +992,12 @@ public:
bool hasSjLjLowering() const override {
return true;
}
+
+ bool useFloat128ManglingForLongDouble() const override {
+ return LongDoubleWidth == 128 &&
+ LongDoubleFormat == &llvm::APFloat::PPCDoubleDouble &&
+ getTriple().isOSBinFormatELF();
+ }
};
const Builtin::Info PPCTargetInfo::BuiltinInfo[] = {
@@ -1661,7 +1667,7 @@ public:
}
};
-static const unsigned R600AddrSpaceMap[] = {
+static const unsigned AMDGPUAddrSpaceMap[] = {
1, // opencl_global
3, // opencl_local
2, // opencl_constant
@@ -1687,11 +1693,11 @@ static const char *DescriptionStringSI =
"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
-class R600TargetInfo : public TargetInfo {
+class AMDGPUTargetInfo : public TargetInfo {
static const Builtin::Info BuiltinInfo[];
static const char * const GCCRegNames[];
- /// \brief The GPU profiles supported by the R600 target.
+ /// \brief The GPU profiles supported by the AMDGPU target.
enum GPUKind {
GK_NONE,
GK_R600,
@@ -1703,7 +1709,8 @@ class R600TargetInfo : public TargetInfo {
GK_NORTHERN_ISLANDS,
GK_CAYMAN,
GK_SOUTHERN_ISLANDS,
- GK_SEA_ISLANDS
+ GK_SEA_ISLANDS,
+ GK_VOLCANIC_ISLANDS
} GPU;
bool hasFP64:1;
@@ -1711,8 +1718,8 @@ class R600TargetInfo : public TargetInfo {
bool hasLDEXPF:1;
public:
- R600TargetInfo(const llvm::Triple &Triple)
- : TargetInfo(Triple) {
+ AMDGPUTargetInfo(const llvm::Triple &Triple)
+ : TargetInfo(Triple) {
if (Triple.getArch() == llvm::Triple::amdgcn) {
DescriptionString = DescriptionStringSI;
@@ -1727,7 +1734,7 @@ public:
hasFMAF = false;
hasLDEXPF = false;
}
- AddrSpaceMap = &R600AddrSpaceMap;
+ AddrSpaceMap = &AMDGPUAddrSpaceMap;
UseAddrSpaceMapMangling = true;
}
@@ -1766,7 +1773,7 @@ public:
void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const override {
Records = BuiltinInfo;
- NumRecords = clang::R600::LastTSBuiltin - Builtin::FirstTSBuiltin;
+ NumRecords = clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin;
}
void getTargetDefines(const LangOptions &Opts,
@@ -1822,6 +1829,9 @@ public:
.Case("kaveri", GK_SEA_ISLANDS)
.Case("hawaii", GK_SEA_ISLANDS)
.Case("mullins", GK_SEA_ISLANDS)
+ .Case("tonga", GK_VOLCANIC_ISLANDS)
+ .Case("iceland", GK_VOLCANIC_ISLANDS)
+ .Case("carrizo", GK_VOLCANIC_ISLANDS)
.Default(GK_NONE);
if (GPU == GK_NONE) {
@@ -1851,6 +1861,7 @@ public:
break;
case GK_SOUTHERN_ISLANDS:
case GK_SEA_ISLANDS:
+ case GK_VOLCANIC_ISLANDS:
DescriptionString = DescriptionStringSI;
hasFP64 = true;
hasFMAF = true;
@@ -1862,12 +1873,12 @@ public:
}
};
-const Builtin::Info R600TargetInfo::BuiltinInfo[] = {
+const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) \
{ #ID, TYPE, ATTRS, 0, ALL_LANGUAGES },
-#include "clang/Basic/BuiltinsR600.def"
+#include "clang/Basic/BuiltinsAMDGPU.def"
};
-const char * const R600TargetInfo::GCCRegNames[] = {
+const char * const AMDGPUTargetInfo::GCCRegNames[] = {
"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
@@ -1920,8 +1931,8 @@ const char * const R600TargetInfo::GCCRegNames[] = {
"vcc_lo", "vcc_hi", "flat_scr_lo", "flat_scr_hi"
};
-void R600TargetInfo::getGCCRegNames(const char * const *&Names,
- unsigned &NumNames) const {
+void AMDGPUTargetInfo::getGCCRegNames(const char * const *&Names,
+ unsigned &NumNames) const {
Names = GCCRegNames;
NumNames = llvm::array_lengthof(GCCRegNames);
}
@@ -5688,6 +5699,10 @@ public:
return "vector";
return "";
}
+
+ bool useFloat128ManglingForLongDouble() const override {
+ return true;
+ }
};
const Builtin::Info SystemZTargetInfo::BuiltinInfo[] = {
@@ -5890,6 +5905,60 @@ validateAsmConstraint(const char *&Name,
unsigned &NumAliases) const override {}
};
+class BPFTargetInfo : public TargetInfo {
+public:
+ BPFTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
+ LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
+ SizeType = UnsignedLong;
+ PtrDiffType = SignedLong;
+ IntPtrType = SignedLong;
+ IntMaxType = SignedLong;
+ Int64Type = SignedLong;
+ RegParmMax = 5;
+ if (Triple.getArch() == llvm::Triple::bpfeb) {
+ BigEndian = true;
+ DescriptionString = "E-m:e-p:64:64-i64:64-n32:64-S128";
+ } else {
+ BigEndian = false;
+ DescriptionString = "e-m:e-p:64:64-i64:64-n32:64-S128";
+ }
+ MaxAtomicPromoteWidth = 64;
+ MaxAtomicInlineWidth = 64;
+ TLSSupported = false;
+ }
+ void getTargetDefines(const LangOptions &Opts,
+ MacroBuilder &Builder) const override {
+ DefineStd(Builder, "bpf", Opts);
+ Builder.defineMacro("__BPF__");
+ }
+ bool hasFeature(StringRef Feature) const override {
+ return Feature == "bpf";
+ }
+
+ void getTargetBuiltins(const Builtin::Info *&Records,
+ unsigned &NumRecords) const override {}
+ const char *getClobbers() const override {
+ return "";
+ }
+ BuiltinVaListKind getBuiltinVaListKind() const override {
+ return TargetInfo::VoidPtrBuiltinVaList;
+ }
+ void getGCCRegNames(const char * const *&Names,
+ unsigned &NumNames) const override {
+ Names = nullptr;
+ NumNames = 0;
+ }
+ bool validateAsmConstraint(const char *&Name,
+ TargetInfo::ConstraintInfo &info) const override {
+ return true;
+ }
+ void getGCCRegAliases(const GCCRegAlias *&Aliases,
+ unsigned &NumAliases) const override {
+ Aliases = nullptr;
+ NumAliases = 0;
+ }
+};
+
class MipsTargetInfoBase : public TargetInfo {
virtual void setDescriptionString() = 0;
@@ -6888,6 +6957,10 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple) {
return new ARMbeTargetInfo(Triple);
}
+ case llvm::Triple::bpfeb:
+ case llvm::Triple::bpfel:
+ return new BPFTargetInfo(Triple);
+
case llvm::Triple::msp430:
return new MSP430TargetInfo(Triple);
@@ -7015,7 +7088,7 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple) {
case llvm::Triple::amdgcn:
case llvm::Triple::r600:
- return new R600TargetInfo(Triple);
+ return new AMDGPUTargetInfo(Triple);
case llvm::Triple::sparc:
switch (os) {
@@ -7080,6 +7153,8 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple) {
return new DarwinI386TargetInfo(Triple);
switch (os) {
+ case llvm::Triple::CloudABI:
+ return new CloudABITargetInfo<X86_32TargetInfo>(Triple);
case llvm::Triple::Linux: {
switch (Triple.getEnvironment()) {
default:
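
Targets.cpp gains a minimal BPF target (bpfel/bpfeb, 64-bit pointers, no TLS), renames the R600 target info to AMDGPU and adds the Volcanic Islands GPUs, adds the float128-style long double mangling hooks for PPC and SystemZ, and wires CloudABI up for i386. For the BPF case the predefined macros come straight from getTargetDefines(); roughly:

    // clang -target bpfel -dM -E ... would show, among the standard macros:
    #define __BPF__ 1
    #define __bpf__ 1    // via DefineStd; a plain 'bpf' macro appears in GNU modes
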
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp
index 8cece0d..f5edea7 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp
@@ -201,8 +201,14 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
static void addAddressSanitizerPasses(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
- PM.add(createAddressSanitizerFunctionPass());
- PM.add(createAddressSanitizerModulePass());
+ PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/false));
+ PM.add(createAddressSanitizerModulePass(/*CompileKernel*/false));
+}
+
+static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM) {
+ PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/true));
+ PM.add(createAddressSanitizerModulePass(/*CompileKernel*/true));
}
static void addMemorySanitizerPass(const PassManagerBuilder &Builder,
@@ -283,7 +289,6 @@ void EmitAssemblyHelper::CreatePasses() {
PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP;
PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop;
- PMBuilder.DisableTailCalls = CodeGenOpts.DisableTailCalls;
PMBuilder.DisableUnitAtATime = !CodeGenOpts.UnitAtATime;
PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops;
PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions;
@@ -329,6 +334,13 @@ void EmitAssemblyHelper::CreatePasses() {
addAddressSanitizerPasses);
}
+ if (LangOpts.Sanitize.has(SanitizerKind::KernelAddress)) {
+ PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
+ addKernelAddressSanitizerPasses);
+ PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
+ addKernelAddressSanitizerPasses);
+ }
+
if (LangOpts.Sanitize.has(SanitizerKind::Memory)) {
PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
addMemorySanitizerPass);
@@ -478,6 +490,9 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
llvm::TargetOptions Options;
+ if (!TargetOpts.Reciprocals.empty())
+ Options.Reciprocals = TargetRecip(TargetOpts.Reciprocals);
+
Options.ThreadModel =
llvm::StringSwitch<llvm::ThreadModel::Model>(CodeGenOpts.ThreadModel)
.Case("posix", llvm::ThreadModel::POSIX)
@@ -521,7 +536,6 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS;
Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath;
Options.StackAlignmentOverride = CodeGenOpts.StackAlignment;
- Options.DisableTailCalls = CodeGenOpts.DisableTailCalls;
Options.TrapFuncName = CodeGenOpts.TrapFuncName;
Options.PositionIndependentExecutable = LangOpts.PIELevel != 0;
Options.FunctionSections = CodeGenOpts.FunctionSections;
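
BackendUtil.cpp schedules the same ASan function/module passes with CompileKernel=true when kernel address sanitizing is requested, forwards the new Reciprocals target option, and drops the global DisableTailCalls flag in favour of the per-function attribute added in CGCall.cpp below. The flag-to-pass mapping, as a comment sketch:

    /* -fsanitize=address        -> addAddressSanitizerPasses       (CompileKernel = false) */
    /* -fsanitize=kernel-address -> addKernelAddressSanitizerPasses (CompileKernel = true), */
    /*                              for kernel code that supplies its own ASan runtime      */
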
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp
index d86534d..a14daac 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1877,7 +1877,7 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
return EmitPPCBuiltinExpr(BuiltinID, E);
case llvm::Triple::r600:
case llvm::Triple::amdgcn:
- return EmitR600BuiltinExpr(BuiltinID, E);
+ return EmitAMDGPUBuiltinExpr(BuiltinID, E);
case llvm::Triple::systemz:
return EmitSystemZBuiltinExpr(BuiltinID, E);
default:
@@ -3279,6 +3279,66 @@ Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
}
}
+// Generates the IR for the read/write special register builtin,
+// ValueType is the type of the value that is to be written or read,
+// RegisterType is the type of the register being written to or read from.
+static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
+ const CallExpr *E,
+ llvm::Type *RegisterType,
+ llvm::Type *ValueType, bool IsRead) {
+ // write and register intrinsics only support 32 and 64 bit operations.
+ assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
+ && "Unsupported size for register.");
+
+ CodeGen::CGBuilderTy &Builder = CGF.Builder;
+ CodeGen::CodeGenModule &CGM = CGF.CGM;
+ LLVMContext &Context = CGM.getLLVMContext();
+
+ const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
+ StringRef SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
+
+ llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
+ llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
+ llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
+
+ llvm::Type *Types[] = { RegisterType };
+
+ bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
+ assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
+ && "Can't fit 64-bit value in 32-bit register");
+
+ if (IsRead) {
+ llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
+ llvm::Value *Call = Builder.CreateCall(F, Metadata);
+
+ if (MixedTypes)
+ // Read into 64 bit register and then truncate result to 32 bit.
+ return Builder.CreateTrunc(Call, ValueType);
+
+ if (ValueType->isPointerTy())
+ // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
+ return Builder.CreateIntToPtr(Call, ValueType);
+
+ return Call;
+ }
+
+ llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
+ llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
+ if (MixedTypes) {
+ // Extend 32 bit write value to 64 bit to pass to write.
+ ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
+ return Builder.CreateCall(F, { Metadata, ArgValue });
+ }
+
+ if (ValueType->isPointerTy()) {
+ // Have VoidPtrTy ArgValue but want to return an i32/i64.
+ ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
+ return Builder.CreateCall(F, { Metadata, ArgValue });
+ }
+
+ return Builder.CreateCall(F, { Metadata, ArgValue });
+}
+
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
if (auto Hint = GetValueForARMHint(BuiltinID))
@@ -3491,6 +3551,37 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
}
}
+ if (BuiltinID == ARM::BI__builtin_arm_rsr ||
+ BuiltinID == ARM::BI__builtin_arm_rsr64 ||
+ BuiltinID == ARM::BI__builtin_arm_rsrp ||
+ BuiltinID == ARM::BI__builtin_arm_wsr ||
+ BuiltinID == ARM::BI__builtin_arm_wsr64 ||
+ BuiltinID == ARM::BI__builtin_arm_wsrp) {
+
+ bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
+ BuiltinID == ARM::BI__builtin_arm_rsr64 ||
+ BuiltinID == ARM::BI__builtin_arm_rsrp;
+
+ bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
+ BuiltinID == ARM::BI__builtin_arm_wsrp;
+
+ bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
+ BuiltinID == ARM::BI__builtin_arm_wsr64;
+
+ llvm::Type *ValueType;
+ llvm::Type *RegisterType;
+ if (IsPointerBuiltin) {
+ ValueType = VoidPtrTy;
+ RegisterType = Int32Ty;
+ } else if (Is64Bit) {
+ ValueType = RegisterType = Int64Ty;
+ } else {
+ ValueType = RegisterType = Int32Ty;
+ }
+
+ return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
+ }
+
// Find out if any arguments are required to be integer constant
// expressions.
unsigned ICEArguments = 0;
@@ -4239,6 +4330,36 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {Arg0, Arg1});
}
+ if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
+ BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
+ BuiltinID == AArch64::BI__builtin_arm_rsrp ||
+ BuiltinID == AArch64::BI__builtin_arm_wsr ||
+ BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
+ BuiltinID == AArch64::BI__builtin_arm_wsrp) {
+
+ bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
+ BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
+ BuiltinID == AArch64::BI__builtin_arm_rsrp;
+
+ bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
+ BuiltinID == AArch64::BI__builtin_arm_wsrp;
+
+ bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
+ BuiltinID != AArch64::BI__builtin_arm_wsr;
+
+ llvm::Type *ValueType;
+ llvm::Type *RegisterType = Int64Ty;
+ if (IsPointerBuiltin) {
+ ValueType = VoidPtrTy;
+ } else if (Is64Bit) {
+ ValueType = Int64Ty;
+ } else {
+ ValueType = Int32Ty;
+ }
+
+ return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
+ }
+
// Find out if any arguments are required to be integer constant
// expressions.
unsigned ICEArguments = 0;
@@ -6427,11 +6548,11 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
return CGF.Builder.CreateCall(F, {Src0, Src1});
}
-Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
+Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
switch (BuiltinID) {
- case R600::BI__builtin_amdgpu_div_scale:
- case R600::BI__builtin_amdgpu_div_scalef: {
+ case AMDGPU::BI__builtin_amdgpu_div_scale:
+ case AMDGPU::BI__builtin_amdgpu_div_scalef: {
// Translate from the intrinsics's struct return to the builtin's out
// argument.
@@ -6458,8 +6579,8 @@ Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID,
FlagStore->setAlignment(FlagOutPtr.second);
return Result;
}
- case R600::BI__builtin_amdgpu_div_fmas:
- case R600::BI__builtin_amdgpu_div_fmasf: {
+ case AMDGPU::BI__builtin_amdgpu_div_fmas:
+ case AMDGPU::BI__builtin_amdgpu_div_fmasf: {
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
@@ -6470,26 +6591,26 @@ Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID,
llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
}
- case R600::BI__builtin_amdgpu_div_fixup:
- case R600::BI__builtin_amdgpu_div_fixupf:
+ case AMDGPU::BI__builtin_amdgpu_div_fixup:
+ case AMDGPU::BI__builtin_amdgpu_div_fixupf:
return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup);
- case R600::BI__builtin_amdgpu_trig_preop:
- case R600::BI__builtin_amdgpu_trig_preopf:
+ case AMDGPU::BI__builtin_amdgpu_trig_preop:
+ case AMDGPU::BI__builtin_amdgpu_trig_preopf:
return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop);
- case R600::BI__builtin_amdgpu_rcp:
- case R600::BI__builtin_amdgpu_rcpf:
+ case AMDGPU::BI__builtin_amdgpu_rcp:
+ case AMDGPU::BI__builtin_amdgpu_rcpf:
return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp);
- case R600::BI__builtin_amdgpu_rsq:
- case R600::BI__builtin_amdgpu_rsqf:
+ case AMDGPU::BI__builtin_amdgpu_rsq:
+ case AMDGPU::BI__builtin_amdgpu_rsqf:
return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq);
- case R600::BI__builtin_amdgpu_rsq_clamped:
- case R600::BI__builtin_amdgpu_rsq_clampedf:
+ case AMDGPU::BI__builtin_amdgpu_rsq_clamped:
+ case AMDGPU::BI__builtin_amdgpu_rsq_clampedf:
return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped);
- case R600::BI__builtin_amdgpu_ldexp:
- case R600::BI__builtin_amdgpu_ldexpf:
+ case AMDGPU::BI__builtin_amdgpu_ldexp:
+ case AMDGPU::BI__builtin_amdgpu_ldexpf:
return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
- case R600::BI__builtin_amdgpu_class:
- case R600::BI__builtin_amdgpu_classf:
+ case AMDGPU::BI__builtin_amdgpu_class:
+ case AMDGPU::BI__builtin_amdgpu_classf:
return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class);
default:
return nullptr;
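
The CGBuiltin.cpp additions lower the ARM and AArch64 special-register builtins to llvm.read_register / llvm.write_register on a metadata string naming the register, truncating or extending between 32- and 64-bit forms as needed; the rest of the file is the mechanical R600-to-AMDGPU builtin prefix rename. Source-level usage looks roughly like this (register names are only illustrative; 32-bit ARM uses the coprocessor spelling, AArch64 uses named registers):

    unsigned v = __builtin_arm_rsr("cp15:0:c13:c0:3");        // 32-bit read
    __builtin_arm_wsr("cp15:0:c13:c0:3", v + 1);              // 32-bit write
    unsigned long long c = __builtin_arm_rsr64("cntvct_el0"); // 64-bit read (AArch64)
    void *tp = __builtin_arm_rsrp("tpidr_el0");               // pointer-sized read
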
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h
index 2c73921..b6b4ee6 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h
@@ -366,7 +366,8 @@ public:
virtual llvm::Value *getVirtualFunctionPointer(CodeGenFunction &CGF,
GlobalDecl GD,
llvm::Value *This,
- llvm::Type *Ty) = 0;
+ llvm::Type *Ty,
+ SourceLocation Loc) = 0;
/// Emit the ABI-specific virtual destructor call.
virtual llvm::Value *
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp
index e77539c..58ef171 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp
@@ -1465,6 +1465,8 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
}
+ FuncAttrs.addAttribute("disable-tail-calls",
+ llvm::toStringRef(CodeGenOpts.DisableTailCalls));
FuncAttrs.addAttribute("less-precise-fpmad",
llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
FuncAttrs.addAttribute("no-infs-fp-math",
@@ -1481,24 +1483,62 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
if (!CodeGenOpts.StackRealignment)
FuncAttrs.addAttribute("no-realign-stack");
- // Add target-cpu and target-features work if they differ from the defaults.
- std::string &CPU = getTarget().getTargetOpts().CPU;
- if (CPU != "")
- FuncAttrs.addAttribute("target-cpu", CPU);
+ // Add target-cpu and target-features attributes to functions. If
+ // we have a decl for the function and it has a target attribute then
+ // parse that and add it to the feature set.
+ StringRef TargetCPU = getTarget().getTargetOpts().CPU;
// TODO: Features gets us the features on the command line including
// feature dependencies. For canonicalization purposes we might want to
- // avoid putting features in the target-features set if we know it'll be one
- // of the default features in the backend, e.g. corei7-avx and +avx or figure
- // out non-explicit dependencies.
- std::vector<std::string> &Features = getTarget().getTargetOpts().Features;
+ // avoid putting features in the target-features set if we know it'll be
+ // one of the default features in the backend, e.g. corei7-avx and +avx or
+ // figure out non-explicit dependencies.
+ std::vector<std::string> Features(getTarget().getTargetOpts().Features);
+
+ // TODO: The target attribute complicates this further by allowing multiple
+ // additional features to be tacked on to the feature string for a
+ // particular function. For now we simply append to the set of features and
+ // let backend resolution fix them up.
+ const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl);
+ if (FD) {
+ if (const TargetAttr *TD = FD->getAttr<TargetAttr>()) {
+ StringRef FeaturesStr = TD->getFeatures();
+ SmallVector<StringRef, 1> AttrFeatures;
+ FeaturesStr.split(AttrFeatures, ",");
+
+ // Grab the various features and prepend a "+" to turn on the feature to
+ // the backend and add them to our existing set of Features.
+ for (auto &Feature : AttrFeatures) {
+ // While we're here iterating check for a different target cpu.
+ if (Feature.startswith("arch="))
+ TargetCPU = Feature.split("=").second;
+ else if (Feature.startswith("tune="))
+ // We don't support cpu tuning this way currently.
+ ;
+ else if (Feature.startswith("fpmath="))
+ // TODO: Support the fpmath option this way. It will require checking
+ // overall feature validity for the function with the rest of the
+ // attributes on the function.
+ ;
+ else if (Feature.startswith("mno-"))
+ Features.push_back("-" + Feature.split("-").second.str());
+ else
+ Features.push_back("+" + Feature.str());
+ }
+ }
+ }
+
+ // Now add the target-cpu and target-features to the function.
+ if (TargetCPU != "")
+ FuncAttrs.addAttribute("target-cpu", TargetCPU);
if (!Features.empty()) {
- std::stringstream S;
+ std::stringstream TargetFeatures;
std::copy(Features.begin(), Features.end(),
- std::ostream_iterator<std::string>(S, ","));
+ std::ostream_iterator<std::string>(TargetFeatures, ","));
+
// The drop_back gets rid of the trailing space.
FuncAttrs.addAttribute("target-features",
- StringRef(S.str()).drop_back(1));
+ StringRef(TargetFeatures.str()).drop_back(1));
}
}
@@ -2231,11 +2271,10 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) {
if (Intrinsic->getIntrinsicID() == llvm::Intrinsic::lifetime_end) {
const llvm::Value *CastAddr = Intrinsic->getArgOperand(1);
++II;
- if (isa<llvm::BitCastInst>(&*II)) {
- if (CastAddr == &*II) {
- continue;
- }
- }
+ if (II == IE)
+ break;
+ if (isa<llvm::BitCastInst>(&*II) && (CastAddr == &*II))
+ continue;
}
}
I = &*II;
@@ -2571,7 +2610,7 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
LValue srcLV;
// Make an optimistic effort to emit the address as an l-value.
- // This can fail if the the argument expression is more complicated.
+ // This can fail if the argument expression is more complicated.
if (const Expr *lvExpr = maybeGetUnaryAddrOfOperand(CRE->getSubExpr())) {
srcLV = CGF.EmitLValue(lvExpr);
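
The CGCall.cpp hunk emits per-function "target-cpu" / "target-features" (and now "disable-tail-calls") attributes, merging in anything spelled on the function's target attribute: "arch=" overrides the CPU, "mno-foo" becomes "-foo", other entries become "+foo", and "tune=" / "fpmath=" are ignored for now. For example, for a hypothetical function:

    __attribute__((target("arch=haswell,avx2,mno-sse3")))
    void kernel(void) {}
    // Expected IR function attributes, roughly:
    //   "target-cpu"="haswell"
    //   "target-features"="...,+avx2,-sse3"   (appended to the command-line features)
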
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp
index cd75da2..1d2b787 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp
@@ -370,25 +370,25 @@ namespace {
/// A visitor which checks whether an initializer uses 'this' in a
/// way which requires the vtable to be properly set.
- struct DynamicThisUseChecker : EvaluatedExprVisitor<DynamicThisUseChecker> {
- typedef EvaluatedExprVisitor<DynamicThisUseChecker> super;
+ struct DynamicThisUseChecker : ConstEvaluatedExprVisitor<DynamicThisUseChecker> {
+ typedef ConstEvaluatedExprVisitor<DynamicThisUseChecker> super;
bool UsesThis;
- DynamicThisUseChecker(ASTContext &C) : super(C), UsesThis(false) {}
+ DynamicThisUseChecker(const ASTContext &C) : super(C), UsesThis(false) {}
// Black-list all explicit and implicit references to 'this'.
//
// Do we need to worry about external references to 'this' derived
// from arbitrary code? If so, then anything which runs arbitrary
// external code might potentially access the vtable.
- void VisitCXXThisExpr(CXXThisExpr *E) { UsesThis = true; }
+ void VisitCXXThisExpr(const CXXThisExpr *E) { UsesThis = true; }
};
}
static bool BaseInitializerUsesThis(ASTContext &C, const Expr *Init) {
DynamicThisUseChecker Checker(C);
- Checker.Visit(const_cast<Expr*>(Init));
+ Checker.Visit(Init);
return Checker.UsesThis;
}
@@ -2134,17 +2134,21 @@ LeastDerivedClassWithSameLayout(const CXXRecordDecl *RD) {
}
void CodeGenFunction::EmitVTablePtrCheckForCall(const CXXMethodDecl *MD,
- llvm::Value *VTable) {
+ llvm::Value *VTable,
+ CFITypeCheckKind TCK,
+ SourceLocation Loc) {
const CXXRecordDecl *ClassDecl = MD->getParent();
if (!SanOpts.has(SanitizerKind::CFICastStrict))
ClassDecl = LeastDerivedClassWithSameLayout(ClassDecl);
- EmitVTablePtrCheck(ClassDecl, VTable);
+ EmitVTablePtrCheck(ClassDecl, VTable, TCK, Loc);
}
void CodeGenFunction::EmitVTablePtrCheckForCast(QualType T,
llvm::Value *Derived,
- bool MayBeNull) {
+ bool MayBeNull,
+ CFITypeCheckKind TCK,
+ SourceLocation Loc) {
if (!getLangOpts().CPlusPlus)
return;
@@ -2184,7 +2188,7 @@ void CodeGenFunction::EmitVTablePtrCheckForCast(QualType T,
}
llvm::Value *VTable = GetVTablePtr(Derived, Int8PtrTy);
- EmitVTablePtrCheck(ClassDecl, VTable);
+ EmitVTablePtrCheck(ClassDecl, VTable, TCK, Loc);
if (MayBeNull) {
Builder.CreateBr(ContBlock);
@@ -2193,11 +2197,15 @@ void CodeGenFunction::EmitVTablePtrCheckForCast(QualType T,
}
void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD,
- llvm::Value *VTable) {
+ llvm::Value *VTable,
+ CFITypeCheckKind TCK,
+ SourceLocation Loc) {
// FIXME: Add blacklisting scheme.
if (RD->isInStdNamespace())
return;
+ SanitizerScope SanScope(this);
+
std::string OutName;
llvm::raw_string_ostream Out(OutName);
CGM.getCXXABI().getMangleContext().mangleCXXVTableBitSet(RD, Out);
@@ -2205,20 +2213,34 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD,
llvm::Value *BitSetName = llvm::MetadataAsValue::get(
getLLVMContext(), llvm::MDString::get(getLLVMContext(), Out.str()));
- llvm::Value *BitSetTest = Builder.CreateCall(
- CGM.getIntrinsic(llvm::Intrinsic::bitset_test),
- {Builder.CreateBitCast(VTable, CGM.Int8PtrTy), BitSetName});
+ llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy);
+ llvm::Value *BitSetTest =
+ Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::bitset_test),
+ {CastedVTable, BitSetName});
- llvm::BasicBlock *ContBlock = createBasicBlock("vtable.check.cont");
- llvm::BasicBlock *TrapBlock = createBasicBlock("vtable.check.trap");
-
- Builder.CreateCondBr(BitSetTest, ContBlock, TrapBlock);
-
- EmitBlock(TrapBlock);
- Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::trap), {});
- Builder.CreateUnreachable();
+ SanitizerMask M;
+ switch (TCK) {
+ case CFITCK_VCall:
+ M = SanitizerKind::CFIVCall;
+ break;
+ case CFITCK_NVCall:
+ M = SanitizerKind::CFINVCall;
+ break;
+ case CFITCK_DerivedCast:
+ M = SanitizerKind::CFIDerivedCast;
+ break;
+ case CFITCK_UnrelatedCast:
+ M = SanitizerKind::CFIUnrelatedCast;
+ break;
+ }
- EmitBlock(ContBlock);
+ llvm::Constant *StaticData[] = {
+ EmitCheckSourceLocation(Loc),
+ EmitCheckTypeDescriptor(QualType(RD->getTypeForDecl(), 0)),
+ llvm::ConstantInt::get(Int8Ty, TCK),
+ };
+ EmitCheck(std::make_pair(BitSetTest, M), "cfi_bad_type", StaticData,
+ CastedVTable);
}
// FIXME: Ideally Expr::IgnoreParenNoopCasts should do this, but it doesn't do
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp
index 48458db..a20e39f 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -590,18 +590,29 @@ llvm::DIType *CGDebugInfo::CreateType(const PointerType *Ty,
Ty->getPointeeType(), Unit);
}
+/// \return whether a C++ mangling exists for the type defined by TD.
+static bool hasCXXMangling(const TagDecl *TD, llvm::DICompileUnit *TheCU) {
+ switch (TheCU->getSourceLanguage()) {
+ case llvm::dwarf::DW_LANG_C_plus_plus:
+ return true;
+ case llvm::dwarf::DW_LANG_ObjC_plus_plus:
+ return isa<CXXRecordDecl>(TD) || isa<EnumDecl>(TD);
+ default:
+ return false;
+ }
+}
+
/// In C++ mode, types have linkage, so we can rely on the ODR and
/// on their mangled names, if they're external.
static SmallString<256> getUniqueTagTypeName(const TagType *Ty,
CodeGenModule &CGM,
llvm::DICompileUnit *TheCU) {
SmallString<256> FullName;
- // FIXME: ODR should apply to ObjC++ exactly the same wasy it does to C++.
- // For now, only apply ODR with C++.
const TagDecl *TD = Ty->getDecl();
- if (TheCU->getSourceLanguage() != llvm::dwarf::DW_LANG_C_plus_plus ||
- !TD->isExternallyVisible())
+
+ if (!hasCXXMangling(TD, TheCU) || !TD->isExternallyVisible())
return FullName;
+
// Microsoft Mangler does not have support for mangleCXXRTTIName yet.
if (CGM.getTarget().getCXXABI().isMicrosoft())
return FullName;
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp
index 579a041..07dbce4 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp
@@ -864,20 +864,17 @@ llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size,
return nullptr;
llvm::Value *SizeV = llvm::ConstantInt::get(Int64Ty, Size);
- llvm::Value *Args[] = {
- SizeV,
- new llvm::BitCastInst(Addr, Int8PtrTy, "", Builder.GetInsertBlock())};
- llvm::CallInst *C = llvm::CallInst::Create(CGM.getLLVMLifetimeStartFn(), Args,
- "", Builder.GetInsertBlock());
+ Addr = Builder.CreateBitCast(Addr, Int8PtrTy);
+ llvm::CallInst *C =
+ Builder.CreateCall(CGM.getLLVMLifetimeStartFn(), {SizeV, Addr});
C->setDoesNotThrow();
return SizeV;
}
void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) {
- llvm::Value *Args[] = {Size, new llvm::BitCastInst(Addr, Int8PtrTy, "",
- Builder.GetInsertBlock())};
- llvm::CallInst *C = llvm::CallInst::Create(CGM.getLLVMLifetimeEndFn(), Args,
- "", Builder.GetInsertBlock());
+ Addr = Builder.CreateBitCast(Addr, Int8PtrTy);
+ llvm::CallInst *C =
+ Builder.CreateCall(CGM.getLLVMLifetimeEndFn(), {Size, Addr});
C->setDoesNotThrow();
}
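Aside (not part of the patch): an illustration of where the llvm.lifetime markers built by EmitLifetimeStart/EmitLifetimeEnd conceptually land. The function and buffer names are made up, and the markers are typically only emitted when optimizing.

void consume(char *p) { *p = 0; }

void demo() {
  {
    char buf[256];   // llvm.lifetime.start(256, buf) is emitted at this point
    consume(buf);
  }                  // llvm.lifetime.end(256, buf) is emitted here, letting
                     // the backend reuse the stack slot for later locals
}

int main() { demo(); return 0; }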
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp
index 06d157b..00d6d5c 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -267,12 +267,15 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
Fn->setDoesNotThrow();
if (!isInSanitizerBlacklist(Fn, Loc)) {
- if (getLangOpts().Sanitize.has(SanitizerKind::Address))
+ if (getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
+ SanitizerKind::KernelAddress))
Fn->addFnAttr(llvm::Attribute::SanitizeAddress);
if (getLangOpts().Sanitize.has(SanitizerKind::Thread))
Fn->addFnAttr(llvm::Attribute::SanitizeThread);
if (getLangOpts().Sanitize.has(SanitizerKind::Memory))
Fn->addFnAttr(llvm::Attribute::SanitizeMemory);
+ if (getLangOpts().Sanitize.has(SanitizerKind::SafeStack))
+ Fn->addFnAttr(llvm::Attribute::SafeStack);
}
return Fn;
@@ -421,6 +424,7 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, LocalCXXGlobalInits);
AddGlobalCtor(Fn, Priority);
}
+ PrioritizedCXXGlobalInits.clear();
}
SmallString<128> FileName;
@@ -448,7 +452,6 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
AddGlobalCtor(Fn);
CXXGlobalInits.clear();
- PrioritizedCXXGlobalInits.clear();
}
void CodeGenModule::EmitCXXGlobalDtorFunc() {
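Aside (not part of the patch): the kind of global whose compiler-emitted initializer function the hunk above now also marks with SafeStack or kernel-ASan attributes when those sanitizers are enabled. The helper name __cxx_global_var_init is the usual Itanium-ABI convention, not something this hunk spells out.

int compute() { return 42; }
int g = compute();   // dynamic initializer, run before main() by a
                     // compiler-emitted helper (conventionally
                     // __cxx_global_var_init)

int main() { return g == 42 ? 0 : 1; }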
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp
index d9a3f0b..4c85017 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp
@@ -698,13 +698,15 @@ llvm::BasicBlock *CodeGenFunction::EmitLandingPad() {
const EHPersonality &personality = EHPersonality::get(*this);
+ if (!CurFn->hasPersonalityFn())
+ CurFn->setPersonalityFn(getOpaquePersonalityFn(CGM, personality));
+
// Create and configure the landing pad.
llvm::BasicBlock *lpad = createBasicBlock("lpad");
EmitBlock(lpad);
- llvm::LandingPadInst *LPadInst =
- Builder.CreateLandingPad(llvm::StructType::get(Int8PtrTy, Int32Ty, nullptr),
- getOpaquePersonalityFn(CGM, personality), 0);
+ llvm::LandingPadInst *LPadInst = Builder.CreateLandingPad(
+ llvm::StructType::get(Int8PtrTy, Int32Ty, nullptr), 0);
llvm::Value *LPadExn = Builder.CreateExtractValue(LPadInst, 0);
Builder.CreateStore(LPadExn, getExceptionSlot());
@@ -1193,9 +1195,12 @@ llvm::BasicBlock *CodeGenFunction::getTerminateLandingPad() {
// Tell the backend that this is a landing pad.
const EHPersonality &Personality = EHPersonality::get(*this);
- llvm::LandingPadInst *LPadInst =
- Builder.CreateLandingPad(llvm::StructType::get(Int8PtrTy, Int32Ty, nullptr),
- getOpaquePersonalityFn(CGM, Personality), 0);
+
+ if (!CurFn->hasPersonalityFn())
+ CurFn->setPersonalityFn(getOpaquePersonalityFn(CGM, Personality));
+
+ llvm::LandingPadInst *LPadInst = Builder.CreateLandingPad(
+ llvm::StructType::get(Int8PtrTy, Int32Ty, nullptr), 0);
LPadInst->addClause(getCatchAllValue(*this));
llvm::Value *Exn = 0;
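Aside (not part of the patch): a small example that produces a landing pad. After this change the personality routine is recorded once on the enclosing function (setPersonalityFn) rather than on every landingpad instruction.

#include <stdexcept>

int guarded() {
  try {
    throw std::runtime_error("boom");
  } catch (const std::exception &) {
    return 1;   // reaching the handler goes through a landing pad in the IR
  }
  return 0;
}

int main() { return guarded() == 1 ? 0 : 2; }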
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp
index 1ed45a3..1a76afa 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp
@@ -2300,15 +2300,24 @@ void CodeGenFunction::EmitCheck(
llvm::Value *FatalCond = nullptr;
llvm::Value *RecoverableCond = nullptr;
+ llvm::Value *TrapCond = nullptr;
for (int i = 0, n = Checked.size(); i < n; ++i) {
llvm::Value *Check = Checked[i].first;
+ // -fsanitize-trap= overrides -fsanitize-recover=.
llvm::Value *&Cond =
- CGM.getCodeGenOpts().SanitizeRecover.has(Checked[i].second)
- ? RecoverableCond
- : FatalCond;
+ CGM.getCodeGenOpts().SanitizeTrap.has(Checked[i].second)
+ ? TrapCond
+ : CGM.getCodeGenOpts().SanitizeRecover.has(Checked[i].second)
+ ? RecoverableCond
+ : FatalCond;
Cond = Cond ? Builder.CreateAnd(Cond, Check) : Check;
}
+ if (TrapCond)
+ EmitTrapCheck(TrapCond);
+ if (!FatalCond && !RecoverableCond)
+ return;
+
llvm::Value *JointCond;
if (FatalCond && RecoverableCond)
JointCond = Builder.CreateAnd(FatalCond, RecoverableCond);
@@ -2326,15 +2335,6 @@ void CodeGenFunction::EmitCheck(
}
#endif
- if (CGM.getCodeGenOpts().SanitizeUndefinedTrapOnError) {
- assert(RecoverKind != CheckRecoverableKind::AlwaysRecoverable &&
- "Runtime call required for AlwaysRecoverable kind!");
- // Assume that -fsanitize-undefined-trap-on-error overrides
- // -fsanitize-recover= options, as we can only print meaningful error
- // message and recover if we have a runtime support.
- return EmitTrapCheck(JointCond);
- }
-
llvm::BasicBlock *Cont = createBasicBlock("cont");
llvm::BasicBlock *Handlers = createBasicBlock("handler." + CheckName);
llvm::Instruction *Branch = Builder.CreateCondBr(JointCond, Cont, Handlers);
@@ -3035,7 +3035,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
Derived, E->getType());
if (SanOpts.has(SanitizerKind::CFIDerivedCast))
- EmitVTablePtrCheckForCast(E->getType(), Derived, /*MayBeNull=*/false);
+ EmitVTablePtrCheckForCast(E->getType(), Derived, /*MayBeNull=*/false,
+ CFITCK_DerivedCast, E->getLocStart());
return MakeAddrLValue(Derived, E->getType());
}
@@ -3048,7 +3049,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
ConvertType(CE->getTypeAsWritten()));
if (SanOpts.has(SanitizerKind::CFIUnrelatedCast))
- EmitVTablePtrCheckForCast(E->getType(), V, /*MayBeNull=*/false);
+ EmitVTablePtrCheckForCast(E->getType(), V, /*MayBeNull=*/false,
+ CFITCK_UnrelatedCast, E->getLocStart());
return MakeAddrLValue(V, E->getType());
}
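Aside (not part of the patch): the per-check precedence the reworked EmitCheck applies, restated as a tiny standalone sketch; Action and classify are invented names.

enum class Action { Trap, Recover, Fatal };

// -fsanitize-trap= wins over -fsanitize-recover=; everything else is fatal.
Action classify(bool trapEnabled, bool recoverEnabled) {
  if (trapEnabled)
    return Action::Trap;
  return recoverEnabled ? Action::Recover : Action::Fatal;
}

int main() { return classify(true, true) == Action::Trap ? 0 : 1; }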
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp
index 8b1bc69..883b76b 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp
@@ -160,10 +160,12 @@ public:
EmitAggLoadOfLValue(E);
}
+ void VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E);
void VisitAbstractConditionalOperator(const AbstractConditionalOperator *CO);
void VisitChooseExpr(const ChooseExpr *CE);
void VisitInitListExpr(InitListExpr *E);
void VisitImplicitValueInitExpr(ImplicitValueInitExpr *E);
+ void VisitNoInitExpr(NoInitExpr *E) { } // Do nothing.
void VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) {
Visit(DAE->getExpr());
}
@@ -1056,6 +1058,9 @@ AggExprEmitter::EmitInitializationToLValue(Expr *E, LValue LV) {
return;
} else if (isa<ImplicitValueInitExpr>(E) || isa<CXXScalarValueInitExpr>(E)) {
return EmitNullInitializationToLValue(LV);
+ } else if (isa<NoInitExpr>(E)) {
+ // Do nothing.
+ return;
} else if (type->isReferenceType()) {
RValue RV = CGF.EmitReferenceBindingToExpr(E);
return CGF.EmitStoreThroughLValue(RV, LV);
@@ -1276,6 +1281,15 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
cleanupDominator->eraseFromParent();
}
+void AggExprEmitter::VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E) {
+ AggValueSlot Dest = EnsureSlot(E->getType());
+
+ LValue DestLV = CGF.MakeAddrLValue(Dest.getAddr(), E->getType(),
+ Dest.getAlignment());
+ EmitInitializationToLValue(E->getBase(), DestLV);
+ VisitInitListExpr(E->getUpdater());
+}
+
//===----------------------------------------------------------------------===//
// Entry Points into this File
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp
index 13dfbb3..f0f706d 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp
@@ -254,12 +254,13 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(MD)) {
Callee = CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty);
} else if (UseVirtualCall) {
- Callee = CGM.getCXXABI().getVirtualFunctionPointer(*this, MD, This, Ty);
+ Callee = CGM.getCXXABI().getVirtualFunctionPointer(*this, MD, This, Ty,
+ CE->getLocStart());
} else {
if (SanOpts.has(SanitizerKind::CFINVCall) &&
MD->getParent()->isDynamicClass()) {
llvm::Value *VTable = GetVTablePtr(This, Int8PtrTy);
- EmitVTablePtrCheckForCall(MD, VTable);
+ EmitVTablePtrCheckForCall(MD, VTable, CFITCK_NVCall, CE->getLocStart());
}
if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier)
@@ -958,6 +959,25 @@ void CodeGenFunction::EmitNewArrayInitializer(
if (ILE->getNumInits() == 0 && TryMemsetInitialization())
return;
+ // If we have a struct in which every field is value-initialized, we can
+ // usually use memset.
+ if (auto *ILE = dyn_cast<InitListExpr>(Init)) {
+ if (const RecordType *RType = ILE->getType()->getAs<RecordType>()) {
+ if (RType->getDecl()->isStruct()) {
+ unsigned NumFields = 0;
+ for (auto *Field : RType->getDecl()->fields())
+ if (!Field->isUnnamedBitfield())
+ ++NumFields;
+ if (ILE->getNumInits() == NumFields)
+ for (unsigned i = 0, e = ILE->getNumInits(); i != e; ++i)
+ if (!isa<ImplicitValueInitExpr>(ILE->getInit(i)))
+ --NumFields;
+ if (ILE->getNumInits() == NumFields && TryMemsetInitialization())
+ return;
+ }
+ }
+ }
+
// Create the loop blocks.
llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
llvm::BasicBlock *LoopBB = createBasicBlock("new.loop");
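Aside (not part of the patch): a hedged example of the new[] pattern the added fast path targets; whether a memset is actually emitted still depends on TryMemsetInitialization, so this is illustrative only.

struct Point { int x; int y; };

int main() {
  // Every element is value-initialized, so the per-element initialization
  // loop can usually collapse into a single memset of the allocation.
  Point *pts = new Point[8]{};
  int r = pts[3].y;   // 0
  delete[] pts;
  return r;
}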
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp
index b90b3ab..acfb9b6 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp
@@ -33,6 +33,7 @@ using namespace CodeGen;
//===----------------------------------------------------------------------===//
namespace {
+class ConstExprEmitter;
class ConstStructBuilder {
CodeGenModule &CGM;
CodeGenFunction *CGF;
@@ -42,6 +43,10 @@ class ConstStructBuilder {
CharUnits LLVMStructAlignment;
SmallVector<llvm::Constant *, 32> Elements;
public:
+ static llvm::Constant *BuildStruct(CodeGenModule &CGM, CodeGenFunction *CFG,
+ ConstExprEmitter *Emitter,
+ llvm::ConstantStruct *Base,
+ InitListExpr *Updater);
static llvm::Constant *BuildStruct(CodeGenModule &CGM, CodeGenFunction *CGF,
InitListExpr *ILE);
static llvm::Constant *BuildStruct(CodeGenModule &CGM, CodeGenFunction *CGF,
@@ -68,6 +73,8 @@ private:
void ConvertStructToPacked();
bool Build(InitListExpr *ILE);
+ bool Build(ConstExprEmitter *Emitter, llvm::ConstantStruct *Base,
+ InitListExpr *Updater);
void Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase,
const CXXRecordDecl *VTableClass, CharUnits BaseOffset);
llvm::Constant *Finalize(QualType Ty);
@@ -547,6 +554,17 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) {
llvm::Constant *ConstStructBuilder::BuildStruct(CodeGenModule &CGM,
CodeGenFunction *CGF,
+ ConstExprEmitter *Emitter,
+ llvm::ConstantStruct *Base,
+ InitListExpr *Updater) {
+ ConstStructBuilder Builder(CGM, CGF);
+ if (!Builder.Build(Emitter, Base, Updater))
+ return nullptr;
+ return Builder.Finalize(Updater->getType());
+}
+
+llvm::Constant *ConstStructBuilder::BuildStruct(CodeGenModule &CGM,
+ CodeGenFunction *CGF,
InitListExpr *ILE) {
ConstStructBuilder Builder(CGM, CGF);
@@ -818,6 +836,82 @@ public:
return nullptr;
}
+ llvm::Constant *EmitDesignatedInitUpdater(llvm::Constant *Base,
+ InitListExpr *Updater) {
+ QualType ExprType = Updater->getType();
+
+ if (ExprType->isArrayType()) {
+ llvm::ArrayType *AType = cast<llvm::ArrayType>(ConvertType(ExprType));
+ llvm::Type *ElemType = AType->getElementType();
+
+ unsigned NumInitElements = Updater->getNumInits();
+ unsigned NumElements = AType->getNumElements();
+
+ std::vector<llvm::Constant *> Elts;
+ Elts.reserve(NumElements);
+
+ if (llvm::ConstantDataArray *DataArray =
+ dyn_cast<llvm::ConstantDataArray>(Base))
+ for (unsigned i = 0; i != NumElements; ++i)
+ Elts.push_back(DataArray->getElementAsConstant(i));
+ else if (llvm::ConstantArray *Array =
+ dyn_cast<llvm::ConstantArray>(Base))
+ for (unsigned i = 0; i != NumElements; ++i)
+ Elts.push_back(Array->getOperand(i));
+ else
+ return nullptr; // FIXME: other array types not implemented
+
+ llvm::Constant *fillC = nullptr;
+ if (Expr *filler = Updater->getArrayFiller())
+ if (!isa<NoInitExpr>(filler))
+ fillC = CGM.EmitConstantExpr(filler, filler->getType(), CGF);
+ bool RewriteType = (fillC && fillC->getType() != ElemType);
+
+ for (unsigned i = 0; i != NumElements; ++i) {
+ Expr *Init = nullptr;
+ if (i < NumInitElements)
+ Init = Updater->getInit(i);
+
+ if (!Init && fillC)
+ Elts[i] = fillC;
+ else if (!Init || isa<NoInitExpr>(Init))
+ ; // Do nothing.
+ else if (InitListExpr *ChildILE = dyn_cast<InitListExpr>(Init))
+ Elts[i] = EmitDesignatedInitUpdater(Elts[i], ChildILE);
+ else
+ Elts[i] = CGM.EmitConstantExpr(Init, Init->getType(), CGF);
+
+ if (!Elts[i])
+ return nullptr;
+ RewriteType |= (Elts[i]->getType() != ElemType);
+ }
+
+ if (RewriteType) {
+ std::vector<llvm::Type *> Types;
+ Types.reserve(NumElements);
+ for (unsigned i = 0; i != NumElements; ++i)
+ Types.push_back(Elts[i]->getType());
+ llvm::StructType *SType = llvm::StructType::get(AType->getContext(),
+ Types, true);
+ return llvm::ConstantStruct::get(SType, Elts);
+ }
+
+ return llvm::ConstantArray::get(AType, Elts);
+ }
+
+ if (ExprType->isRecordType())
+ return ConstStructBuilder::BuildStruct(CGM, CGF, this,
+ dyn_cast<llvm::ConstantStruct>(Base), Updater);
+
+ return nullptr;
+ }
+
+ llvm::Constant *VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E) {
+ return EmitDesignatedInitUpdater(
+ CGM.EmitConstantExpr(E->getBase(), E->getType(), CGF),
+ E->getUpdater());
+ }
+
llvm::Constant *VisitCXXConstructExpr(CXXConstructExpr *E) {
if (!E->getConstructor()->isTrivial())
return nullptr;
@@ -1003,6 +1097,68 @@ public:
} // end anonymous namespace.
+bool ConstStructBuilder::Build(ConstExprEmitter *Emitter,
+ llvm::ConstantStruct *Base,
+ InitListExpr *Updater) {
+ assert(Base && "base expression should not be empty");
+
+ QualType ExprType = Updater->getType();
+ RecordDecl *RD = ExprType->getAs<RecordType>()->getDecl();
+ const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD);
+ const llvm::StructLayout *BaseLayout = CGM.getDataLayout().getStructLayout(
+ Base->getType());
+ unsigned FieldNo = -1;
+ unsigned ElementNo = 0;
+
+ for (FieldDecl *Field : RD->fields()) {
+ ++FieldNo;
+
+ if (RD->isUnion() && Updater->getInitializedFieldInUnion() != Field)
+ continue;
+
+ // Skip anonymous bitfields.
+ if (Field->isUnnamedBitfield())
+ continue;
+
+ llvm::Constant *EltInit = Base->getOperand(ElementNo);
+
+ // Bail out if the type of the ConstantStruct does not have the same layout
+ // as the type of the InitListExpr.
+ if (CGM.getTypes().ConvertType(Field->getType()) != EltInit->getType() ||
+ Layout.getFieldOffset(ElementNo) !=
+ BaseLayout->getElementOffsetInBits(ElementNo))
+ return false;
+
+ // Get the initializer. If we encounter an empty field or a NoInitExpr,
+ // we use values from the base expression.
+ Expr *Init = nullptr;
+ if (ElementNo < Updater->getNumInits())
+ Init = Updater->getInit(ElementNo);
+
+ if (!Init || isa<NoInitExpr>(Init))
+ ; // Do nothing.
+ else if (InitListExpr *ChildILE = dyn_cast<InitListExpr>(Init))
+ EltInit = Emitter->EmitDesignatedInitUpdater(EltInit, ChildILE);
+ else
+ EltInit = CGM.EmitConstantExpr(Init, Field->getType(), CGF);
+
+ ++ElementNo;
+
+ if (!EltInit)
+ return false;
+
+ if (!Field->isBitField())
+ AppendField(Field, Layout.getFieldOffset(FieldNo), EltInit);
+ else if (llvm::ConstantInt *CI = dyn_cast<llvm::ConstantInt>(EltInit))
+ AppendBitField(Field, Layout.getFieldOffset(FieldNo), CI);
+ else
+ // Initializing a bitfield with a non-trivial constant?
+ return false;
+ }
+
+ return true;
+}
+
llvm::Constant *CodeGenModule::EmitConstantInit(const VarDecl &D,
CodeGenFunction *CGF) {
// Make a quick check if variable can be default NULL initialized
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp
index 08c81c0..330a0df 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1386,7 +1386,9 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
if (CGF.SanOpts.has(SanitizerKind::CFIUnrelatedCast)) {
if (auto PT = DestTy->getAs<PointerType>())
CGF.EmitVTablePtrCheckForCast(PT->getPointeeType(), Src,
- /*MayBeNull=*/true);
+ /*MayBeNull=*/true,
+ CodeGenFunction::CFITCK_UnrelatedCast,
+ CE->getLocStart());
}
return Builder.CreateBitCast(Src, DstTy);
@@ -1420,7 +1422,9 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
if (CGF.SanOpts.has(SanitizerKind::CFIDerivedCast))
CGF.EmitVTablePtrCheckForCast(DestTy->getPointeeType(), Derived,
- /*MayBeNull=*/true);
+ /*MayBeNull=*/true,
+ CodeGenFunction::CFITCK_DerivedCast,
+ CE->getLocStart());
return Derived;
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp
index 0675544..1163d63 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp
@@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "CGLoopInfo.h"
+#include "clang/AST/Attr.h"
+#include "clang/Sema/LoopHint.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
@@ -76,7 +78,34 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs)
LoopID = createMetadata(Header->getContext(), Attrs);
}
-void LoopInfoStack::push(BasicBlock *Header) {
+void LoopInfoStack::push(BasicBlock *Header,
+ ArrayRef<const clang::Attr *> Attrs) {
+ for (const auto *Attr : Attrs) {
+ const LoopHintAttr *LH = dyn_cast<LoopHintAttr>(Attr);
+
+ // Skip non-loop-hint attributes.
+ if (!LH)
+ continue;
+
+ LoopHintAttr::OptionType Option = LH->getOption();
+ LoopHintAttr::LoopHintState State = LH->getState();
+ switch (Option) {
+ case LoopHintAttr::Vectorize:
+ case LoopHintAttr::Interleave:
+ if (State == LoopHintAttr::AssumeSafety) {
+ // Apply "llvm.mem.parallel_loop_access" metadata to load/stores.
+ setParallel(true);
+ }
+ break;
+ case LoopHintAttr::VectorizeWidth:
+ case LoopHintAttr::InterleaveCount:
+ case LoopHintAttr::Unroll:
+ case LoopHintAttr::UnrollCount:
+ // Nothing to do here for these loop hints.
+ break;
+ }
+ }
+
Active.push_back(LoopInfo(Header, StagedAttrs));
// Clear the attributes so nested loops do not inherit them.
StagedAttrs.clear();
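Aside (not part of the patch): the pragma that produces the LoopHintAttr handled above. vectorize(assume_safety) is the state that LoopInfoStack::push now turns into llvm.mem.parallel_loop_access metadata; the saxpy example itself is invented.

void saxpy(float *x, float *y, float a, int n) {
  // The hint asserts the loop's memory accesses carry no dependences, so the
  // vectorizer may skip its runtime safety checks.
#pragma clang loop vectorize(assume_safety)
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];
}

int main() {
  float x[4] = {1, 2, 3, 4}, y[4] = {0, 0, 0, 0};
  saxpy(x, y, 2.0f, 4);
  return y[0] == 2.0f ? 0 : 1;
}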
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h
index aee1621..2249937 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h
@@ -15,6 +15,7 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_CGLOOPINFO_H
#define LLVM_CLANG_LIB_CODEGEN_CGLOOPINFO_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Value.h"
@@ -27,6 +28,7 @@ class MDNode;
} // end namespace llvm
namespace clang {
+class Attr;
namespace CodeGen {
/// \brief Attributes that may be specified on loops.
@@ -86,7 +88,8 @@ public:
/// \brief Begin a new structured loop. The set of staged attributes will be
/// applied to the loop and then cleared.
- void push(llvm::BasicBlock *Header);
+ void push(llvm::BasicBlock *Header,
+ llvm::ArrayRef<const Attr *> Attrs = llvm::None);
/// \brief End the current loop.
void pop();
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp
index ef9a92d..9981fcc 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp
@@ -2996,13 +2996,9 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
SmallVector<Expr*, 4> ConstructorArgs;
ConstructorArgs.push_back(&SRC);
- CXXConstructExpr::arg_iterator A = CXXConstExpr->arg_begin();
- ++A;
-
- for (CXXConstructExpr::arg_iterator AEnd = CXXConstExpr->arg_end();
- A != AEnd; ++A)
- ConstructorArgs.push_back(*A);
-
+ ConstructorArgs.append(std::next(CXXConstExpr->arg_begin()),
+ CXXConstExpr->arg_end());
+
CXXConstructExpr *TheCXXConstructExpr =
CXXConstructExpr::Create(C, Ty, SourceLocation(),
CXXConstExpr->getConstructor(),
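Aside (not part of the patch): the same replace-a-copy-loop-with-append shape in plain C++, using std::vector rather than SmallVector, purely to show the iterator-range idiom the hunk above switches to.

#include <iterator>
#include <vector>

int main() {
  std::vector<int> src = {10, 20, 30, 40};
  std::vector<int> dst;
  dst.push_back(src.front());
  // Same shape as ConstructorArgs.append(std::next(arg_begin()), arg_end()):
  // copy everything after the first element in one call instead of a loop.
  dst.insert(dst.end(), std::next(src.begin()), src.end());
  return dst.size() == 4 ? 0 : 1;
}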
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 1238acc..3161af3 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -741,7 +741,7 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
break;
}
case OMPRTL__kmpc_end_ordered: {
- // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
+ // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
@@ -756,6 +756,31 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
break;
}
+ case OMPRTL__kmpc_taskgroup: {
+ // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
+ break;
+ }
+ case OMPRTL__kmpc_end_taskgroup: {
+ // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
+ break;
+ }
+ case OMPRTL__kmpc_push_proc_bind: {
+ // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
+ // int proc_bind)
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
+ break;
+ }
}
return RTLFn;
}
@@ -1240,6 +1265,25 @@ void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
}
+void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
+ const RegionCodeGenTy &TaskgroupOpGen,
+ SourceLocation Loc) {
+ // __kmpc_taskgroup(ident_t *, gtid);
+ // TaskgroupOpGen();
+ // __kmpc_end_taskgroup(ident_t *, gtid);
+ // Prepare arguments and build a call to __kmpc_taskgroup
+ {
+ CodeGenFunction::RunCleanupsScope Scope(CGF);
+ llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
+ // Build a call to __kmpc_end_taskgroup
+ CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
+ NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
+ llvm::makeArrayRef(Args));
+ emitInlinedDirective(CGF, TaskgroupOpGen);
+ }
+}
+
static llvm::Value *emitCopyprivateCopyFunction(
CodeGenModule &CGM, llvm::Type *ArgsType,
ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
@@ -1600,6 +1644,39 @@ void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
Args);
}
+void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
+ OpenMPProcBindClauseKind ProcBind,
+ SourceLocation Loc) {
+ // Constants for the proc_bind values accepted by the runtime.
+ enum ProcBindTy {
+ ProcBindFalse = 0,
+ ProcBindTrue,
+ ProcBindMaster,
+ ProcBindClose,
+ ProcBindSpread,
+ ProcBindIntel,
+ ProcBindDefault
+ } RuntimeProcBind;
+ switch (ProcBind) {
+ case OMPC_PROC_BIND_master:
+ RuntimeProcBind = ProcBindMaster;
+ break;
+ case OMPC_PROC_BIND_close:
+ RuntimeProcBind = ProcBindClose;
+ break;
+ case OMPC_PROC_BIND_spread:
+ RuntimeProcBind = ProcBindSpread;
+ break;
+ case OMPC_PROC_BIND_unknown:
+ llvm_unreachable("Unsupported proc_bind value.");
+ }
+ // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
+ llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
+}
+
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
SourceLocation Loc) {
// Build call void __kmpc_flush(ident_t *loc)
@@ -2242,7 +2319,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
ArrayRef<const Expr *> LHSExprs,
ArrayRef<const Expr *> RHSExprs,
ArrayRef<const Expr *> ReductionOps,
- bool WithNowait) {
+ bool WithNowait, bool SimpleReduction) {
// Next code should be emitted for reduction:
//
// static kmp_critical_name lock = { 0 };
@@ -2272,9 +2349,22 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
// break;
// default:;
// }
+ //
+ // If SimpleReduction is true, only the following code is generated:
+ // ...
+ // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
+ // ...
auto &C = CGM.getContext();
+ if (SimpleReduction) {
+ CodeGenFunction::RunCleanupsScope Scope(CGF);
+ for (auto *E : ReductionOps) {
+ CGF.EmitIgnoredExpr(E);
+ }
+ return;
+ }
+
// 1. Build a list of reduction variables.
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size());
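Aside (not part of the patch): OpenMP source that exercises the new runtime entry points added above. proc_bind(close) lowers to a __kmpc_push_proc_bind call before the fork, and the taskgroup brackets its body with __kmpc_taskgroup / __kmpc_end_taskgroup, waiting for child tasks at the closing brace. Needs -fopenmp; work() is a stand-in.

void work(int) {}

int main() {
#pragma omp parallel proc_bind(close)
  {
#pragma omp taskgroup
    {
#pragma omp task
      work(1);
#pragma omp task
      work(2);
    }   // implicit wait for both child tasks here
  }
  return 0;
}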
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h
index f5aa4a5..d1efde2 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -131,6 +131,13 @@ private:
// Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
// global_tid);
OMPRTL__kmpc_omp_taskwait,
+ // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
+ OMPRTL__kmpc_taskgroup,
+ // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
+ OMPRTL__kmpc_end_taskgroup,
+ // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
+ // int proc_bind);
+ OMPRTL__kmpc_push_proc_bind,
};
/// \brief Values for bit flags used in the ident_t to describe the fields.
@@ -388,6 +395,13 @@ public:
/// \brief Emits code for a taskyield directive.
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc);
+ /// \brief Emit a taskgroup region.
+ /// \param TaskgroupOpGen Generator for the statement associated with the
+ /// given taskgroup region.
+ virtual void emitTaskgroupRegion(CodeGenFunction &CGF,
+ const RegionCodeGenTy &TaskgroupOpGen,
+ SourceLocation Loc);
+
/// \brief Emits a single region.
/// \param SingleOpGen Generator for the statement associated with the given
/// single region.
@@ -401,7 +415,7 @@ public:
/// \brief Emit an ordered region.
/// \param OrderedOpGen Generator for the statement associated with the given
- /// critical region.
+ /// ordered region.
virtual void emitOrderedRegion(CodeGenFunction &CGF,
const RegionCodeGenTy &OrderedOpGen,
SourceLocation Loc);
@@ -504,6 +518,12 @@ public:
llvm::Value *NumThreads,
SourceLocation Loc);
+ /// \brief Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
+ /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
+ virtual void emitProcBindClause(CodeGenFunction &CGF,
+ OpenMPProcBindClauseKind ProcBind,
+ SourceLocation Loc);
+
/// \brief Returns address of the threadprivate variable for the current
/// thread.
/// \param VD Threadprivate variable.
@@ -632,7 +652,7 @@ public:
ArrayRef<const Expr *> LHSExprs,
ArrayRef<const Expr *> RHSExprs,
ArrayRef<const Expr *> ReductionOps,
- bool WithNowait);
+ bool WithNowait, bool SimpleReduction);
/// \brief Emit code for 'taskwait' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp
index c879750..9286f03 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp
@@ -222,6 +222,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S) {
case Stmt::OMPTaskwaitDirectiveClass:
EmitOMPTaskwaitDirective(cast<OMPTaskwaitDirective>(*S));
break;
+ case Stmt::OMPTaskgroupDirectiveClass:
+ EmitOMPTaskgroupDirective(cast<OMPTaskgroupDirective>(*S));
+ break;
case Stmt::OMPFlushDirectiveClass:
EmitOMPFlushDirective(cast<OMPFlushDirective>(*S));
break;
@@ -682,7 +685,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S,
JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond");
EmitBlock(LoopHeader.getBlock());
- LoopStack.push(LoopHeader.getBlock());
+ LoopStack.push(LoopHeader.getBlock(), WhileAttrs);
// Create an exit block for when the condition fails, which will
// also become the break target.
@@ -776,7 +779,7 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S,
// Emit the body of the loop.
llvm::BasicBlock *LoopBody = createBasicBlock("do.body");
- LoopStack.push(LoopBody);
+ LoopStack.push(LoopBody, DoAttrs);
EmitBlockWithFallThrough(LoopBody, &S);
{
@@ -842,7 +845,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S,
llvm::BasicBlock *CondBlock = Continue.getBlock();
EmitBlock(CondBlock);
- LoopStack.push(CondBlock);
+ LoopStack.push(CondBlock, ForAttrs);
// If the for loop doesn't have an increment we can just use the
// condition as the continue block. Otherwise we'll need to create
@@ -940,7 +943,7 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S,
llvm::BasicBlock *CondBlock = createBasicBlock("for.cond");
EmitBlock(CondBlock);
- LoopStack.push(CondBlock);
+ LoopStack.push(CondBlock, ForAttrs);
// If there are any cleanups between here and the loop-exit scope,
// create a block to stage a loop exit along.
@@ -1750,6 +1753,16 @@ llvm::Value* CodeGenFunction::EmitAsmInput(
const TargetInfo::ConstraintInfo &Info,
const Expr *InputExpr,
std::string &ConstraintStr) {
+ // If this can't be a register or memory, i.e., has to be a constant
+ // (immediate or symbolic), try to emit it as such.
+ if (!Info.allowsRegister() && !Info.allowsMemory()) {
+ llvm::APSInt Result;
+ if (InputExpr->EvaluateAsInt(Result, getContext()))
+ return llvm::ConstantInt::get(getLLVMContext(), Result);
+ assert(!Info.requiresImmediateConstant() &&
+ "Required-immediate inlineasm arg isn't constant?");
+ }
+
if (Info.allowsRegister() || !Info.allowsMemory())
if (CodeGenFunction::hasScalarEvaluationKind(InputExpr->getType()))
return EmitScalarExpr(InputExpr);
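Aside (not part of the patch): an "i"-constrained inline-asm operand of the kind the added EmitAsmInput code now folds to a constant up front instead of emitting as a load. x86 assembly syntax, illustrative only.

int main() {
  int out = 0;
  // "i" requires an immediate operand, so it can be neither a register nor
  // memory; the new path evaluates it as an integer constant directly.
  asm volatile("movl %1, %0" : "=r"(out) : "i"(42));
  return out == 42 ? 0 : 1;
}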
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 07fc6e9..5e94d56 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -316,26 +316,32 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
// ...
// orig_varn = private_orig_varn;
// }
- auto *ThenBB = createBasicBlock(".omp.lastprivate.then");
- auto *DoneBB = createBasicBlock(".omp.lastprivate.done");
- Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
- EmitBlock(ThenBB);
+ llvm::BasicBlock *ThenBB = nullptr;
+ llvm::BasicBlock *DoneBB = nullptr;
+ if (IsLastIterCond) {
+ ThenBB = createBasicBlock(".omp.lastprivate.then");
+ DoneBB = createBasicBlock(".omp.lastprivate.done");
+ Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
+ EmitBlock(ThenBB);
+ }
llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates;
const Expr *LastIterVal = nullptr;
const Expr *IVExpr = nullptr;
const Expr *IncExpr = nullptr;
if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
- LastIterVal =
- cast<VarDecl>(cast<DeclRefExpr>(LoopDirective->getUpperBoundVariable())
- ->getDecl())
- ->getAnyInitializer();
- IVExpr = LoopDirective->getIterationVariable();
- IncExpr = LoopDirective->getInc();
- auto IUpdate = LoopDirective->updates().begin();
- for (auto *E : LoopDirective->counters()) {
- auto *D = cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl();
- LoopCountersAndUpdates[D] = *IUpdate;
- ++IUpdate;
+ if (isOpenMPWorksharingDirective(D.getDirectiveKind())) {
+ LastIterVal = cast<VarDecl>(cast<DeclRefExpr>(
+ LoopDirective->getUpperBoundVariable())
+ ->getDecl())
+ ->getAnyInitializer();
+ IVExpr = LoopDirective->getIterationVariable();
+ IncExpr = LoopDirective->getInc();
+ auto IUpdate = LoopDirective->updates().begin();
+ for (auto *E : LoopDirective->counters()) {
+ auto *D = cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl();
+ LoopCountersAndUpdates[D] = *IUpdate;
+ ++IUpdate;
+ }
}
}
{
@@ -355,7 +361,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
// directive, update its value before copyin back to original
// variable.
if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) {
- if (FirstLCV) {
+ if (FirstLCV && LastIterVal) {
EmitAnyExprToMem(LastIterVal, EmitLValue(IVExpr).getAddress(),
IVExpr->getType().getQualifiers(),
/*IsInitializer=*/false);
@@ -379,7 +385,9 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
}
}
}
- EmitBlock(DoneBB, /*IsFinished=*/true);
+ if (IsLastIterCond) {
+ EmitBlock(DoneBB, /*IsFinished=*/true);
+ }
}
void CodeGenFunction::EmitOMPReductionClauseInit(
@@ -435,7 +443,9 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
CGM.getOpenMPRuntime().emitReduction(
*this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps,
D.getSingleClause(OMPC_nowait) ||
- isOpenMPParallelDirective(D.getDirectiveKind()));
+ isOpenMPParallelDirective(D.getDirectiveKind()) ||
+ D.getDirectiveKind() == OMPD_simd,
+ D.getDirectiveKind() == OMPD_simd);
}
}
@@ -454,6 +464,12 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
CGF, NumThreads, NumThreadsClause->getLocStart());
}
+ if (auto *C = S.getSingleClause(OMPC_proc_bind)) {
+ CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
+ auto *ProcBindClause = cast<OMPProcBindClause>(C);
+ CGF.CGM.getOpenMPRuntime().emitProcBindClause(
+ CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
+ }
const Expr *IfCond = nullptr;
if (auto C = S.getSingleClause(OMPC_if)) {
IfCond = cast<OMPIfClause>(C)->getCondition();
@@ -489,15 +505,14 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
emitCommonOMPParallelDirective(*this, S, CodeGen);
}
-void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
- bool SeparateIter) {
+void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D) {
RunCleanupsScope BodyScope(*this);
// Update counters values on current iteration.
- for (auto I : S.updates()) {
+ for (auto I : D.updates()) {
EmitIgnoredExpr(I);
}
// Update the linear variables.
- for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) {
+ for (auto &&I = D.getClausesOfKind(OMPC_linear); I; ++I) {
auto *C = cast<OMPLinearClause>(*I);
for (auto U : C->updates()) {
EmitIgnoredExpr(U);
@@ -508,16 +523,14 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
auto Continue = getJumpDestInCurrentScope("omp.body.continue");
BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
// Emit loop body.
- EmitStmt(S.getBody());
+ EmitStmt(D.getBody());
// The end (updates/cleanups).
EmitBlock(Continue.getBlock());
BreakContinueStack.pop_back();
- if (SeparateIter) {
// TODO: Update lastprivates if the SeparateIter flag is true.
// This will be implemented in a follow-up OMPLastprivateClause patch, but
// result should still be correct without it, as we do not make these
// variables private yet.
- }
}
void CodeGenFunction::EmitOMPInnerLoop(
@@ -567,70 +580,89 @@ void CodeGenFunction::EmitOMPInnerLoop(
EmitBlock(LoopExit.getBlock());
}
-void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
- auto IC = S.counters().begin();
- for (auto F : S.finals()) {
- auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
- if (LocalDeclMap.lookup(OrigVD)) {
+void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
+ // Emit inits for the linear variables.
+ for (auto &&I = D.getClausesOfKind(OMPC_linear); I; ++I) {
+ auto *C = cast<OMPLinearClause>(*I);
+ for (auto Init : C->inits()) {
+ auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
+ auto *OrigVD = cast<VarDecl>(
+ cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())->getDecl());
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
CapturedStmtInfo->lookup(OrigVD) != nullptr,
- (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
- auto *OrigAddr = EmitLValue(&DRE).getAddress();
- OMPPrivateScope VarScope(*this);
- VarScope.addPrivate(OrigVD,
- [OrigAddr]() -> llvm::Value *{ return OrigAddr; });
- (void)VarScope.Privatize();
- EmitIgnoredExpr(F);
+ VD->getInit()->getType(), VK_LValue,
+ VD->getInit()->getExprLoc());
+ AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
+ EmitExprAsInit(&DRE, VD,
+ MakeAddrLValue(Emission.getAllocatedAddress(),
+ VD->getType(), Emission.Alignment),
+ /*capturedByInit=*/false);
+ EmitAutoVarCleanups(Emission);
}
- ++IC;
+ // Emit the linear steps for the linear clauses.
+ // If a step is not constant, it is pre-calculated before the loop.
+ if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
+ if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
+ EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
+ // Emit calculation of the linear step.
+ EmitIgnoredExpr(CS);
+ }
}
+}
+
+static void emitLinearClauseFinal(CodeGenFunction &CGF,
+ const OMPLoopDirective &D) {
// Emit the final values of the linear variables.
- for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) {
+ for (auto &&I = D.getClausesOfKind(OMPC_linear); I; ++I) {
auto *C = cast<OMPLinearClause>(*I);
auto IC = C->varlist_begin();
for (auto F : C->finals()) {
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
- CapturedStmtInfo->lookup(OrigVD) != nullptr,
+ CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
(*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
- auto *OrigAddr = EmitLValue(&DRE).getAddress();
- OMPPrivateScope VarScope(*this);
+ auto *OrigAddr = CGF.EmitLValue(&DRE).getAddress();
+ CodeGenFunction::OMPPrivateScope VarScope(CGF);
VarScope.addPrivate(OrigVD,
[OrigAddr]() -> llvm::Value *{ return OrigAddr; });
(void)VarScope.Privatize();
- EmitIgnoredExpr(F);
+ CGF.EmitIgnoredExpr(F);
++IC;
}
}
}
-static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
- const OMPAlignedClause &Clause) {
- unsigned ClauseAlignment = 0;
- if (auto AlignmentExpr = Clause.getAlignment()) {
- auto AlignmentCI =
- cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
- ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
- }
- for (auto E : Clause.varlists()) {
- unsigned Alignment = ClauseAlignment;
- if (Alignment == 0) {
- // OpenMP [2.8.1, Description]
- // If no optional parameter is specified, implementation-defined default
- // alignments for SIMD instructions on the target platforms are assumed.
- Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
- E->getType());
+static void emitAlignedClause(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) {
+ for (auto &&I = D.getClausesOfKind(OMPC_aligned); I; ++I) {
+ auto *Clause = cast<OMPAlignedClause>(*I);
+ unsigned ClauseAlignment = 0;
+ if (auto AlignmentExpr = Clause->getAlignment()) {
+ auto AlignmentCI =
+ cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
+ ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
}
- assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
- "alignment is not power of 2");
- if (Alignment != 0) {
- llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
- CGF.EmitAlignmentAssumption(PtrValue, Alignment);
+ for (auto E : Clause->varlists()) {
+ unsigned Alignment = ClauseAlignment;
+ if (Alignment == 0) {
+ // OpenMP [2.8.1, Description]
+ // If no optional parameter is specified, implementation-defined default
+ // alignments for SIMD instructions on the target platforms are assumed.
+ Alignment =
+ CGF.CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
+ E->getType());
+ }
+ assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
+ "alignment is not power of 2");
+ if (Alignment != 0) {
+ llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
+ CGF.EmitAlignmentAssumption(PtrValue, Alignment);
+ }
}
}
}
-static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
+static void emitPrivateLoopCounters(CodeGenFunction &CGF,
CodeGenFunction::OMPPrivateScope &LoopScope,
ArrayRef<Expr *> Counters) {
for (auto *E : Counters) {
@@ -647,37 +679,39 @@ static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
const Expr *Cond, llvm::BasicBlock *TrueBlock,
llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
- CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
- EmitPrivateLoopCounters(CGF, PreCondScope, S.counters());
- const VarDecl *IVDecl =
- cast<VarDecl>(cast<DeclRefExpr>(S.getIterationVariable())->getDecl());
- bool IsRegistered = PreCondScope.addPrivate(IVDecl, [&]() -> llvm::Value *{
- // Emit var without initialization.
- auto VarEmission = CGF.EmitAutoVarAlloca(*IVDecl);
- CGF.EmitAutoVarCleanups(VarEmission);
- return VarEmission.getAllocatedAddress();
- });
- assert(IsRegistered && "counter already registered as private");
- // Silence the warning about unused variable.
- (void)IsRegistered;
- (void)PreCondScope.Privatize();
- // Initialize internal counter to 0 to calculate initial values of real
- // counters.
- LValue IV = CGF.EmitLValue(S.getIterationVariable());
- CGF.EmitStoreOfScalar(
- llvm::ConstantInt::getNullValue(
- IV.getAddress()->getType()->getPointerElementType()),
- CGF.EmitLValue(S.getIterationVariable()), /*isInit=*/true);
- // Get initial values of real counters.
- for (auto I : S.updates()) {
- CGF.EmitIgnoredExpr(I);
+ {
+ CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
+ emitPrivateLoopCounters(CGF, PreCondScope, S.counters());
+ const VarDecl *IVDecl =
+ cast<VarDecl>(cast<DeclRefExpr>(S.getIterationVariable())->getDecl());
+ bool IsRegistered = PreCondScope.addPrivate(IVDecl, [&]() -> llvm::Value *{
+ // Emit var without initialization.
+ auto VarEmission = CGF.EmitAutoVarAlloca(*IVDecl);
+ CGF.EmitAutoVarCleanups(VarEmission);
+ return VarEmission.getAllocatedAddress();
+ });
+ assert(IsRegistered && "counter already registered as private");
+ // Silence the warning about unused variable.
+ (void)IsRegistered;
+ (void)PreCondScope.Privatize();
+ // Initialize internal counter to 0 to calculate initial values of real
+ // counters.
+ LValue IV = CGF.EmitLValue(S.getIterationVariable());
+ CGF.EmitStoreOfScalar(
+ llvm::ConstantInt::getNullValue(
+ IV.getAddress()->getType()->getPointerElementType()),
+ CGF.EmitLValue(S.getIterationVariable()), /*isInit=*/true);
+ // Get initial values of real counters.
+ for (auto I : S.updates()) {
+ CGF.EmitIgnoredExpr(I);
+ }
}
// Check that loop is executed at least one time.
CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}
static void
-EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
CodeGenFunction::OMPPrivateScope &PrivateScope) {
for (auto &&I = D.getClausesOfKind(OMPC_linear); I; ++I) {
auto *C = cast<OMPLinearClause>(*I);
@@ -696,19 +730,50 @@ EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
}
}
+static void emitSafelenClause(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) {
+ if (auto *C =
+ cast_or_null<OMPSafelenClause>(D.getSingleClause(OMPC_safelen))) {
+ RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
+ /*ignoreResult=*/true);
+ llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
+ CGF.LoopStack.setVectorizerWidth(Val->getZExtValue());
+ // In presence of finite 'safelen', it may be unsafe to mark all
+ // the memory instructions parallel, because loop-carried
+ // dependences of 'safelen' iterations are possible.
+ CGF.LoopStack.setParallel(false);
+ }
+}
+
+void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
+ // Walk clauses and process safelen/lastprivate.
+ LoopStack.setParallel();
+ LoopStack.setVectorizerEnable(true);
+ emitSafelenClause(*this, D);
+}
+
+void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) {
+ auto IC = D.counters().begin();
+ for (auto F : D.finals()) {
+ auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
+ if (LocalDeclMap.lookup(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) {
+ DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
+ CapturedStmtInfo->lookup(OrigVD) != nullptr,
+ (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
+ auto *OrigAddr = EmitLValue(&DRE).getAddress();
+ OMPPrivateScope VarScope(*this);
+ VarScope.addPrivate(OrigVD,
+ [OrigAddr]() -> llvm::Value *{ return OrigAddr; });
+ (void)VarScope.Privatize();
+ EmitIgnoredExpr(F);
+ }
+ ++IC;
+ }
+ emitLinearClauseFinal(*this, D);
+}
+
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
- // Pragma 'simd' code depends on presence of 'lastprivate'.
- // If present, we have to separate last iteration of the loop:
- //
- // if (PreCond) {
- // for (IV in 0..LastIteration-1) BODY;
- // BODY with updates of lastprivate vars;
- // <Final counter/linear vars updates>;
- // }
- //
- // otherwise (when there's no lastprivate):
- //
// if (PreCond) {
// for (IV in 0..LastIteration) BODY;
// <Final counter/linear vars updates>;
@@ -731,43 +796,6 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
CGF.EmitBlock(ThenBlock);
CGF.incrementProfileCounter(&S);
}
- // Walk clauses and process safelen/lastprivate.
- bool SeparateIter = false;
- CGF.LoopStack.setParallel();
- CGF.LoopStack.setVectorizerEnable(true);
- for (auto C : S.clauses()) {
- switch (C->getClauseKind()) {
- case OMPC_safelen: {
- RValue Len = CGF.EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
- AggValueSlot::ignored(), true);
- llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
- CGF.LoopStack.setVectorizerWidth(Val->getZExtValue());
- // In presence of finite 'safelen', it may be unsafe to mark all
- // the memory instructions parallel, because loop-carried
- // dependences of 'safelen' iterations are possible.
- CGF.LoopStack.setParallel(false);
- break;
- }
- case OMPC_aligned:
- EmitOMPAlignedClause(CGF, CGF.CGM, cast<OMPAlignedClause>(*C));
- break;
- case OMPC_lastprivate:
- SeparateIter = true;
- break;
- default:
- // Not handled yet
- ;
- }
- }
-
- // Emit inits for the linear variables.
- for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) {
- auto *C = cast<OMPLinearClause>(*I);
- for (auto Init : C->inits()) {
- auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
- CGF.EmitVarDecl(*D);
- }
- }
// Emit the loop iteration variable.
const Expr *IVExpr = S.getIterationVariable();
@@ -784,34 +812,31 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
CGF.EmitIgnoredExpr(S.getCalcLastIteration());
}
- // Emit the linear steps for the linear clauses.
- // If a step is not constant, it is pre-calculated before the loop.
- for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) {
- auto *C = cast<OMPLinearClause>(*I);
- if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
- if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
- CGF.EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
- // Emit calculation of the linear step.
- CGF.EmitIgnoredExpr(CS);
- }
- }
+ CGF.EmitOMPSimdInit(S);
+ emitAlignedClause(CGF, S);
+ CGF.EmitOMPLinearClauseInit(S);
+ bool HasLastprivateClause;
{
OMPPrivateScope LoopScope(CGF);
- EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
- EmitPrivateLinearVars(CGF, S, LoopScope);
+ emitPrivateLoopCounters(CGF, LoopScope, S.counters());
+ emitPrivateLinearVars(CGF, S, LoopScope);
CGF.EmitOMPPrivateClause(S, LoopScope);
+ CGF.EmitOMPReductionClauseInit(S, LoopScope);
+ HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
(void)LoopScope.Privatize();
CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
- S.getCond(SeparateIter), S.getInc(),
+ S.getCond(), S.getInc(),
[&S](CodeGenFunction &CGF) {
CGF.EmitOMPLoopBody(S);
CGF.EmitStopPoint(&S);
},
[](CodeGenFunction &) {});
- if (SeparateIter) {
- CGF.EmitOMPLoopBody(S, /*SeparateIter=*/true);
+ // Emit final copy of the lastprivate variables at the end of loops.
+ if (HasLastprivateClause) {
+ CGF.EmitOMPLastprivateClauseFinal(S);
}
+ CGF.EmitOMPReductionClauseFinal(S);
}
CGF.EmitOMPSimdFinal(S);
// Emit: if (PreCond) - end.
@@ -912,7 +937,7 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
// IV = LB
EmitIgnoredExpr(S.getInit());
// IV < UB
- BoolCondVal = EvaluateExprAsBool(S.getCond(false));
+ BoolCondVal = EvaluateExprAsBool(S.getCond());
} else {
BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
IL, LB, UB, ST);
@@ -941,14 +966,19 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
+ // Generate !llvm.loop.parallel metadata for loads and stores for loops
+ // with dynamic/guided scheduling and without ordered clause.
+ if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
+ LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic ||
+ ScheduleKind == OMPC_SCHEDULE_guided) &&
+ !Ordered);
+ } else {
+ EmitOMPSimdInit(S);
+ }
+
SourceLocation Loc = S.getLocStart();
- // Generate !llvm.loop.parallel metadata for loads and stores for loops with
- // dynamic/guided scheduling and without ordered clause.
- LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic ||
- ScheduleKind == OMPC_SCHEDULE_guided) &&
- !Ordered);
EmitOMPInnerLoop(
- S, LoopScope.requiresCleanups(), S.getCond(/*SeparateIter=*/false),
+ S, LoopScope.requiresCleanups(), S.getCond(),
S.getInc(),
[&S](CodeGenFunction &CGF) {
CGF.EmitOMPLoopBody(S);
@@ -1055,6 +1085,9 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
EmitBlock(ThenBlock);
incrementProfileCounter(&S);
}
+
+ emitAlignedClause(*this, S);
+ EmitOMPLinearClauseInit(S);
// Emit 'then' code.
{
// Emit helper vars inits.
@@ -1077,7 +1110,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
EmitOMPPrivateClause(S, LoopScope);
HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
EmitOMPReductionClauseInit(S, LoopScope);
- EmitPrivateLoopCounters(*this, LoopScope, S.counters());
+ emitPrivateLoopCounters(*this, LoopScope, S.counters());
+ emitPrivateLinearVars(*this, S, LoopScope);
(void)LoopScope.Privatize();
// Detect the loop schedule kind and chunk.
@@ -1093,6 +1127,9 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
if (RT.isStaticNonchunked(ScheduleKind,
/* Chunked */ Chunk != nullptr) &&
!Ordered) {
+ if (isOpenMPSimdDirective(S.getDirectiveKind())) {
+ EmitOMPSimdInit(S);
+ }
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
// When no chunk_size is specified, the iteration space is divided into
// chunks that are approximately equal in size, and at most one chunk is
@@ -1106,8 +1143,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
// IV = LB;
EmitIgnoredExpr(S.getInit());
// while (idx <= UB) { BODY; ++idx; }
- EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
- S.getCond(/*SeparateIter=*/false), S.getInc(),
+ EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
+ S.getInc(),
[&S](CodeGenFunction &CGF) {
CGF.EmitOMPLoopBody(S);
CGF.EmitStopPoint(&S);
@@ -1128,6 +1165,9 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
EmitOMPLastprivateClauseFinal(
S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
}
+ if (isOpenMPSimdDirective(S.getDirectiveKind())) {
+ EmitOMPSimdFinal(S);
+ }
// We're now done with the loop, so jump to the continuation block.
if (ContBlock) {
EmitBranch(ContBlock);
@@ -1151,8 +1191,18 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
}
}
-void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
- llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
+void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
+ LexicalScope Scope(*this, S.getSourceRange());
+ bool HasLastprivates = false;
+ auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
+ HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
+ };
+ CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
+
+ // Emit an implicit barrier at the end.
+ if (!S.getSingleClause(OMPC_nowait) || HasLastprivates) {
+ CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
+ }
}
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
@@ -1407,8 +1457,20 @@ void CodeGenFunction::EmitOMPParallelForDirective(
}
void CodeGenFunction::EmitOMPParallelForSimdDirective(
- const OMPParallelForSimdDirective &) {
- llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
+ const OMPParallelForSimdDirective &S) {
+ // Emit directive as a combined directive that consists of two implicit
+ // directives: 'parallel' with 'for' directive.
+ LexicalScope Scope(*this, S.getSourceRange());
+ (void)emitScheduleClause(*this, S, /*OuterRegion=*/true);
+ auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+ CGF.EmitOMPWorksharingLoop(S);
+ // Emit implicit barrier at the end of parallel region, but this barrier
+ // is at the end of 'for' directive, so emit it as the implicit barrier for
+ // this 'for' directive.
+ CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
+ OMPD_parallel);
+ };
+ emitCommonOMPParallelDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPParallelSectionsDirective(
@@ -1556,6 +1618,16 @@ void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}
+void CodeGenFunction::EmitOMPTaskgroupDirective(
+ const OMPTaskgroupDirective &S) {
+ LexicalScope Scope(*this, S.getSourceRange());
+ auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+ CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGF.EnsureInsertPoint();
+ };
+ CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
+}
+
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp
index 57370a6..17db401 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp
@@ -848,7 +848,8 @@ void CodeGenModule::EmitVTableBitSetEntries(llvm::GlobalVariable *VTable,
!LangOpts.Sanitize.has(SanitizerKind::CFIUnrelatedCast))
return;
- llvm::Metadata *VTableMD = llvm::ConstantAsMetadata::get(VTable);
+ CharUnits PointerWidth =
+ Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(0));
std::vector<llvm::MDTuple *> BitsetEntries;
// Create a bit set entry for each address point.
@@ -857,23 +858,8 @@ void CodeGenModule::EmitVTableBitSetEntries(llvm::GlobalVariable *VTable,
if (AP.first.getBase()->isInStdNamespace())
continue;
- std::string OutName;
- llvm::raw_string_ostream Out(OutName);
- getCXXABI().getMangleContext().mangleCXXVTableBitSet(AP.first.getBase(),
- Out);
-
- CharUnits PointerWidth =
- Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(0));
- uint64_t AddrPointOffset = AP.second * PointerWidth.getQuantity();
-
- llvm::Metadata *BitsetOps[] = {
- llvm::MDString::get(getLLVMContext(), Out.str()),
- VTableMD,
- llvm::ConstantAsMetadata::get(
- llvm::ConstantInt::get(Int64Ty, AddrPointOffset))};
- llvm::MDTuple *BitsetEntry =
- llvm::MDTuple::get(getLLVMContext(), BitsetOps);
- BitsetEntries.push_back(BitsetEntry);
+ BitsetEntries.push_back(CreateVTableBitSetEntry(
+ VTable, PointerWidth * AP.second, AP.first.getBase()));
}
// Sort the bit set entries for determinism.
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp
index 7e82fcc..54e6b73 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp
@@ -649,14 +649,14 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
return nullptr;
}
- ErrorOr<llvm::Module *> ModuleOrErr =
+ ErrorOr<std::unique_ptr<llvm::Module>> ModuleOrErr =
getLazyBitcodeModule(std::move(*BCBuf), *VMContext);
if (std::error_code EC = ModuleOrErr.getError()) {
CI.getDiagnostics().Report(diag::err_cannot_open_file)
<< LinkBCFile << EC.message();
return nullptr;
}
- LinkModuleToUse = ModuleOrErr.get();
+ LinkModuleToUse = ModuleOrErr.get().release();
}
CoverageSourceInfo *CoverageInfo = nullptr;
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
index 01da750..bece41e 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -615,12 +615,14 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
}
// Apply sanitizer attributes to the function.
- if (SanOpts.has(SanitizerKind::Address))
+ if (SanOpts.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress))
Fn->addFnAttr(llvm::Attribute::SanitizeAddress);
if (SanOpts.has(SanitizerKind::Thread))
Fn->addFnAttr(llvm::Attribute::SanitizeThread);
if (SanOpts.has(SanitizerKind::Memory))
Fn->addFnAttr(llvm::Attribute::SanitizeMemory);
+ if (SanOpts.has(SanitizerKind::SafeStack))
+ Fn->addFnAttr(llvm::Attribute::SafeStack);
// Pass inline keyword to optimizer if it appears explicitly on any
// declaration. Also, in the case of -fno-inline attach NoInline
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h
index 469022d..45475d1 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h
@@ -1311,19 +1311,29 @@ public:
/// to by This.
llvm::Value *GetVTablePtr(llvm::Value *This, llvm::Type *Ty);
+ enum CFITypeCheckKind {
+ CFITCK_VCall,
+ CFITCK_NVCall,
+ CFITCK_DerivedCast,
+ CFITCK_UnrelatedCast,
+ };
+
/// \brief Derived is the presumed address of an object of type T after a
/// cast. If T is a polymorphic class type, emit a check that the virtual
/// table for Derived belongs to a class derived from T.
void EmitVTablePtrCheckForCast(QualType T, llvm::Value *Derived,
- bool MayBeNull);
+ bool MayBeNull, CFITypeCheckKind TCK,
+ SourceLocation Loc);
/// EmitVTablePtrCheckForCall - Virtual method MD is being called via VTable.
/// If vptr CFI is enabled, emit a check that VTable is valid.
- void EmitVTablePtrCheckForCall(const CXXMethodDecl *MD, llvm::Value *VTable);
+ void EmitVTablePtrCheckForCall(const CXXMethodDecl *MD, llvm::Value *VTable,
+ CFITypeCheckKind TCK, SourceLocation Loc);
/// EmitVTablePtrCheck - Emit a check that VTable is a valid virtual table for
/// RD using llvm.bitset.test.
- void EmitVTablePtrCheck(const CXXRecordDecl *RD, llvm::Value *VTable);
+ void EmitVTablePtrCheck(const CXXRecordDecl *RD, llvm::Value *VTable,
+ CFITypeCheckKind TCK, SourceLocation Loc);
/// CanDevirtualizeMemberFunctionCalls - Checks whether virtual calls on given
/// expr can be devirtualized.
@@ -2130,9 +2140,9 @@ public:
/// \param D Directive that has at least one 'lastprivate' clause.
/// \param IsLastIterCond Boolean condition that must be set to 'i1 true' if
/// it is the last iteration of the loop code in associated directive, or to
- /// 'i1 false' otherwise.
+ /// 'i1 false' otherwise. If this item is nullptr, no final check is required.
void EmitOMPLastprivateClauseFinal(const OMPExecutableDirective &D,
- llvm::Value *IsLastIterCond);
+ llvm::Value *IsLastIterCond = nullptr);
/// \brief Emit initial code for reduction variables. Creates reduction copies
/// and initializes them with the values according to OpenMP standard.
///
@@ -2147,6 +2157,11 @@ public:
///
/// \param D Directive that has at least one 'reduction' clause.
void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D);
+ /// \brief Emit initial code for linear variables. Creates private copies
+ /// and initializes them with the values according to OpenMP standard.
+ ///
+ /// \param D Directive (possibly) with the 'linear' clause.
+ void EmitOMPLinearClauseInit(const OMPLoopDirective &D);
void EmitOMPParallelDirective(const OMPParallelDirective &S);
void EmitOMPSimdDirective(const OMPSimdDirective &S);
@@ -2164,6 +2179,7 @@ public:
void EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &S);
void EmitOMPBarrierDirective(const OMPBarrierDirective &S);
void EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S);
+ void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S);
void EmitOMPFlushDirective(const OMPFlushDirective &S);
void EmitOMPOrderedDirective(const OMPOrderedDirective &S);
void EmitOMPAtomicDirective(const OMPAtomicDirective &S);
@@ -2189,9 +2205,9 @@ public:
private:
/// Helpers for the OpenMP loop directives.
- void EmitOMPLoopBody(const OMPLoopDirective &Directive,
- bool SeparateIter = false);
- void EmitOMPSimdFinal(const OMPLoopDirective &S);
+ void EmitOMPLoopBody(const OMPLoopDirective &D);
+ void EmitOMPSimdInit(const OMPLoopDirective &D);
+ void EmitOMPSimdFinal(const OMPLoopDirective &D);
/// \brief Emit code for the worksharing loop-based directive.
/// \return true, if this construct has any lastprivate clause, false -
/// otherwise.
@@ -2568,7 +2584,7 @@ public:
llvm::Value *BuildVector(ArrayRef<llvm::Value*> Ops);
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
- llvm::Value *EmitR600BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+ llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitObjCProtocolExpr(const ObjCProtocolExpr *E);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp
index 9496831..2dd5414 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp
@@ -205,11 +205,9 @@ void CodeGenModule::addReplacement(StringRef Name, llvm::Constant *C) {
}
void CodeGenModule::applyReplacements() {
- for (ReplacementsTy::iterator I = Replacements.begin(),
- E = Replacements.end();
- I != E; ++I) {
- StringRef MangledName = I->first();
- llvm::Constant *Replacement = I->second;
+ for (auto &I : Replacements) {
+ StringRef MangledName = I.first();
+ llvm::Constant *Replacement = I.second;
llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
if (!Entry)
continue;
@@ -261,9 +259,7 @@ void CodeGenModule::checkAliases() {
// and aliases during codegen.
bool Error = false;
DiagnosticsEngine &Diags = getDiags();
- for (std::vector<GlobalDecl>::iterator I = Aliases.begin(),
- E = Aliases.end(); I != E; ++I) {
- const GlobalDecl &GD = *I;
+ for (const GlobalDecl &GD : Aliases) {
const auto *D = cast<ValueDecl>(GD.getDecl());
const AliasAttr *AA = D->getAttr<AliasAttr>();
StringRef MangledName = getMangledName(GD);
@@ -310,9 +306,7 @@ void CodeGenModule::checkAliases() {
if (!Error)
return;
- for (std::vector<GlobalDecl>::iterator I = Aliases.begin(),
- E = Aliases.end(); I != E; ++I) {
- const GlobalDecl &GD = *I;
+ for (const GlobalDecl &GD : Aliases) {
StringRef MangledName = getMangledName(GD);
llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
auto *Alias = cast<llvm::GlobalAlias>(Entry);
@@ -637,15 +631,14 @@ void CodeGenModule::EmitCtorList(const CtorList &Fns, const char *GlobalName) {
Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy, nullptr);
// Construct the constructor and destructor arrays.
- SmallVector<llvm::Constant*, 8> Ctors;
- for (CtorList::const_iterator I = Fns.begin(), E = Fns.end(); I != E; ++I) {
+ SmallVector<llvm::Constant *, 8> Ctors;
+ for (const auto &I : Fns) {
llvm::Constant *S[] = {
- llvm::ConstantInt::get(Int32Ty, I->Priority, false),
- llvm::ConstantExpr::getBitCast(I->Initializer, CtorPFTy),
- (I->AssociatedData
- ? llvm::ConstantExpr::getBitCast(I->AssociatedData, VoidPtrTy)
- : llvm::Constant::getNullValue(VoidPtrTy))
- };
+ llvm::ConstantInt::get(Int32Ty, I.Priority, false),
+ llvm::ConstantExpr::getBitCast(I.Initializer, CtorPFTy),
+ (I.AssociatedData
+ ? llvm::ConstantExpr::getBitCast(I.AssociatedData, VoidPtrTy)
+ : llvm::Constant::getNullValue(VoidPtrTy))};
Ctors.push_back(llvm::ConstantStruct::get(CtorStructTy, S));
}
@@ -1029,12 +1022,9 @@ void CodeGenModule::EmitModuleLinkOptions() {
SmallVector<clang::Module *, 16> Stack;
// Seed the stack with imported modules.
- for (llvm::SetVector<clang::Module *>::iterator M = ImportedModules.begin(),
- MEnd = ImportedModules.end();
- M != MEnd; ++M) {
- if (Visited.insert(*M).second)
- Stack.push_back(*M);
- }
+ for (Module *M : ImportedModules)
+ if (Visited.insert(M).second)
+ Stack.push_back(M);
// Find all of the modules to import, making a little effort to prune
// non-leaf modules.
@@ -1070,12 +1060,9 @@ void CodeGenModule::EmitModuleLinkOptions() {
// to linker options inserted by things like #pragma comment().
SmallVector<llvm::Metadata *, 16> MetadataArgs;
Visited.clear();
- for (llvm::SetVector<clang::Module *>::iterator M = LinkModules.begin(),
- MEnd = LinkModules.end();
- M != MEnd; ++M) {
- if (Visited.insert(*M).second)
- addLinkOptionsPostorder(*this, *M, MetadataArgs, Visited);
- }
+ for (Module *M : LinkModules)
+ if (Visited.insert(M).second)
+ addLinkOptionsPostorder(*this, M, MetadataArgs, Visited);
std::reverse(MetadataArgs.begin(), MetadataArgs.end());
LinkerOptionsMetadata.append(MetadataArgs.begin(), MetadataArgs.end());
@@ -1231,8 +1218,9 @@ bool CodeGenModule::isInSanitizerBlacklist(llvm::Function *Fn,
bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV,
SourceLocation Loc, QualType Ty,
StringRef Category) const {
- // For now globals can be blacklisted only in ASan.
- if (!LangOpts.Sanitize.has(SanitizerKind::Address))
+ // For now globals can be blacklisted only in ASan and KASan.
+ if (!LangOpts.Sanitize.hasOneOf(
+ SanitizerKind::Address | SanitizerKind::KernelAddress))
return false;
const auto &SanitizerBL = getContext().getSanitizerBlacklist();
if (SanitizerBL.isBlacklistedGlobal(GV->getName(), Category))
@@ -3510,11 +3498,9 @@ static void EmitGlobalDeclMetadata(CodeGenModule &CGM,
/// to such functions with an unmangled name from inline assembly within the
/// same translation unit.
void CodeGenModule::EmitStaticExternCAliases() {
- for (StaticExternCMap::iterator I = StaticExternCValues.begin(),
- E = StaticExternCValues.end();
- I != E; ++I) {
- IdentifierInfo *Name = I->first;
- llvm::GlobalValue *Val = I->second;
+ for (auto &I : StaticExternCValues) {
+ IdentifierInfo *Name = I.first;
+ llvm::GlobalValue *Val = I.second;
if (Val && !getModule().getNamedValue(Name->getName()))
addUsedGlobal(llvm::GlobalAlias::create(Name->getName(), Val));
}
@@ -3674,3 +3660,17 @@ void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) {
CXXGlobalInits.push_back(InitFunction);
}
}
+
+llvm::MDTuple *CodeGenModule::CreateVTableBitSetEntry(
+ llvm::GlobalVariable *VTable, CharUnits Offset, const CXXRecordDecl *RD) {
+ std::string OutName;
+ llvm::raw_string_ostream Out(OutName);
+ getCXXABI().getMangleContext().mangleCXXVTableBitSet(RD, Out);
+
+ llvm::Metadata *BitsetOps[] = {
+ llvm::MDString::get(getLLVMContext(), Out.str()),
+ llvm::ConstantAsMetadata::get(VTable),
+ llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(Int64Ty, Offset.getQuantity()))};
+ return llvm::MDTuple::get(getLLVMContext(), BitsetOps);
+}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h
index edde426..8e671fa 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h
@@ -1115,6 +1115,11 @@ public:
void EmitVTableBitSetEntries(llvm::GlobalVariable *VTable,
const VTableLayout &VTLayout);
+ /// Create a bitset entry for the given vtable.
+ llvm::MDTuple *CreateVTableBitSetEntry(llvm::GlobalVariable *VTable,
+ CharUnits Offset,
+ const CXXRecordDecl *RD);
+
/// \brief Get the declaration of std::terminate for the platform.
llvm::Constant *getTerminateFn();
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp
index 024a45d..9ad5d14 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -806,6 +806,9 @@ struct CounterCoverageMappingBuilder
void VisitIfStmt(const IfStmt *S) {
extendRegion(S);
+ // Extend into the condition before we propagate through it below - this is
+ // needed to handle macros that generate the "if" but not the condition.
+ extendRegion(S->getCond());
Counter ParentCount = getRegion().getCounter();
Counter ThenCount = getRegionCounter(S);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 0a1a4ce..3f5ad5d 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -204,7 +204,8 @@ public:
llvm::Value *getVirtualFunctionPointer(CodeGenFunction &CGF, GlobalDecl GD,
llvm::Value *This,
- llvm::Type *Ty) override;
+ llvm::Type *Ty,
+ SourceLocation Loc) override;
llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
const CXXDestructorDecl *Dtor,
@@ -1439,13 +1440,15 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
llvm::Value *ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
GlobalDecl GD,
llvm::Value *This,
- llvm::Type *Ty) {
+ llvm::Type *Ty,
+ SourceLocation Loc) {
GD = GD.getCanonicalDecl();
Ty = Ty->getPointerTo()->getPointerTo();
llvm::Value *VTable = CGF.GetVTablePtr(This, Ty);
if (CGF.SanOpts.has(SanitizerKind::CFIVCall))
- CGF.EmitVTablePtrCheckForCall(cast<CXXMethodDecl>(GD.getDecl()), VTable);
+ CGF.EmitVTablePtrCheckForCall(cast<CXXMethodDecl>(GD.getDecl()), VTable,
+ CodeGenFunction::CFITCK_VCall, Loc);
uint64_t VTableIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(GD);
llvm::Value *VFuncPtr =
@@ -1463,7 +1466,8 @@ llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall(
Dtor, getFromDtorType(DtorType));
llvm::Type *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
llvm::Value *Callee =
- getVirtualFunctionPointer(CGF, GlobalDecl(Dtor, DtorType), This, Ty);
+ getVirtualFunctionPointer(CGF, GlobalDecl(Dtor, DtorType), This, Ty,
+ CE ? CE->getLocStart() : SourceLocation());
CGF.EmitCXXMemberOrOperatorCall(Dtor, Callee, ReturnValueSlot(), This,
/*ImplicitParam=*/nullptr, QualType(), CE);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index de30883..679516b 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -205,6 +205,9 @@ public:
CXXDtorType Type, bool ForVirtualBase,
bool Delegating, llvm::Value *This) override;
+ void emitVTableBitSetEntries(VPtrInfo *Info, const CXXRecordDecl *RD,
+ llvm::GlobalVariable *VTable);
+
void emitVTableDefinitions(CodeGenVTables &CGVT,
const CXXRecordDecl *RD) override;
@@ -221,8 +224,8 @@ public:
CharUnits VPtrOffset) override;
llvm::Value *getVirtualFunctionPointer(CodeGenFunction &CGF, GlobalDecl GD,
- llvm::Value *This,
- llvm::Type *Ty) override;
+ llvm::Value *This, llvm::Type *Ty,
+ SourceLocation Loc) override;
llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
const CXXDestructorDecl *Dtor,
@@ -1401,6 +1404,58 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
getFromDtorType(Type));
}
+void MicrosoftCXXABI::emitVTableBitSetEntries(VPtrInfo *Info,
+ const CXXRecordDecl *RD,
+ llvm::GlobalVariable *VTable) {
+ if (!getContext().getLangOpts().Sanitize.has(SanitizerKind::CFIVCall) &&
+ !getContext().getLangOpts().Sanitize.has(SanitizerKind::CFINVCall) &&
+ !getContext().getLangOpts().Sanitize.has(SanitizerKind::CFIDerivedCast) &&
+ !getContext().getLangOpts().Sanitize.has(SanitizerKind::CFIUnrelatedCast))
+ return;
+
+ llvm::NamedMDNode *BitsetsMD =
+ CGM.getModule().getOrInsertNamedMetadata("llvm.bitsets");
+ CharUnits PointerWidth = getContext().toCharUnitsFromBits(
+ getContext().getTargetInfo().getPointerWidth(0));
+
+ // FIXME: Add blacklisting scheme.
+
+ if (Info->PathToBaseWithVPtr.empty()) {
+ BitsetsMD->addOperand(
+ CGM.CreateVTableBitSetEntry(VTable, PointerWidth, RD));
+ return;
+ }
+
+ // Add a bitset entry for the least derived base belonging to this vftable.
+ BitsetsMD->addOperand(CGM.CreateVTableBitSetEntry(
+ VTable, PointerWidth, Info->PathToBaseWithVPtr.back()));
+
+ // Add a bitset entry for each derived class that is laid out at the same
+ // offset as the least derived base.
+ for (unsigned I = Info->PathToBaseWithVPtr.size() - 1; I != 0; --I) {
+ const CXXRecordDecl *DerivedRD = Info->PathToBaseWithVPtr[I - 1];
+ const CXXRecordDecl *BaseRD = Info->PathToBaseWithVPtr[I];
+
+ const ASTRecordLayout &Layout =
+ getContext().getASTRecordLayout(DerivedRD);
+ CharUnits Offset;
+ auto VBI = Layout.getVBaseOffsetsMap().find(BaseRD);
+ if (VBI == Layout.getVBaseOffsetsMap().end())
+ Offset = Layout.getBaseClassOffset(BaseRD);
+ else
+ Offset = VBI->second.VBaseOffset;
+ if (!Offset.isZero())
+ return;
+ BitsetsMD->addOperand(
+ CGM.CreateVTableBitSetEntry(VTable, PointerWidth, DerivedRD));
+ }
+
+ // Finally do the same for the most derived class.
+ if (Info->FullOffsetInMDC.isZero())
+ BitsetsMD->addOperand(
+ CGM.CreateVTableBitSetEntry(VTable, PointerWidth, RD));
+}
+
void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
const CXXRecordDecl *RD) {
MicrosoftVTableContext &VFTContext = CGM.getMicrosoftVTableContext();
@@ -1423,6 +1478,8 @@ void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
VTLayout.getNumVTableThunks(), RTTI);
VTable->setInitializer(Init);
+
+ emitVTableBitSetEntries(Info, RD, VTable);
}
}
@@ -1585,10 +1642,54 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
return VTable;
}
+// Compute the identity of the most derived class whose virtual table is located
+// at the given offset into RD.
+static const CXXRecordDecl *getClassAtVTableLocation(ASTContext &Ctx,
+ const CXXRecordDecl *RD,
+ CharUnits Offset) {
+ if (Offset.isZero())
+ return RD;
+
+ const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
+ const CXXRecordDecl *MaxBase = nullptr;
+ CharUnits MaxBaseOffset;
+ for (auto &&B : RD->bases()) {
+ const CXXRecordDecl *Base = B.getType()->getAsCXXRecordDecl();
+ CharUnits BaseOffset = Layout.getBaseClassOffset(Base);
+ if (BaseOffset <= Offset && BaseOffset > MaxBaseOffset) {
+ MaxBase = Base;
+ MaxBaseOffset = BaseOffset;
+ }
+ }
+ for (auto &&B : RD->vbases()) {
+ const CXXRecordDecl *Base = B.getType()->getAsCXXRecordDecl();
+ CharUnits BaseOffset = Layout.getVBaseClassOffset(Base);
+ if (BaseOffset <= Offset && BaseOffset > MaxBaseOffset) {
+ MaxBase = Base;
+ MaxBaseOffset = BaseOffset;
+ }
+ }
+ assert(MaxBase);
+ return getClassAtVTableLocation(Ctx, MaxBase, Offset - MaxBaseOffset);
+}
+
+// Compute the identity of the most derived class whose virtual table is located
+// at the MethodVFTableLocation ML.
+static const CXXRecordDecl *
+getClassAtVTableLocation(ASTContext &Ctx, GlobalDecl GD,
+ MicrosoftVTableContext::MethodVFTableLocation &ML) {
+ const CXXRecordDecl *RD = ML.VBase;
+ if (!RD)
+ RD = cast<CXXMethodDecl>(GD.getDecl())->getParent();
+
+ return getClassAtVTableLocation(Ctx, RD, ML.VFPtrOffset);
+}
+
llvm::Value *MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
GlobalDecl GD,
llvm::Value *This,
- llvm::Type *Ty) {
+ llvm::Type *Ty,
+ SourceLocation Loc) {
GD = GD.getCanonicalDecl();
CGBuilderTy &Builder = CGF.Builder;
@@ -1599,6 +1700,10 @@ llvm::Value *MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
MicrosoftVTableContext::MethodVFTableLocation ML =
CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD);
+ if (CGF.SanOpts.has(SanitizerKind::CFIVCall))
+ CGF.EmitVTablePtrCheck(getClassAtVTableLocation(getContext(), GD, ML),
+ VTable, CodeGenFunction::CFITCK_VCall, Loc);
+
llvm::Value *VFuncPtr =
Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn");
return Builder.CreateLoad(VFuncPtr);
@@ -1616,7 +1721,8 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(
Dtor, StructorType::Deleting);
llvm::Type *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
- llvm::Value *Callee = getVirtualFunctionPointer(CGF, GD, This, Ty);
+ llvm::Value *Callee = getVirtualFunctionPointer(
+ CGF, GD, This, Ty, CE ? CE->getLocStart() : SourceLocation());
ASTContext &Context = getContext();
llvm::Value *ImplicitParam = llvm::ConstantInt::get(
@@ -2351,7 +2457,7 @@ MicrosoftCXXABI::EmitFullMemberPointer(llvm::Constant *FirstField,
if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance)) {
CharUnits Offs = CharUnits::Zero();
- if (RD->getNumVBases())
+ if (VBTableIndex && RD->getNumVBases())
Offs = getContext().getASTRecordLayout(RD).getVBPtrOffset();
fields.push_back(llvm::ConstantInt::get(CGM.IntTy, Offs.getQuantity()));
}
@@ -2423,25 +2529,15 @@ MicrosoftCXXABI::BuildMemberPointer(const CXXRecordDecl *RD,
FirstField = CGM.GetAddrOfFunction(MD, Ty);
FirstField = llvm::ConstantExpr::getBitCast(FirstField, CGM.VoidPtrTy);
} else {
- if (!CGM.getTypes().isFuncTypeConvertible(
- MD->getType()->castAs<FunctionType>())) {
- CGM.ErrorUnsupported(MD, "pointer to virtual member function with "
- "incomplete return or parameter type");
- FirstField = llvm::Constant::getNullValue(CGM.VoidPtrTy);
- } else if (FPT->getCallConv() == CC_X86FastCall) {
- CGM.ErrorUnsupported(MD, "pointer to fastcall virtual member function");
- FirstField = llvm::Constant::getNullValue(CGM.VoidPtrTy);
- } else {
- auto &VTableContext = CGM.getMicrosoftVTableContext();
- MicrosoftVTableContext::MethodVFTableLocation ML =
- VTableContext.getMethodVFTableLocation(MD);
- llvm::Function *Thunk = EmitVirtualMemPtrThunk(MD, ML);
- FirstField = llvm::ConstantExpr::getBitCast(Thunk, CGM.VoidPtrTy);
- // Include the vfptr adjustment if the method is in a non-primary vftable.
- NonVirtualBaseAdjustment += ML.VFPtrOffset;
- if (ML.VBase)
- VBTableIndex = VTableContext.getVBTableIndex(RD, ML.VBase) * 4;
- }
+ auto &VTableContext = CGM.getMicrosoftVTableContext();
+ MicrosoftVTableContext::MethodVFTableLocation ML =
+ VTableContext.getMethodVFTableLocation(MD);
+ llvm::Function *Thunk = EmitVirtualMemPtrThunk(MD, ML);
+ FirstField = llvm::ConstantExpr::getBitCast(Thunk, CGM.VoidPtrTy);
+ // Include the vfptr adjustment if the method is in a non-primary vftable.
+ NonVirtualBaseAdjustment += ML.VFPtrOffset;
+ if (ML.VBase)
+ VBTableIndex = VTableContext.getVBTableIndex(RD, ML.VBase) * 4;
}
// The rest of the fields are common with data member pointers.
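The recursion in getClassAtVTableLocation() above reduces to: among a class's
(virtual and non-virtual) bases, pick the one laid out at the greatest offset
that does not exceed the requested offset, then descend into it with the
remainder. A self-contained toy version over a hand-written layout table
(plain structs, not the AST record layout) looks like this:

#include <cassert>
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct Class {
  std::string Name;
  // (base, byte offset of that base subobject within this class)
  std::vector<std::pair<const Class *, uint64_t>> Bases;
};

// Return the class whose subobject starts exactly at Offset within RD.
const Class *classAtOffset(const Class *RD, uint64_t Offset) {
  if (Offset == 0)
    return RD;
  const Class *MaxBase = nullptr;
  uint64_t MaxBaseOffset = 0;
  for (const auto &B : RD->Bases) {
    if (B.second <= Offset && B.second > MaxBaseOffset) {
      MaxBase = B.first;
      MaxBaseOffset = B.second;
    }
  }
  assert(MaxBase && "offset does not fall inside any base");
  return classAtOffset(MaxBase, Offset - MaxBaseOffset);
}

int main() {
  Class A{"A", {}}, B{"B", {}};
  Class D{"D", {{&A, 0}, {&B, 8}}}; // B's subobject starts at byte 8 of D
  std::cout << classAtOffset(&D, 8)->Name << "\n"; // prints "B"
}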
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/SanitizerMetadata.cpp b/contrib/llvm/tools/clang/lib/CodeGen/SanitizerMetadata.cpp
index 7c38b28..2a338ba 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/SanitizerMetadata.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/SanitizerMetadata.cpp
@@ -25,7 +25,8 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
SourceLocation Loc, StringRef Name,
QualType Ty, bool IsDynInit,
bool IsBlacklisted) {
- if (!CGM.getLangOpts().Sanitize.has(SanitizerKind::Address))
+ if (!CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
+ SanitizerKind::KernelAddress))
return;
IsDynInit &= !CGM.isInSanitizerBlacklist(GV, Loc, Ty, "init");
IsBlacklisted |= CGM.isInSanitizerBlacklist(GV, Loc, Ty);
@@ -56,7 +57,8 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
const VarDecl &D, bool IsDynInit) {
- if (!CGM.getLangOpts().Sanitize.has(SanitizerKind::Address))
+ if (!CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
+ SanitizerKind::KernelAddress))
return;
std::string QualName;
llvm::raw_string_ostream OS(QualName);
@@ -67,7 +69,8 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
void SanitizerMetadata::disableSanitizerForGlobal(llvm::GlobalVariable *GV) {
// For now, just make sure the global is not modified by the ASan
// instrumentation.
- if (CGM.getLangOpts().Sanitize.has(SanitizerKind::Address))
+ if (CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
+ SanitizerKind::KernelAddress))
reportGlobalToASan(GV, SourceLocation(), "", QualType(), false, true);
}
diff --git a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp
index 8cca1de..0f3ebef 100644
--- a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp
@@ -277,7 +277,8 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
// Add a default value of -mlinker-version=, if one was given and the user
// didn't specify one.
#if defined(HOST_LINK_VERSION)
- if (!Args.hasArg(options::OPT_mlinker_version_EQ)) {
+ if (!Args.hasArg(options::OPT_mlinker_version_EQ) &&
+ strlen(HOST_LINK_VERSION) > 0) {
DAL->AddJoinedArg(0, Opts->getOption(options::OPT_mlinker_version_EQ),
HOST_LINK_VERSION);
DAL->getLastArg(options::OPT_mlinker_version_EQ)->claim();
@@ -814,9 +815,12 @@ bool Driver::HandleImmediateArgs(const Compilation &C) {
return true;
}
+// Display an action graph human-readably. Action A is the "sink" node
+// and latest-occurring action. Traversal is in pre-order, visiting the
+// inputs to each action before printing the action itself.
static unsigned PrintActions1(const Compilation &C, Action *A,
std::map<Action*, unsigned> &Ids) {
- if (Ids.count(A))
+ if (Ids.count(A)) // A was already visited.
return Ids[A];
std::string str;
@@ -847,6 +851,8 @@ static unsigned PrintActions1(const Compilation &C, Action *A,
return Id;
}
+// Print the action graphs in a compilation C.
+// For example "clang -c file1.c file2.c" is composed of two subgraphs.
void Driver::PrintActions(const Compilation &C) const {
std::map<Action*, unsigned> Ids;
for (ActionList::const_iterator it = C.getActions().begin(),
@@ -1356,7 +1362,7 @@ Driver::ConstructPhaseAction(const ToolChain &TC, const ArgList &Args,
types::TY_LLVM_BC);
}
case phases::Backend: {
- if (IsUsingLTO(TC, Args)) {
+ if (IsUsingLTO(Args)) {
types::ID Output =
Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
return llvm::make_unique<BackendJobAction>(std::move(Input), Output);
@@ -1377,14 +1383,8 @@ Driver::ConstructPhaseAction(const ToolChain &TC, const ArgList &Args,
llvm_unreachable("invalid phase in ConstructPhaseAction");
}
-bool Driver::IsUsingLTO(const ToolChain &TC, const ArgList &Args) const {
- if (TC.getSanitizerArgs().needsLTO())
- return true;
-
- if (Args.hasFlag(options::OPT_flto, options::OPT_fno_lto, false))
- return true;
-
- return false;
+bool Driver::IsUsingLTO(const ArgList &Args) const {
+ return Args.hasFlag(options::OPT_flto, options::OPT_fno_lto, false);
}
void Driver::BuildJobs(Compilation &C) const {
@@ -1563,8 +1563,9 @@ void Driver::BuildJobsForAction(Compilation &C,
if (Input.getOption().matches(options::OPT_INPUT)) {
const char *Name = Input.getValue();
Result = InputInfo(Name, A->getType(), Name);
- } else
+ } else {
Result = InputInfo(&Input, A->getType(), "");
+ }
return;
}
@@ -1738,7 +1739,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C,
// Determine what the derived output name should be.
const char *NamedOutput;
- if ((JA.getType() == types::TY_Object || JA.getType() == types::TY_LTO_BC) &&
+ if (JA.getType() == types::TY_Object &&
C.getArgs().hasArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)) {
// The /Fo or /o flag decides the object filename.
StringRef Val = C.getArgs().getLastArg(options::OPT__SLASH_Fo,
@@ -2099,6 +2100,8 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
TC = new toolchains::Hexagon_TC(*this, Target, Args);
else if (Target.getArch() == llvm::Triple::xcore)
TC = new toolchains::XCore(*this, Target, Args);
+ else if (Target.getArch() == llvm::Triple::shave)
+ TC = new toolchains::SHAVEToolChain(*this, Target, Args);
else if (Target.isOSBinFormatELF())
TC = new toolchains::Generic_ELF(*this, Target, Args);
else if (Target.isOSBinFormatMachO())
@@ -2112,13 +2115,12 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
}
bool Driver::ShouldUseClangCompiler(const JobAction &JA) const {
- // Check if user requested no clang, or clang doesn't understand this type (we
- // only handle single inputs for now).
+ // Say "no" if there is not exactly one input of a type clang understands.
if (JA.size() != 1 ||
!types::isAcceptedByClang((*JA.begin())->getType()))
return false;
- // Otherwise make sure this is an action clang understands.
+ // And say "no" if this is not a kind of action clang understands.
if (!isa<PreprocessJobAction>(JA) && !isa<PrecompileJobAction>(JA) &&
!isa<CompileJobAction>(JA) && !isa<BackendJobAction>(JA))
return false;
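The memoized traversal that PrintActions1() implements boils down to: number a
node only after all of its inputs have been numbered, and reuse the id of
anything already visited so shared inputs are printed once. A toy stand-in
using plain structs instead of clang::driver::Action:

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Node {
  std::string Name;
  std::vector<Node *> Inputs;
};

static unsigned printNode(Node *N, std::map<Node *, unsigned> &Ids) {
  auto It = Ids.find(N);
  if (It != Ids.end())
    return It->second; // already printed; just reuse its id
  std::string Inputs;
  for (Node *In : N->Inputs) { // number the inputs first
    if (!Inputs.empty())
      Inputs += ", ";
    Inputs += std::to_string(printNode(In, Ids));
  }
  unsigned Id = Ids.size();
  Ids[N] = Id;
  std::cout << Id << ": " << N->Name << " {" << Inputs << "}\n";
  return Id;
}

int main() {
  Node A{"input: a.c", {}}, B{"input: b.c", {}};
  Node CA{"compile", {&A}}, CB{"compile", {&B}};
  Node Link{"link", {&CA, &CB}};
  std::map<Node *, unsigned> Ids;
  printNode(&Link, Ids);
}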
diff --git a/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp b/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp
index d824fe4..7216121 100644
--- a/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp
@@ -522,3 +522,12 @@ MSVCToolChain::ComputeEffectiveClangTriple(const ArgList &Args,
}
return Triple.getTriple();
}
+
+SanitizerMask MSVCToolChain::getSupportedSanitizers() const {
+ SanitizerMask Res = ToolChain::getSupportedSanitizers();
+ Res |= SanitizerKind::Address;
+ // CFI checks are not implemented for MSVC ABI for now.
+ Res &= ~SanitizerKind::CFI;
+ Res &= ~SanitizerKind::CFICastStrict;
+ return Res;
+}
diff --git a/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp b/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp
index 72530b4..14c3702 100644
--- a/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp
@@ -25,7 +25,7 @@ using namespace clang::driver;
using namespace llvm::opt;
enum : SanitizerMask {
- NeedsUbsanRt = Undefined | Integer,
+ NeedsUbsanRt = Undefined | Integer | CFI,
NotAllowedWithTrap = Vptr,
RequiresPIE = Memory | DataFlow,
NeedsUnwindTables = Address | Thread | Memory | DataFlow,
@@ -34,6 +34,9 @@ enum : SanitizerMask {
Unrecoverable = Address | Unreachable | Return,
LegacyFsanitizeRecoverMask = Undefined | Integer,
NeedsLTO = CFI,
+ TrappingSupported =
+ (Undefined & ~Vptr) | UnsignedIntegerOverflow | LocalBounds | CFI,
+ TrappingDefault = CFI,
};
enum CoverageFeature {
@@ -74,27 +77,6 @@ static std::string describeSanitizeArg(const llvm::opt::Arg *A,
/// Sanitizers set.
static std::string toString(const clang::SanitizerSet &Sanitizers);
-static SanitizerMask getToolchainUnsupportedKinds(const ToolChain &TC) {
- bool IsFreeBSD = TC.getTriple().getOS() == llvm::Triple::FreeBSD;
- bool IsLinux = TC.getTriple().getOS() == llvm::Triple::Linux;
- bool IsX86 = TC.getTriple().getArch() == llvm::Triple::x86;
- bool IsX86_64 = TC.getTriple().getArch() == llvm::Triple::x86_64;
- bool IsMIPS64 = TC.getTriple().getArch() == llvm::Triple::mips64 ||
- TC.getTriple().getArch() == llvm::Triple::mips64el;
-
- SanitizerMask Unsupported = 0;
- if (!(IsLinux && (IsX86_64 || IsMIPS64))) {
- Unsupported |= Memory | DataFlow;
- }
- if (!((IsLinux || IsFreeBSD) && (IsX86_64 || IsMIPS64))) {
- Unsupported |= Thread;
- }
- if (!(IsLinux && (IsX86 || IsX86_64))) {
- Unsupported |= Function;
- }
- return Unsupported;
-}
-
static bool getDefaultBlacklist(const Driver &D, SanitizerMask Kinds,
std::string &BLPath) {
const char *BlacklistFile = nullptr;
@@ -116,8 +98,62 @@ static bool getDefaultBlacklist(const Driver &D, SanitizerMask Kinds,
return false;
}
+/// Sets group bits for every group that has at least one representative already
+/// enabled in \p Kinds.
+static SanitizerMask setGroupBits(SanitizerMask Kinds) {
+#define SANITIZER(NAME, ID)
+#define SANITIZER_GROUP(NAME, ID, ALIAS) \
+ if (Kinds & SanitizerKind::ID) \
+ Kinds |= SanitizerKind::ID##Group;
+#include "clang/Basic/Sanitizers.def"
+ return Kinds;
+}
+
+static SanitizerMask parseSanitizeTrapArgs(const Driver &D,
+ const llvm::opt::ArgList &Args) {
+ SanitizerMask TrapRemove = 0; // During the loop below, the accumulated set of
+ // sanitizers disabled by the current sanitizer
+ // argument or any argument after it.
+ SanitizerMask TrappingKinds = 0;
+ SanitizerMask TrappingSupportedWithGroups = setGroupBits(TrappingSupported);
+
+ for (ArgList::const_reverse_iterator I = Args.rbegin(), E = Args.rend();
+ I != E; ++I) {
+ const auto *Arg = *I;
+ if (Arg->getOption().matches(options::OPT_fsanitize_trap_EQ)) {
+ Arg->claim();
+ SanitizerMask Add = parseArgValues(D, Arg, true);
+ Add &= ~TrapRemove;
+ if (SanitizerMask InvalidValues = Add & ~TrappingSupportedWithGroups) {
+ SanitizerSet S;
+ S.Mask = InvalidValues;
+ D.Diag(diag::err_drv_unsupported_option_argument) << "-fsanitize-trap"
+ << toString(S);
+ }
+ TrappingKinds |= expandSanitizerGroups(Add) & ~TrapRemove;
+ } else if (Arg->getOption().matches(options::OPT_fno_sanitize_trap_EQ)) {
+ Arg->claim();
+ TrapRemove |= expandSanitizerGroups(parseArgValues(D, Arg, true));
+ } else if (Arg->getOption().matches(
+ options::OPT_fsanitize_undefined_trap_on_error)) {
+ Arg->claim();
+ TrappingKinds |=
+ expandSanitizerGroups(UndefinedGroup & ~TrapRemove) & ~TrapRemove;
+ } else if (Arg->getOption().matches(
+ options::OPT_fno_sanitize_undefined_trap_on_error)) {
+ Arg->claim();
+ TrapRemove |= expandSanitizerGroups(UndefinedGroup);
+ }
+ }
+
+ // Apply default trapping behavior.
+ TrappingKinds |= TrappingDefault & ~TrapRemove;
+
+ return TrappingKinds;
+}
+
bool SanitizerArgs::needsUbsanRt() const {
- return !UbsanTrapOnError && (Sanitizers.Mask & NeedsUbsanRt) &&
+ return (Sanitizers.Mask & NeedsUbsanRt & ~TrapSanitizers.Mask) &&
!Sanitizers.has(Address) &&
!Sanitizers.has(Memory) &&
!Sanitizers.has(Thread);
@@ -131,19 +167,15 @@ bool SanitizerArgs::needsUnwindTables() const {
return Sanitizers.Mask & NeedsUnwindTables;
}
-bool SanitizerArgs::needsLTO() const {
- return Sanitizers.Mask & NeedsLTO;
-}
-
void SanitizerArgs::clear() {
Sanitizers.clear();
RecoverableSanitizers.clear();
+ TrapSanitizers.clear();
BlacklistFiles.clear();
CoverageFeatures = 0;
MsanTrackOrigins = 0;
AsanFieldPadding = 0;
AsanZeroBaseShadow = false;
- UbsanTrapOnError = false;
AsanSharedRuntime = false;
LinkCXXRuntimes = false;
}
@@ -162,10 +194,13 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
SanitizerMask DiagnosedKinds = 0; // All Kinds we have diagnosed up to now.
// Used to deduplicate diagnostics.
SanitizerMask Kinds = 0;
- SanitizerMask NotSupported = getToolchainUnsupportedKinds(TC);
+ SanitizerMask Supported = setGroupBits(TC.getSupportedSanitizers());
ToolChain::RTTIMode RTTIMode = TC.getRTTIMode();
const Driver &D = TC.getDriver();
+ SanitizerMask TrappingKinds = parseSanitizeTrapArgs(D, Args);
+ SanitizerMask InvalidTrappingKinds = TrappingKinds & NotAllowedWithTrap;
+
for (ArgList::const_reverse_iterator I = Args.rbegin(), E = Args.rend();
I != E; ++I) {
const auto *Arg = *I;
@@ -180,14 +215,20 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
// sanitizers in Add are those which have been explicitly enabled.
// Diagnose them.
if (SanitizerMask KindsToDiagnose =
- Add & NotSupported & ~DiagnosedKinds) {
- // Only diagnose the new kinds.
+ Add & InvalidTrappingKinds & ~DiagnosedKinds) {
+ std::string Desc = describeSanitizeArg(*I, KindsToDiagnose);
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << Desc << "-fsanitize-trap=undefined";
+ DiagnosedKinds |= KindsToDiagnose;
+ }
+ Add &= ~InvalidTrappingKinds;
+ if (SanitizerMask KindsToDiagnose = Add & ~Supported & ~DiagnosedKinds) {
std::string Desc = describeSanitizeArg(*I, KindsToDiagnose);
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Desc << TC.getTriple().str();
DiagnosedKinds |= KindsToDiagnose;
}
- Add &= ~NotSupported;
+ Add &= Supported;
// Test for -fno-rtti + explicit -fsanitizer=vptr before expanding groups
// so we don't error out if -fno-rtti and -fsanitize=undefined were
@@ -216,7 +257,8 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
Add &= ~AllRemove;
// Silently discard any unsupported sanitizers implicitly enabled through
// group expansion.
- Add &= ~NotSupported;
+ Add &= ~InvalidTrappingKinds;
+ Add &= Supported;
Kinds |= Add;
} else if (Arg->getOption().matches(options::OPT_fno_sanitize_EQ)) {
@@ -234,22 +276,20 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
Kinds &= ~Vptr;
}
- // Warn about undefined sanitizer options that require runtime support.
- UbsanTrapOnError =
- Args.hasFlag(options::OPT_fsanitize_undefined_trap_on_error,
- options::OPT_fno_sanitize_undefined_trap_on_error, false);
- if (UbsanTrapOnError && (Kinds & NotAllowedWithTrap)) {
- D.Diag(clang::diag::err_drv_argument_not_allowed_with)
- << lastArgumentForMask(D, Args, NotAllowedWithTrap)
- << "-fsanitize-undefined-trap-on-error";
- Kinds &= ~NotAllowedWithTrap;
+ // Check that LTO is enabled if we need it.
+ if ((Kinds & NeedsLTO) && !D.IsUsingLTO(Args)) {
+ D.Diag(diag::err_drv_argument_only_allowed_with)
+ << lastArgumentForMask(D, Args, Kinds & NeedsLTO) << "-flto";
}
// Warn about incompatible groups of sanitizers.
std::pair<SanitizerMask, SanitizerMask> IncompatibleGroups[] = {
std::make_pair(Address, Thread), std::make_pair(Address, Memory),
std::make_pair(Thread, Memory), std::make_pair(Leak, Thread),
- std::make_pair(Leak, Memory)};
+ std::make_pair(Leak, Memory), std::make_pair(KernelAddress, Address),
+ std::make_pair(KernelAddress, Leak),
+ std::make_pair(KernelAddress, Thread),
+ std::make_pair(KernelAddress, Memory)};
for (auto G : IncompatibleGroups) {
SanitizerMask Group = G.first;
if (Kinds & Group) {
@@ -305,6 +345,8 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
RecoverableKinds &= Kinds;
RecoverableKinds &= ~Unrecoverable;
+ TrappingKinds &= Kinds;
+
// Setup blacklist files.
// Add default blacklist from resource directory.
{
@@ -460,6 +502,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
// Finally, initialize the set of available and recoverable sanitizers.
Sanitizers.Mask |= Kinds;
RecoverableSanitizers.Mask |= RecoverableKinds;
+ TrapSanitizers.Mask |= TrappingKinds;
}
static std::string toString(const clang::SanitizerSet &Sanitizers) {
@@ -484,8 +527,9 @@ void SanitizerArgs::addArgs(const llvm::opt::ArgList &Args,
CmdArgs.push_back(Args.MakeArgString("-fsanitize-recover=" +
toString(RecoverableSanitizers)));
- if (UbsanTrapOnError)
- CmdArgs.push_back("-fsanitize-undefined-trap-on-error");
+ if (!TrapSanitizers.empty())
+ CmdArgs.push_back(
+ Args.MakeArgString("-fsanitize-trap=" + toString(TrapSanitizers)));
for (const auto &BLPath : BlacklistFiles) {
SmallString<64> BlacklistOpt("-fsanitize-blacklist=");
@@ -528,7 +572,9 @@ SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
assert((A->getOption().matches(options::OPT_fsanitize_EQ) ||
A->getOption().matches(options::OPT_fno_sanitize_EQ) ||
A->getOption().matches(options::OPT_fsanitize_recover_EQ) ||
- A->getOption().matches(options::OPT_fno_sanitize_recover_EQ)) &&
+ A->getOption().matches(options::OPT_fno_sanitize_recover_EQ) ||
+ A->getOption().matches(options::OPT_fsanitize_trap_EQ) ||
+ A->getOption().matches(options::OPT_fno_sanitize_trap_EQ)) &&
"Invalid argument in parseArgValues!");
SanitizerMask Kinds = 0;
for (int i = 0, n = A->getNumValues(); i != n; ++i) {
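A minimal, self-contained sketch of the last-flag-wins accumulation that
parseSanitizeTrapArgs() above implements: arguments are scanned in reverse,
later -fno-sanitize-trap= values veto earlier -fsanitize-trap= values, and the
trapping default is applied only to kinds that were never vetoed. The mask
names and the argument encoding below are simplified stand-ins, not Clang's.

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

using Mask = uint64_t;
constexpr Mask CFI = 1 << 0, Undefined = 1 << 1, Bounds = 1 << 2;
constexpr Mask TrappingDefault = CFI; // stand-in for the TrappingDefault constant

// Each element is {Enable, Kinds}: {true, M} for -fsanitize-trap=M,
// {false, M} for -fno-sanitize-trap=M, in command-line order.
Mask parseTrapArgs(const std::vector<std::pair<bool, Mask>> &Args) {
  Mask Remove = 0, Kinds = 0;
  for (auto It = Args.rbegin(); It != Args.rend(); ++It) {
    if (It->first)
      Kinds |= It->second & ~Remove; // keep only kinds not disabled later
    else
      Remove |= It->second;          // later "no-" flags win over earlier ones
  }
  return Kinds | (TrappingDefault & ~Remove);
}

int main() {
  // -fsanitize-trap=undefined,bounds -fno-sanitize-trap=bounds
  // leaves Undefined plus the CFI default enabled.
  std::cout << parseTrapArgs({{true, Undefined | Bounds}, {false, Bounds}}) << "\n";
}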
diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChain.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChain.cpp
index 82eb854..da020a2 100644
--- a/contrib/llvm/tools/clang/lib/Driver/ToolChain.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/ToolChain.cpp
@@ -481,3 +481,12 @@ bool ToolChain::AddFastMathRuntimeIfAvailable(const ArgList &Args,
CmdArgs.push_back(Args.MakeArgString(Path));
return true;
}
+
+SanitizerMask ToolChain::getSupportedSanitizers() const {
+ // Return sanitizers which don't require runtime support and are not
+ // platform or architecture-dependent.
+ using namespace SanitizerKind;
+ return (Undefined & ~Vptr & ~Function) | CFI | CFICastStrict |
+ UnsignedIntegerOverflow | LocalBounds;
+}
+
diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp
index 115a16f..987e063 100644
--- a/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp
@@ -42,10 +42,6 @@ using namespace llvm::opt;
MachO::MachO(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: ToolChain(D, Triple, Args) {
- getProgramPaths().push_back(getDriver().getInstalledDir());
- if (getDriver().getInstalledDir() != getDriver().Dir)
- getProgramPaths().push_back(getDriver().Dir);
-
// We expect 'as', 'ld', etc. to be adjacent to our install dir.
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
@@ -53,28 +49,8 @@ MachO::MachO(const Driver &D, const llvm::Triple &Triple,
}
/// Darwin - Darwin tool chain for i386 and x86_64.
-Darwin::Darwin(const Driver & D, const llvm::Triple & Triple,
- const ArgList & Args)
- : MachO(D, Triple, Args), TargetInitialized(false) {
- // Compute the initial Darwin version from the triple
- unsigned Major, Minor, Micro;
- if (!Triple.getMacOSXVersion(Major, Minor, Micro))
- getDriver().Diag(diag::err_drv_invalid_darwin_version) <<
- Triple.getOSName();
- llvm::raw_string_ostream(MacosxVersionMin)
- << Major << '.' << Minor << '.' << Micro;
-
- // FIXME: DarwinVersion is only used to find GCC's libexec directory.
- // It should be removed when we stop supporting that.
- DarwinVersion[0] = Minor + 4;
- DarwinVersion[1] = Micro;
- DarwinVersion[2] = 0;
-
- // Compute the initial iOS version from the triple
- Triple.getiOSVersion(Major, Minor, Micro);
- llvm::raw_string_ostream(iOSVersionMin)
- << Major << '.' << Minor << '.' << Micro;
-}
+Darwin::Darwin(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
+ : MachO(D, Triple, Args), TargetInitialized(false) {}
types::ID MachO::LookupTypeForExtension(const char *Ext) const {
types::ID Ty = types::lookupTypeForExtension(Ext);
@@ -403,26 +379,10 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args,
const SanitizerArgs &Sanitize = getSanitizerArgs();
-
- if (Sanitize.needsAsanRt()) {
- if (!isTargetMacOS() && !isTargetIOSSimulator()) {
- // FIXME: Move this check to SanitizerArgs::filterUnsupportedKinds.
- getDriver().Diag(diag::err_drv_clang_unsupported_per_platform)
- << "-fsanitize=address";
- } else {
- AddLinkSanitizerLibArgs(Args, CmdArgs, "asan");
- }
- }
-
- if (Sanitize.needsUbsanRt()) {
- if (!isTargetMacOS() && !isTargetIOSSimulator()) {
- // FIXME: Move this check to SanitizerArgs::filterUnsupportedKinds.
- getDriver().Diag(diag::err_drv_clang_unsupported_per_platform)
- << "-fsanitize=undefined";
- } else {
- AddLinkSanitizerLibArgs(Args, CmdArgs, "ubsan");
- }
- }
+ if (Sanitize.needsAsanRt())
+ AddLinkSanitizerLibArgs(Args, CmdArgs, "asan");
+ if (Sanitize.needsUbsanRt())
+ AddLinkSanitizerLibArgs(Args, CmdArgs, "ubsan");
// Otherwise link libSystem, then the dynamic runtime library, and finally any
// target specific static runtime library.
@@ -499,8 +459,8 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
} else if (!OSXVersion && !iOSVersion) {
// If no deployment target was specified on the command line, check for
// environment defines.
- StringRef OSXTarget;
- StringRef iOSTarget;
+ std::string OSXTarget;
+ std::string iOSTarget;
if (char *env = ::getenv("MACOSX_DEPLOYMENT_TARGET"))
OSXTarget = env;
if (char *env = ::getenv("IPHONEOS_DEPLOYMENT_TARGET"))
@@ -519,13 +479,26 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
}
}
- // If no OSX or iOS target has been specified and we're compiling for armv7,
- // go ahead as assume we're targeting iOS.
- StringRef MachOArchName = getMachOArchName(Args);
- if (OSXTarget.empty() && iOSTarget.empty() &&
- (MachOArchName == "armv7" || MachOArchName == "armv7s" ||
- MachOArchName == "arm64"))
- iOSTarget = iOSVersionMin;
+ // If no OSX or iOS target has been specified, try to guess platform
+ // from arch name and compute the version from the triple.
+ if (OSXTarget.empty() && iOSTarget.empty()) {
+ StringRef MachOArchName = getMachOArchName(Args);
+ unsigned Major, Minor, Micro;
+ if (MachOArchName == "armv7" || MachOArchName == "armv7s" ||
+ MachOArchName == "arm64") {
+ getTriple().getiOSVersion(Major, Minor, Micro);
+ llvm::raw_string_ostream(iOSTarget) << Major << '.' << Minor << '.'
+ << Micro;
+ } else if (MachOArchName != "armv6m" && MachOArchName != "armv7m" &&
+ MachOArchName != "armv7em") {
+ if (!getTriple().getMacOSXVersion(Major, Minor, Micro)) {
+ getDriver().Diag(diag::err_drv_invalid_darwin_version)
+ << getTriple().getOSName();
+ }
+ llvm::raw_string_ostream(OSXTarget) << Major << '.' << Minor << '.'
+ << Micro;
+ }
+ }
// Allow conflicts among OSX and iOS for historical reasons, but choose the
// default platform.
@@ -546,12 +519,6 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
const Option O = Opts.getOption(options::OPT_miphoneos_version_min_EQ);
iOSVersion = Args.MakeJoinedArg(nullptr, O, iOSTarget);
Args.append(iOSVersion);
- } else if (MachOArchName != "armv6m" && MachOArchName != "armv7m" &&
- MachOArchName != "armv7em") {
- // Otherwise, assume we are targeting OS X.
- const Option O = Opts.getOption(options::OPT_mmacosx_version_min_EQ);
- OSXVersion = Args.MakeJoinedArg(nullptr, O, MacosxVersionMin);
- Args.append(OSXVersion);
}
}
@@ -1101,6 +1068,18 @@ void Darwin::CheckObjCARC() const {
getDriver().Diag(diag::err_arc_unsupported_on_toolchain);
}
+SanitizerMask Darwin::getSupportedSanitizers() const {
+ SanitizerMask Res = ToolChain::getSupportedSanitizers();
+ if (isTargetMacOS() || isTargetIOSSimulator()) {
+ // ASan and UBSan are available on Mac OS and on iOS simulator.
+ Res |= SanitizerKind::Address;
+ Res |= SanitizerKind::Vptr;
+ }
+ if (isTargetMacOS())
+ Res |= SanitizerKind::SafeStack;
+ return Res;
+}
+
/// Generic_GCC - A tool chain using the 'gcc' command to perform
/// all subcommands; this relies on gcc translating the majority of
/// command line options.
@@ -2084,6 +2063,8 @@ bool Generic_GCC::IsIntegratedAssemblerDefault() const {
case llvm::Triple::aarch64_be:
case llvm::Triple::arm:
case llvm::Triple::armeb:
+ case llvm::Triple::bpfel:
+ case llvm::Triple::bpfeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
case llvm::Triple::ppc:
@@ -2739,6 +2720,24 @@ bool FreeBSD::isPIEDefault() const {
return getSanitizerArgs().requiresPIE();
}
+SanitizerMask FreeBSD::getSupportedSanitizers() const {
+ const bool IsX86 = getTriple().getArch() == llvm::Triple::x86;
+ const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64;
+ const bool IsMIPS64 = getTriple().getArch() == llvm::Triple::mips64 ||
+ getTriple().getArch() == llvm::Triple::mips64el;
+ SanitizerMask Res = ToolChain::getSupportedSanitizers();
+ Res |= SanitizerKind::Address;
+ Res |= SanitizerKind::Vptr;
+ if (IsX86_64 || IsMIPS64) {
+ Res |= SanitizerKind::Leak;
+ Res |= SanitizerKind::Thread;
+ }
+ if (IsX86 || IsX86_64) {
+ Res |= SanitizerKind::SafeStack;
+ }
+ return Res;
+}
+
/// NetBSD - NetBSD tool chain which can call as(1) and ld(1) directly.
NetBSD::NetBSD(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
@@ -3643,6 +3642,28 @@ bool Linux::isPIEDefault() const {
return getSanitizerArgs().requiresPIE();
}
+SanitizerMask Linux::getSupportedSanitizers() const {
+ const bool IsX86 = getTriple().getArch() == llvm::Triple::x86;
+ const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64;
+ const bool IsMIPS64 = getTriple().getArch() == llvm::Triple::mips64 ||
+ getTriple().getArch() == llvm::Triple::mips64el;
+ SanitizerMask Res = ToolChain::getSupportedSanitizers();
+ Res |= SanitizerKind::Address;
+ Res |= SanitizerKind::KernelAddress;
+ Res |= SanitizerKind::Vptr;
+ if (IsX86_64 || IsMIPS64) {
+ Res |= SanitizerKind::DataFlow;
+ Res |= SanitizerKind::Leak;
+ Res |= SanitizerKind::Memory;
+ Res |= SanitizerKind::Thread;
+ }
+ if (IsX86 || IsX86_64) {
+ Res |= SanitizerKind::Function;
+ Res |= SanitizerKind::SafeStack;
+ }
+ return Res;
+}
+
/// DragonFly - DragonFly tool chain which can call as(1) and ld(1) directly.
DragonFly::DragonFly(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
@@ -3742,3 +3763,46 @@ void XCore::AddCXXStdlibLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// We don't output any lib args. This is handled by xcc.
}
+
+// SHAVEToolChain does not call Clang's C compiler.
+// We override SelectTool to avoid testing ShouldUseClangCompiler().
+Tool *SHAVEToolChain::SelectTool(const JobAction &JA) const {
+ switch (JA.getKind()) {
+ case Action::CompileJobClass:
+ if (!Compiler)
+ Compiler.reset(new tools::SHAVE::Compile(*this));
+ return Compiler.get();
+ case Action::AssembleJobClass:
+ if (!Assembler)
+ Assembler.reset(new tools::SHAVE::Assemble(*this));
+ return Assembler.get();
+ default:
+ return ToolChain::getTool(JA.getKind());
+ }
+}
+
+SHAVEToolChain::SHAVEToolChain(const Driver &D, const llvm::Triple &Triple,
+ const ArgList &Args)
+ : Generic_GCC(D, Triple, Args) {}
+
+SHAVEToolChain::~SHAVEToolChain() {}
+
+/// The following methods are necessary to avoid having moviClang be an
+/// abstract class.
+
+Tool *SHAVEToolChain::getTool(Action::ActionClass AC) const {
+ // SelectTool() must find a tool using the method in the superclass.
+ // There's nothing we can do if that fails.
+ llvm_unreachable("SHAVEToolChain can't getTool");
+}
+
+Tool *SHAVEToolChain::buildLinker() const {
+  // SHAVEToolChain executables cannot be linked except by the vendor tools.
+ llvm_unreachable("SHAVEToolChain can't buildLinker");
+}
+
+Tool *SHAVEToolChain::buildAssembler() const {
+ // This one you'd think should be reachable since we expose an
+ // assembler to the driver, except not the way it expects.
+ llvm_unreachable("SHAVEToolChain can't buildAssembler");
+}
diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains.h b/contrib/llvm/tools/clang/lib/Driver/ToolChains.h
index 0b7073f..a48bb5f 100644
--- a/contrib/llvm/tools/clang/lib/Driver/ToolChains.h
+++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains.h
@@ -316,9 +316,6 @@ public:
/// Darwin - The base Darwin tool chain.
class LLVM_LIBRARY_VISIBILITY Darwin : public MachO {
public:
- /// The host version.
- unsigned DarwinVersion[3];
-
/// Whether the information on the target has been initialized.
//
// FIXME: This should be eliminated. What we want to do is make this part of
@@ -338,15 +335,6 @@ public:
mutable VersionTuple TargetVersion;
private:
- /// The default macosx-version-min of this tool chain; empty until
- /// initialized.
- std::string MacosxVersionMin;
-
- /// The default ios-version-min of this tool chain; empty until
- /// initialized.
- std::string iOSVersionMin;
-
-private:
void AddDeploymentTarget(llvm::opt::DerivedArgList &Args) const;
public:
@@ -412,6 +400,7 @@ protected:
}
bool isTargetMacOS() const {
+ assert(TargetInitialized && "Target not initialized!");
return TargetPlatform == MacOS;
}
@@ -474,6 +463,8 @@ public:
void CheckObjCARC() const override;
bool UseSjLjExceptions() const override;
+
+ SanitizerMask getSupportedSanitizers() const override;
};
/// DarwinClang - The Darwin toolchain used by Clang.
@@ -616,6 +607,7 @@ public:
bool UseSjLjExceptions() const override;
bool isPIEDefault() const override;
+ SanitizerMask getSupportedSanitizers() const override;
protected:
Tool *buildAssembler() const override;
Tool *buildLinker() const override;
@@ -679,6 +671,7 @@ public:
AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
bool isPIEDefault() const override;
+ SanitizerMask getSupportedSanitizers() const override;
std::string Linker;
std::vector<std::string> ExtraOpts;
@@ -809,6 +802,7 @@ public:
std::string ComputeEffectiveClangTriple(const llvm::opt::ArgList &Args,
types::ID InputType) const override;
+ SanitizerMask getSupportedSanitizers() const override;
protected:
void AddSystemIncludeWithSubfolder(const llvm::opt::ArgList &DriverArgs,
@@ -872,6 +866,27 @@ public:
llvm::opt::ArgStringList &CmdArgs) const override;
};
+/// SHAVEToolChain - A tool chain using the compiler installed by the
+// Movidius SDK into MV_TOOLS_DIR (which we assume will be copied to llvm's
+// installation dir) to perform all subcommands.
+class LLVM_LIBRARY_VISIBILITY SHAVEToolChain : public Generic_GCC {
+public:
+ SHAVEToolChain(const Driver &D, const llvm::Triple &Triple,
+ const llvm::opt::ArgList &Args);
+ ~SHAVEToolChain() override;
+
+ virtual Tool *SelectTool(const JobAction &JA) const override;
+
+protected:
+ Tool *getTool(Action::ActionClass AC) const override;
+ Tool *buildAssembler() const override;
+ Tool *buildLinker() const override;
+
+private:
+ mutable std::unique_ptr<Tool> Compiler;
+ mutable std::unique_ptr<Tool> Assembler;
+};
+
} // end namespace toolchains
} // end namespace driver
} // end namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Driver/Tools.cpp b/contrib/llvm/tools/clang/lib/Driver/Tools.cpp
index f4eca44..74bf864 100644
--- a/contrib/llvm/tools/clang/lib/Driver/Tools.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/Tools.cpp
@@ -679,7 +679,7 @@ static void getARMTargetFeatures(const Driver &D, const llvm::Triple &Triple,
// getARMArch is used here instead of just checking the -march value in order
// to handle -march=native correctly.
if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
- StringRef Arch = arm::getARMArch(Args, Triple);
+ std::string Arch = arm::getARMArch(Args, Triple);
if (llvm::ARMTargetParser::parseArch(Arch) == llvm::ARM::AK_INVALID)
D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
}
@@ -689,7 +689,7 @@ static void getARMTargetFeatures(const Driver &D, const llvm::Triple &Triple,
// getLLVMArchSuffixForARM which also needs an architecture.
if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
std::string CPU = arm::getARMTargetCPU(Args, Triple);
- StringRef Arch = arm::getARMArch(Args, Triple);
+ std::string Arch = arm::getARMArch(Args, Triple);
if (strcmp(arm::getLLVMArchSuffixForARM(CPU, Arch), "") == 0)
D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
}
@@ -850,7 +850,7 @@ static std::string getAArch64TargetCPU(const ArgList &Args) {
CPU = A->getValue();
} else if ((A = Args.getLastArg(options::OPT_mcpu_EQ))) {
StringRef Mcpu = A->getValue();
- CPU = Mcpu.split("+").first;
+ CPU = Mcpu.split("+").first.lower();
}
// Handle the case where the CPU name is 'native'.
@@ -1116,7 +1116,7 @@ static void getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple,
Features.push_back(Args.MakeArgString("+nooddspreg"));
} else
Features.push_back(Args.MakeArgString("+fp64"));
- } else if (mips::isFPXXDefault(Triple, CPUName, ABIName)) {
+ } else if (mips::shouldUseFPXX(Args, Triple, CPUName, ABIName, FloatABI)) {
Features.push_back(Args.MakeArgString("+fpxx"));
Features.push_back(Args.MakeArgString("+nooddspreg"));
}
@@ -1334,47 +1334,26 @@ static std::string getR600TargetGPU(const ArgList &Args) {
return "";
}
-static void getSparcTargetFeatures(const ArgList &Args,
- std::vector<const char *> &Features) {
- bool SoftFloatABI = true;
- if (Arg *A =
- Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float)) {
- if (A->getOption().matches(options::OPT_mhard_float))
- SoftFloatABI = false;
- }
- if (SoftFloatABI)
- Features.push_back("+soft-float");
-}
-
void Clang::AddSparcTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const Driver &D = getToolChain().getDriver();
+ std::string Triple = getToolChain().ComputeEffectiveClangTriple(Args);
- // Select the float ABI as determined by -msoft-float and -mhard-float.
- StringRef FloatABI;
- if (Arg *A = Args.getLastArg(options::OPT_msoft_float,
- options::OPT_mhard_float)) {
+ bool SoftFloatABI = false;
+ if (Arg *A =
+ Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float)) {
if (A->getOption().matches(options::OPT_msoft_float))
- FloatABI = "soft";
- else if (A->getOption().matches(options::OPT_mhard_float))
- FloatABI = "hard";
+ SoftFloatABI = true;
}
- // If unspecified, choose the default based on the platform.
- if (FloatABI.empty()) {
- // Assume "soft", but warn the user we are guessing.
- FloatABI = "soft";
- D.Diag(diag::warn_drv_assuming_mfloat_abi_is) << "soft";
- }
-
- if (FloatABI == "soft") {
- // Floating point operations and argument passing are soft.
- //
- // FIXME: This changes CPP defines, we need -target-soft-float.
- CmdArgs.push_back("-msoft-float");
- } else {
- assert(FloatABI == "hard" && "Invalid float abi!");
- CmdArgs.push_back("-mhard-float");
+ // Only the hard-float ABI on Sparc is standardized, and it is the
+ // default. GCC also supports a nonstandard soft-float ABI mode, and
+ // perhaps LLVM should implement that, too. However, since llvm
+  // currently does not support Sparc soft-float at all, display an
+ // error if it's requested.
+ if (SoftFloatABI) {
+ D.Diag(diag::err_drv_unsupported_opt_for_target)
+ << "-msoft-float" << Triple;
}
}
@@ -1550,6 +1529,137 @@ static void AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args,
CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=mcpu=") + CPU));
}
+/// This is a helper function for validating the optional refinement step
+/// parameter in reciprocal argument strings. Return false if there is an error
+/// parsing the refinement step. Otherwise, return true and set the Position
+/// of the refinement step in the input string.
+static bool getRefinementStep(const StringRef &In, const Driver &D,
+ const Arg &A, size_t &Position) {
+ const char RefinementStepToken = ':';
+ Position = In.find(RefinementStepToken);
+ if (Position != StringRef::npos) {
+ StringRef Option = A.getOption().getName();
+ StringRef RefStep = In.substr(Position + 1);
+ // Allow exactly one numeric character for the additional refinement
+ // step parameter. This is reasonable for all currently-supported
+ // operations and architectures because we would expect that a larger value
+ // of refinement steps would cause the estimate "optimization" to
+ // under-perform the native operation. Also, if the estimate does not
+ // converge quickly, it probably will not ever converge, so further
+ // refinement steps will not produce a better answer.
+ if (RefStep.size() != 1) {
+ D.Diag(diag::err_drv_invalid_value) << Option << RefStep;
+ return false;
+ }
+ char RefStepChar = RefStep[0];
+ if (RefStepChar < '0' || RefStepChar > '9') {
+ D.Diag(diag::err_drv_invalid_value) << Option << RefStep;
+ return false;
+ }
+ }
+ return true;
+}
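
The validation rule above is simple: an optional refinement-step suffix is introduced by ':' and must be exactly one decimal digit. A minimal standalone sketch of that rule, using a hypothetical helper with std::string instead of StringRef (this is not the driver's code):

    #include <cstddef>
    #include <string>

    // Returns true if In has no ":N" suffix, or a single-digit one; Pos is set
    // to the position of ':' (or npos if there is no suffix).
    static bool hasValidRefinementStep(const std::string &In, std::size_t &Pos) {
      Pos = In.find(':');
      if (Pos == std::string::npos)
        return true;                      // No refinement step given.
      std::string Step = In.substr(Pos + 1);
      return Step.size() == 1 && Step[0] >= '0' && Step[0] <= '9';
    }

    // hasValidRefinementStep("vec-sqrtf:3", P) -> true  (P == 9)
    // hasValidRefinementStep("divd:12", P)     -> false (two digits)
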
+
+/// The -mrecip flag requires processing of many optional parameters.
+static void ParseMRecip(const Driver &D, const ArgList &Args,
+ ArgStringList &OutStrings) {
+ StringRef DisabledPrefixIn = "!";
+ StringRef DisabledPrefixOut = "!";
+ StringRef EnabledPrefixOut = "";
+ StringRef Out = "-mrecip=";
+
+ Arg *A = Args.getLastArg(options::OPT_mrecip, options::OPT_mrecip_EQ);
+ if (!A)
+ return;
+
+ unsigned NumOptions = A->getNumValues();
+ if (NumOptions == 0) {
+ // No option is the same as "all".
+ OutStrings.push_back(Args.MakeArgString(Out + "all"));
+ return;
+ }
+
+ // Pass through "all", "none", or "default" with an optional refinement step.
+ if (NumOptions == 1) {
+ StringRef Val = A->getValue(0);
+ size_t RefStepLoc;
+ if (!getRefinementStep(Val, D, *A, RefStepLoc))
+ return;
+ StringRef ValBase = Val.slice(0, RefStepLoc);
+ if (ValBase == "all" || ValBase == "none" || ValBase == "default") {
+ OutStrings.push_back(Args.MakeArgString(Out + Val));
+ return;
+ }
+ }
+
+ // Each reciprocal type may be enabled or disabled individually.
+ // Check each input value for validity, concatenate them all back together,
+ // and pass through.
+
+ llvm::StringMap<bool> OptionStrings;
+ OptionStrings.insert(std::make_pair("divd", false));
+ OptionStrings.insert(std::make_pair("divf", false));
+ OptionStrings.insert(std::make_pair("vec-divd", false));
+ OptionStrings.insert(std::make_pair("vec-divf", false));
+ OptionStrings.insert(std::make_pair("sqrtd", false));
+ OptionStrings.insert(std::make_pair("sqrtf", false));
+ OptionStrings.insert(std::make_pair("vec-sqrtd", false));
+ OptionStrings.insert(std::make_pair("vec-sqrtf", false));
+
+ for (unsigned i = 0; i != NumOptions; ++i) {
+ StringRef Val = A->getValue(i);
+
+ bool IsDisabled = Val.startswith(DisabledPrefixIn);
+ // Ignore the disablement token for string matching.
+ if (IsDisabled)
+ Val = Val.substr(1);
+
+ size_t RefStep;
+ if (!getRefinementStep(Val, D, *A, RefStep))
+ return;
+
+ StringRef ValBase = Val.slice(0, RefStep);
+ llvm::StringMap<bool>::iterator OptionIter = OptionStrings.find(ValBase);
+ if (OptionIter == OptionStrings.end()) {
+ // Try again specifying float suffix.
+ OptionIter = OptionStrings.find(ValBase.str() + 'f');
+ if (OptionIter == OptionStrings.end()) {
+ // The input name did not match any known option string.
+ D.Diag(diag::err_drv_unknown_argument) << Val;
+ return;
+ }
+ // The option was specified without a float or double suffix.
+ // Make sure that the double entry was not already specified.
+ // The float entry will be checked below.
+ if (OptionStrings[ValBase.str() + 'd']) {
+ D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val;
+ return;
+ }
+ }
+
+ if (OptionIter->second == true) {
+ // Duplicate option specified.
+ D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val;
+ return;
+ }
+
+ // Mark the matched option as found. Do not allow duplicate specifiers.
+ OptionIter->second = true;
+
+ // If the precision was not specified, also mark the double entry as found.
+ if (ValBase.back() != 'f' && ValBase.back() != 'd')
+ OptionStrings[ValBase.str() + 'd'] = true;
+
+ // Build the output string.
+ StringRef Prefix = IsDisabled ? DisabledPrefixOut : EnabledPrefixOut;
+ Out = Args.MakeArgString(Out + Prefix + Val);
+ if (i != NumOptions - 1)
+ Out = Args.MakeArgString(Out + ",");
+ }
+
+ OutStrings.push_back(Args.MakeArgString(Out));
+}
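
In effect, ParseMRecip validates each comma-separated item, expands an unsuffixed name to both precisions, rejects duplicates, and re-joins everything into a single -mrecip= string for the frontend. A minimal sketch of just the suffix/duplicate rule, with a hypothetical helper built on std::map rather than llvm::StringMap (not the driver code):

    #include <map>
    #include <string>

    // Marks Name as seen; an unsuffixed name ("sqrt", "vec-div", ...) claims
    // both the 'f' and 'd' variants. Returns false on a duplicate.
    static bool markRecipOption(std::map<std::string, bool> &Seen,
                                const std::string &Name) {
      char Last = Name.back();
      if (Last != 'f' && Last != 'd') {
        if (Seen[Name + "f"] || Seen[Name + "d"])
          return false;
        Seen[Name + "f"] = Seen[Name + "d"] = true;
        return true;
      }
      if (Seen[Name])
        return false;
      Seen[Name] = true;
      return true;
    }

So, for example, "-mrecip=sqrt,sqrtd" is rejected as a duplicate (the bare "sqrt" already covered "sqrtd"), while "-mrecip=vec-sqrtd,!divf:1" passes through unchanged.
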
+
static void getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args,
std::vector<const char *> &Features) {
@@ -1738,7 +1848,8 @@ static bool
getAArch64ArchFeaturesFromMarch(const Driver &D, StringRef March,
const ArgList &Args,
std::vector<const char *> &Features) {
- std::pair<StringRef, StringRef> Split = March.split("+");
+ std::string MarchLowerCase = March.lower();
+ std::pair<StringRef, StringRef> Split = StringRef(MarchLowerCase).split("+");
if (Split.first == "armv8-a" ||
Split.first == "armv8a") {
@@ -1762,7 +1873,8 @@ getAArch64ArchFeaturesFromMcpu(const Driver &D, StringRef Mcpu,
const ArgList &Args,
std::vector<const char *> &Features) {
StringRef CPU;
- if (!DecodeAArch64Mcpu(D, Mcpu, CPU, Features))
+ std::string McpuLowerCase = Mcpu.lower();
+ if (!DecodeAArch64Mcpu(D, McpuLowerCase, CPU, Features))
return false;
return true;
@@ -1788,7 +1900,8 @@ getAArch64MicroArchFeaturesFromMcpu(const Driver &D, StringRef Mcpu,
std::vector<const char *> &Features) {
StringRef CPU;
std::vector<const char *> DecodedFeature;
- if (!DecodeAArch64Mcpu(D, Mcpu, CPU, DecodedFeature))
+ std::string McpuLowerCase = Mcpu.lower();
+ if (!DecodeAArch64Mcpu(D, McpuLowerCase, CPU, DecodedFeature))
return false;
return getAArch64MicroArchFeaturesFromMtune(D, CPU, Args, Features);
@@ -1863,11 +1976,6 @@ static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple,
case llvm::Triple::ppc64le:
getPPCTargetFeatures(Args, Features);
break;
- case llvm::Triple::sparc:
- case llvm::Triple::sparcel:
- case llvm::Triple::sparcv9:
- getSparcTargetFeatures(Args, Features);
- break;
case llvm::Triple::systemz:
getSystemZTargetFeatures(Args, Features);
break;
@@ -2326,6 +2434,8 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("ubsan_standalone_cxx");
}
+ if (SanArgs.needsSafeStackRt())
+ StaticRuntimes.push_back("safestack");
}
// Should be called before we add system libraries (C++ ABI, libstdc++/libc++,
@@ -3151,6 +3261,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast"));
}
}
+
+ ParseMRecip(getToolChain().getDriver(), Args, CmdArgs);
// We separately look for the '-ffast-math' and '-ffinite-math-only' flags,
// and if we find them, tell the frontend to provide the appropriate
@@ -3892,7 +4004,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// -stack-protector=0 is default.
unsigned StackProtectorLevel = 0;
- if (Arg *A = Args.getLastArg(options::OPT_fno_stack_protector,
+ if (getToolChain().getSanitizerArgs().needsSafeStackRt()) {
+ Args.ClaimAllArgs(options::OPT_fno_stack_protector);
+ Args.ClaimAllArgs(options::OPT_fstack_protector_all);
+ Args.ClaimAllArgs(options::OPT_fstack_protector_strong);
+ Args.ClaimAllArgs(options::OPT_fstack_protector);
+ } else if (Arg *A = Args.getLastArg(options::OPT_fno_stack_protector,
options::OPT_fstack_protector_all,
options::OPT_fstack_protector_strong,
options::OPT_fstack_protector)) {
@@ -4019,9 +4136,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-fblocks-runtime-optional");
}
- // -fmodules enables modules (off by default).
+ // -fmodules enables the use of precompiled modules (off by default).
// Users can pass -fno-cxx-modules to turn off modules support for
- // C++/Objective-C++ programs, which is a little less mature.
+ // C++/Objective-C++ programs.
bool HaveModules = false;
if (Args.hasFlag(options::OPT_fmodules, options::OPT_fno_modules, false)) {
bool AllowedInCXX = Args.hasFlag(options::OPT_fcxx_modules,
@@ -4033,11 +4150,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
}
}
- // -fmodule-maps enables module map processing (off by default) for header
- // checking. It is implied by -fmodules.
- if (Args.hasFlag(options::OPT_fmodule_maps, options::OPT_fno_module_maps,
- false)) {
- CmdArgs.push_back("-fmodule-maps");
+ // -fmodule-maps enables implicit reading of module map files. By default,
+ // this is enabled if we are using precompiled modules.
+ if (Args.hasFlag(options::OPT_fimplicit_module_maps,
+ options::OPT_fno_implicit_module_maps, HaveModules)) {
+ CmdArgs.push_back("-fimplicit-module-maps");
}
// -fmodules-decluse checks that modules used are declared so (off by
@@ -5627,9 +5744,9 @@ void hexagon::Link::ConstructJob(Compilation &C, const JobAction &JA,
}
// Hexagon tools end.
-const StringRef arm::getARMArch(const ArgList &Args,
- const llvm::Triple &Triple) {
- StringRef MArch;
+const std::string arm::getARMArch(const ArgList &Args,
+ const llvm::Triple &Triple) {
+ std::string MArch;
if (Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
// Otherwise, if we have -march= choose the base CPU for that arch.
MArch = A->getValue();
@@ -5637,6 +5754,7 @@ const StringRef arm::getARMArch(const ArgList &Args,
// Otherwise, use the Arch from the triple.
MArch = Triple.getArchName();
}
+ MArch = StringRef(MArch).lower();
// Handle -march=native.
if (MArch == "native") {
@@ -5658,7 +5776,7 @@ const StringRef arm::getARMArch(const ArgList &Args,
/// Get the (LLVM) name of the minimum ARM CPU for the arch we are targeting.
const char *arm::getARMCPUForMArch(const ArgList &Args,
const llvm::Triple &Triple) {
- StringRef MArch = getARMArch(Args, Triple);
+ std::string MArch = getARMArch(Args, Triple);
// getARMCPUForArch defaults to the triple if MArch is empty, but empty MArch
// here means an -march=native that we can't handle, so instead return no CPU.
if (MArch.empty())
@@ -5761,7 +5879,7 @@ bool mips::isNaN2008(const ArgList &Args, const llvm::Triple &Triple) {
}
bool mips::isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
- StringRef ABIName) {
+ StringRef ABIName, StringRef FloatABI) {
if (Triple.getVendor() != llvm::Triple::ImaginationTechnologies &&
Triple.getVendor() != llvm::Triple::MipsTechnologies)
return false;
@@ -5769,6 +5887,11 @@ bool mips::isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
if (ABIName != "32")
return false;
+ // FPXX shouldn't be used if either -msoft-float or -mfloat-abi=soft is
+ // present.
+ if (FloatABI == "soft")
+ return false;
+
return llvm::StringSwitch<bool>(CPUName)
.Cases("mips2", "mips3", "mips4", "mips5", true)
.Cases("mips32", "mips32r2", "mips32r3", "mips32r5", true)
@@ -5776,6 +5899,20 @@ bool mips::isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
.Default(false);
}
+bool mips::shouldUseFPXX(const ArgList &Args, const llvm::Triple &Triple,
+ StringRef CPUName, StringRef ABIName,
+ StringRef FloatABI) {
+ bool UseFPXX = isFPXXDefault(Triple, CPUName, ABIName, FloatABI);
+
+ // FPXX shouldn't be used if -msingle-float is present.
+ if (Arg *A = Args.getLastArg(options::OPT_msingle_float,
+ options::OPT_mdouble_float))
+ if (A->getOption().matches(options::OPT_msingle_float))
+ UseFPXX = false;
+
+ return UseFPXX;
+}
+
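
Taken together, the FPXX decision above reduces to: an MTI or IMG vendor triple, the O32 ("32") ABI, a float ABI other than soft, no -msingle-float, and a CPU from the listed generations. A condensed sketch with a hypothetical helper (not the driver code):

    // All inputs are assumed to have been derived from the triple and the
    // -mabi/-mfloat-abi/-msingle-float/-mcpu arguments as in the code above.
    static bool wantsFPXX(bool MtiOrImgVendor, bool IsO32ABI, bool SoftFloat,
                          bool SingleFloat, bool CPUSupportsFPXX) {
      return MtiOrImgVendor && IsO32ABI && !SoftFloat && !SingleFloat &&
             CPUSupportsFPXX;
    }
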
llvm::Triple::ArchType darwin::getArchTypeForMachOArchName(StringRef Str) {
// See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for
// archs which Darwin doesn't use.
@@ -5901,7 +6038,7 @@ void cloudabi::Link::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag);
Args.AddAllArgs(CmdArgs, options::OPT_r);
- if (D.IsUsingLTO(ToolChain, Args))
+ if (D.IsUsingLTO(Args))
AddGoldPlugin(ToolChain, Args, CmdArgs);
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);
@@ -6052,8 +6189,7 @@ void darwin::Link::AddLinkArgs(Compilation &C,
// If we are using LTO, then automatically create a temporary file path for
// the linker to use, so that it's lifetime will extend past a possible
// dsymutil step.
- if (Version[0] >= 116 && D.IsUsingLTO(getToolChain(), Args) &&
- NeedsTempPath(Inputs)) {
+ if (Version[0] >= 116 && D.IsUsingLTO(Args) && NeedsTempPath(Inputs)) {
const char *TmpPath = C.getArgs().MakeArgString(
D.GetTemporaryPath("cc", types::getTypeTempSuffix(types::TY_Object)));
C.addTempFile(TmpPath);
@@ -6254,6 +6390,15 @@ void darwin::Link::ConstructJob(Compilation &C, const JobAction &JA,
!Args.hasArg(options::OPT_nostartfiles))
getMachOToolChain().addStartObjectFileArgs(Args, CmdArgs);
+ // SafeStack requires its own runtime libraries
+ // These libraries should be linked first, to make sure the
+ // __safestack_init constructor executes before everything else
+ if (getToolChain().getSanitizerArgs().needsSafeStackRt()) {
+ getMachOToolChain().AddLinkRuntimeLib(Args, CmdArgs,
+ "libclang_rt.safestack_osx.a",
+ /*AlwaysLink=*/true);
+ }
+
Args.AddAllArgs(CmdArgs, options::OPT_L);
if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
@@ -7091,7 +7236,7 @@ void freebsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag);
Args.AddAllArgs(CmdArgs, options::OPT_r);
- if (D.IsUsingLTO(getToolChain(), Args))
+ if (D.IsUsingLTO(Args))
AddGoldPlugin(ToolChain, Args, CmdArgs);
bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs);
@@ -7588,12 +7733,13 @@ void gnutools::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
}
// Add the last -mfp32/-mfpxx/-mfp64 or -mfpxx if it is enabled by default.
+ StringRef MIPSFloatABI = getMipsFloatABI(getToolChain().getDriver(), Args);
if (Arg *A = Args.getLastArg(options::OPT_mfp32, options::OPT_mfpxx,
options::OPT_mfp64)) {
A->claim();
A->render(Args, CmdArgs);
- } else if (mips::isFPXXDefault(getToolChain().getTriple(), CPUName,
- ABIName))
+ } else if (mips::shouldUseFPXX(Args, getToolChain().getTriple(), CPUName,
+ ABIName, MIPSFloatABI))
CmdArgs.push_back("-mfpxx");
// Pass on -mmips16 or -mno-mips16. However, the assembler equivalent of
@@ -7624,6 +7770,9 @@ void gnutools::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_mhard_float,
options::OPT_msoft_float);
+ Args.AddLastArg(CmdArgs, options::OPT_mdouble_float,
+ options::OPT_msingle_float);
+
Args.AddLastArg(CmdArgs, options::OPT_modd_spreg,
options::OPT_mno_odd_spreg);
@@ -7947,7 +8096,7 @@ void gnutools::Link::ConstructJob(Compilation &C, const JobAction &JA,
for (const auto &Path : Paths)
CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + Path));
- if (D.IsUsingLTO(getToolChain(), Args))
+ if (D.IsUsingLTO(Args))
AddGoldPlugin(ToolChain, Args, CmdArgs);
if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle))
@@ -8959,3 +9108,81 @@ void CrossWindows::Link::ConstructJob(Compilation &C, const JobAction &JA,
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
}
+
+void tools::SHAVE::Compile::ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output,
+ const InputInfoList &Inputs,
+ const ArgList &Args,
+ const char *LinkingOutput) const {
+
+ ArgStringList CmdArgs;
+
+ assert(Inputs.size() == 1);
+ const InputInfo &II = Inputs[0];
+ assert(II.getType() == types::TY_C || II.getType() == types::TY_CXX);
+ assert(Output.getType() == types::TY_PP_Asm); // Require preprocessed asm.
+
+ // Append all -I, -iquote, -isystem paths.
+ Args.AddAllArgs(CmdArgs, options::OPT_clang_i_Group);
+ // These are spelled the same way in clang and moviCompile.
+ Args.AddAllArgs(CmdArgs, options::OPT_D, options::OPT_U);
+
+ CmdArgs.push_back("-DMYRIAD2");
+ CmdArgs.push_back("-mcpu=myriad2");
+ CmdArgs.push_back("-S");
+
+ // Any -O option passes through without translation. What about -Ofast ?
+ if (Arg *A = Args.getLastArg(options::OPT_O_Group))
+ A->render(Args, CmdArgs);
+
+ if (Args.hasFlag(options::OPT_ffunction_sections,
+ options::OPT_fno_function_sections)) {
+ CmdArgs.push_back("-ffunction-sections");
+ }
+ if (Args.hasArg(options::OPT_fno_inline_functions))
+ CmdArgs.push_back("-fno-inline-functions");
+
+ CmdArgs.push_back("-fno-exceptions"); // Always do this even if unspecified.
+
+ CmdArgs.push_back(II.getFilename());
+ CmdArgs.push_back("-o");
+ CmdArgs.push_back(Output.getFilename());
+
+ std::string Exec =
+ Args.MakeArgString(getToolChain().GetProgramPath("moviCompile"));
+ C.addCommand(
+ llvm::make_unique<Command>(JA, *this, Args.MakeArgString(Exec), CmdArgs));
+}
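
For reference, the job built above boils down to a single moviCompile invocation of roughly the following shape; the path and exact flag order are illustrative, not captured from a real run:

    // <clang-install>/moviCompile <-I/-D/-U passthroughs> -DMYRIAD2
    //     -mcpu=myriad2 -S <-O flag, if given> -fno-exceptions
    //     input.c -o output.s
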
+
+void tools::SHAVE::Assemble::ConstructJob(Compilation &C,
+ const JobAction &JA,
+ const InputInfo &Output,
+ const InputInfoList &Inputs,
+ const ArgList &Args,
+ const char *LinkingOutput) const {
+ ArgStringList CmdArgs;
+
+ assert(Inputs.size() == 1);
+ const InputInfo &II = Inputs[0];
+ assert(II.getType() == types::TY_PP_Asm); // Require preprocessed asm input.
+ assert(Output.getType() == types::TY_Object);
+
+ CmdArgs.push_back("-no6thSlotCompression");
+ CmdArgs.push_back("-cv:myriad2"); // Chip Version ?
+ CmdArgs.push_back("-noSPrefixing");
+ CmdArgs.push_back("-a"); // Mystery option.
+ for (auto Arg : Args.filtered(options::OPT_I)) {
+ Arg->claim();
+ CmdArgs.push_back(
+ Args.MakeArgString(std::string("-i:") + Arg->getValue(0)));
+ }
+ CmdArgs.push_back("-elf"); // Output format.
+ CmdArgs.push_back(II.getFilename());
+ CmdArgs.push_back(
+ Args.MakeArgString(std::string("-o:") + Output.getFilename()));
+
+ std::string Exec =
+ Args.MakeArgString(getToolChain().GetProgramPath("moviAsm"));
+ C.addCommand(
+ llvm::make_unique<Command>(JA, *this, Args.MakeArgString(Exec), CmdArgs));
+}
diff --git a/contrib/llvm/tools/clang/lib/Driver/Tools.h b/contrib/llvm/tools/clang/lib/Driver/Tools.h
index 133a389..753f542 100644
--- a/contrib/llvm/tools/clang/lib/Driver/Tools.h
+++ b/contrib/llvm/tools/clang/lib/Driver/Tools.h
@@ -227,8 +227,8 @@ namespace hexagon {
namespace arm {
std::string getARMTargetCPU(const llvm::opt::ArgList &Args,
const llvm::Triple &Triple);
- const StringRef getARMArch(const llvm::opt::ArgList &Args,
- const llvm::Triple &Triple);
+ const std::string getARMArch(const llvm::opt::ArgList &Args,
+ const llvm::Triple &Triple);
const char* getARMCPUForMArch(const llvm::opt::ArgList &Args,
const llvm::Triple &Triple);
const char* getLLVMArchSuffixForARM(StringRef CPU, StringRef Arch);
@@ -249,7 +249,9 @@ namespace mips {
bool isUCLibc(const llvm::opt::ArgList &Args);
bool isNaN2008(const llvm::opt::ArgList &Args, const llvm::Triple &Triple);
bool isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
- StringRef ABIName);
+ StringRef ABIName, StringRef FloatABI);
+ bool shouldUseFPXX(const llvm::opt::ArgList &Args, const llvm::Triple &Triple,
+ StringRef CPUName, StringRef ABIName, StringRef FloatABI);
}
namespace ppc {
@@ -731,6 +733,33 @@ public:
};
}
+/// SHAVE tools -- Directly call moviCompile and moviAsm
+namespace SHAVE {
+class LLVM_LIBRARY_VISIBILITY Compile : public Tool {
+public:
+ Compile(const ToolChain &TC) : Tool("moviCompile", "movicompile", TC) {}
+
+ bool hasIntegratedCPP() const override { return true; }
+
+ void ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output, const InputInfoList &Inputs,
+ const llvm::opt::ArgList &TCArgs,
+ const char *LinkingOutput) const override;
+};
+
+class LLVM_LIBRARY_VISIBILITY Assemble : public Tool {
+public:
+ Assemble(const ToolChain &TC) : Tool("moviAsm", "moviAsm", TC) {}
+
+ bool hasIntegratedCPP() const override { return false; } // not sure.
+
+ void ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output, const InputInfoList &Inputs,
+ const llvm::opt::ArgList &TCArgs,
+ const char *LinkingOutput) const override;
+};
+} // end namespace SHAVE
+
} // end namespace tools
} // end namespace driver
} // end namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Driver/Types.cpp b/contrib/llvm/tools/clang/lib/Driver/Types.cpp
index 0b0878f..7b28145 100644
--- a/contrib/llvm/tools/clang/lib/Driver/Types.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/Types.cpp
@@ -45,7 +45,7 @@ types::ID types::getPreprocessedType(ID Id) {
}
const char *types::getTypeTempSuffix(ID Id, bool CLMode) {
- if ((Id == TY_Object || Id == TY_LTO_BC) && CLMode)
+ if (Id == TY_Object && CLMode)
return "obj";
if (Id == TY_Image && CLMode)
return "exe";
diff --git a/contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp b/contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp
index e3e162d..36a8c4d 100644
--- a/contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp
+++ b/contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp
@@ -183,7 +183,7 @@ void BreakableStringLiteral::insertBreak(unsigned LineIndex,
}
static StringRef getLineCommentIndentPrefix(StringRef Comment) {
- static const char *const KnownPrefixes[] = { "///", "//", "//!" };
+ static const char *const KnownPrefixes[] = {"///", "//", "//!"};
StringRef LongestPrefix;
for (StringRef KnownPrefix : KnownPrefixes) {
if (Comment.startswith(KnownPrefix)) {
@@ -239,9 +239,8 @@ void BreakableLineComment::replaceWhitespace(unsigned LineIndex,
/*Spaces=*/1);
}
-void
-BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex,
- WhitespaceManager &Whitespaces) {
+void BreakableLineComment::replaceWhitespaceBefore(
+ unsigned LineIndex, WhitespaceManager &Whitespaces) {
if (OriginalPrefix != Prefix) {
Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "",
/*InPPDirective=*/false,
@@ -345,7 +344,7 @@ void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
// Calculate the start of the non-whitespace text in the current line.
size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
if (StartOfLine == StringRef::npos)
- StartOfLine = Lines[LineIndex].size();
+ StartOfLine = Lines[LineIndex].rtrim("\r\n").size();
StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
// Adjust Lines to only contain relevant text.
@@ -415,9 +414,8 @@ void BreakableBlockComment::replaceWhitespace(unsigned LineIndex,
/*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1);
}
-void
-BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
- WhitespaceManager &Whitespaces) {
+void BreakableBlockComment::replaceWhitespaceBefore(
+ unsigned LineIndex, WhitespaceManager &Whitespaces) {
if (LineIndex == 0)
return;
StringRef Prefix = Decoration;
diff --git a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp
index 91bc64b..2357abd 100644
--- a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp
+++ b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp
@@ -150,12 +150,6 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
State.Stack.back().BreakBeforeParameter && !Current.isTrailingComment() &&
!Current.isOneOf(tok::r_paren, tok::r_brace))
return true;
- if (Style.AlwaysBreakBeforeMultilineStrings &&
- State.Column > State.Stack.back().Indent && // Breaking saves columns.
- !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at) &&
- !Previous.isOneOf(TT_InlineASMColon, TT_ConditionalExpr) &&
- nextIsMultilineString(State))
- return true;
if (((Previous.is(TT_DictLiteral) && Previous.is(tok::l_brace)) ||
Previous.is(TT_ArrayInitializerLSquare)) &&
Style.ColumnLimit > 0 &&
@@ -170,9 +164,18 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
State.Stack.back().BreakBeforeParameter)
return true;
- if (State.Column < getNewLineColumn(State))
+ unsigned NewLineColumn = getNewLineColumn(State);
+ if (State.Column < NewLineColumn)
return false;
+ if (Style.AlwaysBreakBeforeMultilineStrings &&
+ (NewLineColumn == State.FirstIndent + Style.ContinuationIndentWidth ||
+ Previous.is(tok::comma) || Current.NestingLevel < 2) &&
+ !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at) &&
+ !Previous.isOneOf(TT_InlineASMColon, TT_ConditionalExpr) &&
+ nextIsMultilineString(State))
+ return true;
+
// Using CanBreakBefore here and below takes care of the decision whether the
// current style uses wrapping before or after operators for the given
// operator.
@@ -416,7 +419,21 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
Penalty += Style.PenaltyBreakFirstLessLess;
State.Column = getNewLineColumn(State);
- State.Stack.back().NestedBlockIndent = State.Column;
+
+ // Indent nested blocks relative to this column, unless in a very specific
+ // JavaScript special case where:
+ //
+ // var loooooong_name =
+ // function() {
+ // // code
+ // }
+ //
+ // is common and should be formatted like a free-standing function.
+ if (Style.Language != FormatStyle::LK_JavaScript ||
+ Current.NestingLevel != 0 || !PreviousNonComment->is(tok::equal) ||
+ !Current.is(Keywords.kw_function))
+ State.Stack.back().NestedBlockIndent = State.Column;
+
if (NextNonComment->isMemberAccess()) {
if (State.Stack.back().CallContinuation == 0)
State.Stack.back().CallContinuation = State.Column;
@@ -688,7 +705,8 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
// foo();
// bar();
// }, a, b, c);
- if (Current.isNot(tok::comment) && Previous && Previous->is(tok::l_brace) &&
+ if (Current.isNot(tok::comment) && Previous &&
+ Previous->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) &&
State.Stack.size() > 1) {
if (State.Stack[State.Stack.size() - 2].NestedBlockInlined && Newline)
for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i)
@@ -713,7 +731,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0)
State.StartOfStringLiteral = State.Column + 1;
else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) &&
- !Current.isStringLiteral())
+ !Current.isStringLiteral())
State.StartOfStringLiteral = 0;
State.Column += Current.ColumnWidth;
@@ -891,7 +909,7 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
// be a line break within this call.
for (const FormatToken *Tok = &Current;
Tok && Tok != Current.MatchingParen; Tok = Tok->Next) {
- if (Tok->MustBreakBefore ||
+ if (Tok->MustBreakBefore ||
(Tok->CanBreakBefore && Tok->NewlinesBefore > 0)) {
BreakBeforeParameter = true;
break;
diff --git a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h
index 1da6bd9..9b9154e 100644
--- a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h
+++ b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h
@@ -148,13 +148,10 @@ struct ParenState {
ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
bool AvoidBinPacking, bool NoLineBreak)
: Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
- NestedBlockIndent(Indent), FirstLessLess(0),
- BreakBeforeClosingBrace(false), QuestionColumn(0),
+ NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
- NoLineBreak(NoLineBreak), LastOperatorWrapped(true), ColonPos(0),
- StartOfFunctionCall(0), StartOfArraySubscripts(0),
- NestedNameSpecifierContinuation(0), CallContinuation(0), VariablePos(0),
- ContainsLineBreak(false), ContainsUnwrappedBuilder(0),
+ NoLineBreak(NoLineBreak), LastOperatorWrapped(true),
+ ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
AlignColons(true), ObjCSelectorNameFound(false),
HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}
@@ -180,90 +177,90 @@ struct ParenState {
///
/// Used to align "<<" operators. 0 if no such operator has been encountered
/// on a level.
- unsigned FirstLessLess;
-
- /// \brief Whether a newline needs to be inserted before the block's closing
- /// brace.
- ///
- /// We only want to insert a newline before the closing brace if there also
- /// was a newline after the beginning left brace.
- bool BreakBeforeClosingBrace;
+ unsigned FirstLessLess = 0;
/// \brief The column of a \c ? in a conditional expression;
- unsigned QuestionColumn;
-
- /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
- /// lines, in this context.
- bool AvoidBinPacking;
-
- /// \brief Break after the next comma (or all the commas in this context if
- /// \c AvoidBinPacking is \c true).
- bool BreakBeforeParameter;
-
- /// \brief Line breaking in this context would break a formatting rule.
- bool NoLineBreak;
-
- /// \brief True if the last binary operator on this level was wrapped to the
- /// next line.
- bool LastOperatorWrapped;
+ unsigned QuestionColumn = 0;
/// \brief The position of the colon in an ObjC method declaration/call.
- unsigned ColonPos;
+ unsigned ColonPos = 0;
/// \brief The start of the most recent function in a builder-type call.
- unsigned StartOfFunctionCall;
+ unsigned StartOfFunctionCall = 0;
/// \brief Contains the start of array subscript expressions, so that they
/// can be aligned.
- unsigned StartOfArraySubscripts;
+ unsigned StartOfArraySubscripts = 0;
/// \brief If a nested name specifier was broken over multiple lines, this
/// contains the start column of the second line. Otherwise 0.
- unsigned NestedNameSpecifierContinuation;
+ unsigned NestedNameSpecifierContinuation = 0;
/// \brief If a call expression was broken over multiple lines, this
/// contains the start column of the second line. Otherwise 0.
- unsigned CallContinuation;
+ unsigned CallContinuation = 0;
/// \brief The column of the first variable name in a variable declaration.
///
/// Used to align further variables if necessary.
- unsigned VariablePos;
+ unsigned VariablePos = 0;
+
+ /// \brief Whether a newline needs to be inserted before the block's closing
+ /// brace.
+ ///
+ /// We only want to insert a newline before the closing brace if there also
+ /// was a newline after the beginning left brace.
+ bool BreakBeforeClosingBrace : 1;
+
+ /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
+ /// lines, in this context.
+ bool AvoidBinPacking : 1;
+
+ /// \brief Break after the next comma (or all the commas in this context if
+ /// \c AvoidBinPacking is \c true).
+ bool BreakBeforeParameter : 1;
+
+ /// \brief Line breaking in this context would break a formatting rule.
+ bool NoLineBreak : 1;
+
+ /// \brief True if the last binary operator on this level was wrapped to the
+ /// next line.
+ bool LastOperatorWrapped : 1;
/// \brief \c true if this \c ParenState already contains a line-break.
///
/// The first line break in a certain \c ParenState causes extra penalty so
/// that clang-format prefers similar breaks, i.e. breaks in the same
/// parenthesis.
- bool ContainsLineBreak;
+ bool ContainsLineBreak : 1;
/// \brief \c true if this \c ParenState contains multiple segments of a
/// builder-type call on one line.
- bool ContainsUnwrappedBuilder;
+ bool ContainsUnwrappedBuilder : 1;
/// \brief \c true if the colons of the current ObjC method expression should
/// be aligned.
///
/// Not considered for memoization as it will always have the same value at
/// the same token.
- bool AlignColons;
+ bool AlignColons : 1;
/// \brief \c true if at least one selector name was found in the current
/// ObjC method expression.
///
/// Not considered for memoization as it will always have the same value at
/// the same token.
- bool ObjCSelectorNameFound;
+ bool ObjCSelectorNameFound : 1;
/// \brief \c true if there are multiple nested blocks inside these parens.
///
/// Not considered for memoization as it will always have the same value at
/// the same token.
- bool HasMultipleNestedBlocks;
+ bool HasMultipleNestedBlocks : 1;
// \brief The start of a nested block (e.g. lambda introducer in C++ or
// "function" in JavaScript) is not wrapped to a new line.
- bool NestedBlockInlined;
+ bool NestedBlockInlined : 1;
bool operator<(const ParenState &Other) const {
if (Indent != Other.Indent)
diff --git a/contrib/llvm/tools/clang/lib/Format/Format.cpp b/contrib/llvm/tools/clang/lib/Format/Format.cpp
index c725b4b..0feeaa0 100644
--- a/contrib/llvm/tools/clang/lib/Format/Format.cpp
+++ b/contrib/llvm/tools/clang/lib/Format/Format.cpp
@@ -174,7 +174,8 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
IO.mapOptional("AlignOperands", Style.AlignOperands);
IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
- IO.mapOptional("AlignConsecutiveAssignments", Style.AlignConsecutiveAssignments);
+ IO.mapOptional("AlignConsecutiveAssignments",
+ Style.AlignConsecutiveAssignments);
IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
Style.AllowAllParametersOfDeclarationOnNextLine);
IO.mapOptional("AllowShortBlocksOnASingleLine",
@@ -785,7 +786,8 @@ private:
// Backticks get lexed as tok::unknown tokens. If a template string contains
// a comment start, it gets lexed as a tok::comment, or tok::unknown if
// unterminated.
- if (!EndBacktick->isOneOf(tok::comment, tok::unknown))
+ if (!EndBacktick->isOneOf(tok::comment, tok::string_literal,
+ tok::char_constant, tok::unknown))
return false;
size_t CommentBacktickPos = EndBacktick->TokenText.find('`');
// Unknown token that's not actually a backtick, or a comment that doesn't
@@ -1122,7 +1124,10 @@ private:
Column = FormatTok->LastLineColumnWidth;
}
- if (std::find(ForEachMacros.begin(), ForEachMacros.end(),
+ if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
+ Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
+ tok::pp_define) &&
+ std::find(ForEachMacros.begin(), ForEachMacros.end(),
FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end())
FormatTok->Type = TT_ForEachMacro;
@@ -1254,7 +1259,8 @@ public:
}
tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
- FormatTokenLexer &Tokens, bool *IncompleteFormat) {
+ FormatTokenLexer &Tokens,
+ bool *IncompleteFormat) {
TokenAnnotator Annotator(Style, Tokens.getKeywords());
for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
Annotator.annotate(*AnnotatedLines[i]);
@@ -1500,8 +1506,7 @@ tooling::Replacements reformat(const FormatStyle &Style,
tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
ArrayRef<tooling::Range> Ranges,
- StringRef FileName,
- bool *IncompleteFormat) {
+ StringRef FileName, bool *IncompleteFormat) {
if (Style.DisableFormat)
return tooling::Replacements();
diff --git a/contrib/llvm/tools/clang/lib/Format/FormatToken.cpp b/contrib/llvm/tools/clang/lib/Format/FormatToken.cpp
index 88678ca..316171d 100644
--- a/contrib/llvm/tools/clang/lib/Format/FormatToken.cpp
+++ b/contrib/llvm/tools/clang/lib/Format/FormatToken.cpp
@@ -203,11 +203,14 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
// We can never place more than ColumnLimit / 3 items in a row (because of the
// spaces and the comma).
- for (unsigned Columns = 1; Columns <= Style.ColumnLimit / 3; ++Columns) {
+ unsigned MaxItems = Style.ColumnLimit / 3;
+ std::vector<unsigned> MinSizeInColumn;
+ MinSizeInColumn.reserve(MaxItems);
+ for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
ColumnFormat Format;
Format.Columns = Columns;
Format.ColumnSizes.resize(Columns);
- std::vector<unsigned> MinSizeInColumn(Columns, UINT_MAX);
+ MinSizeInColumn.assign(Columns, UINT_MAX);
Format.LineCount = 1;
bool HasRowWithSufficientColumns = false;
unsigned Column = 0;
diff --git a/contrib/llvm/tools/clang/lib/Format/FormatToken.h b/contrib/llvm/tools/clang/lib/Format/FormatToken.h
index dd12969..5b7dadb 100644
--- a/contrib/llvm/tools/clang/lib/Format/FormatToken.h
+++ b/contrib/llvm/tools/clang/lib/Format/FormatToken.h
@@ -86,24 +86,12 @@ enum TokenType {
};
// Represents what type of block a set of braces open.
-enum BraceBlockKind {
- BK_Unknown,
- BK_Block,
- BK_BracedInit
-};
+enum BraceBlockKind { BK_Unknown, BK_Block, BK_BracedInit };
// The packing kind of a function's parameters.
-enum ParameterPackingKind {
- PPK_BinPacked,
- PPK_OnePerLine,
- PPK_Inconclusive
-};
+enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive };
-enum FormatDecision {
- FD_Unformatted,
- FD_Continue,
- FD_Break
-};
+enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break };
class TokenRole;
class AnnotatedLine;
diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp
index 78e6103..8d08c3d 100644
--- a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp
+++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp
@@ -87,7 +87,7 @@ private:
if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
CurrentToken->Previous->is(TT_BinaryOperator) &&
Contexts[Contexts.size() - 2].IsExpression &&
- Line.First->isNot(tok::kw_template))
+ !Line.startsWith(tok::kw_template))
return false;
updateParameterCount(Left, CurrentToken);
if (!consumeToken())
@@ -255,7 +255,7 @@ private:
// A '[' could be an index subscript (after an identifier or after
// ')' or ']'), it could be the start of an Objective-C method
- // expression, or it could the the start of an Objective-C array literal.
+    // expression, or it could be the start of an Objective-C array literal.
FormatToken *Left = CurrentToken->Previous;
Left->ParentBracket = Contexts.back().ContextKind;
FormatToken *Parent = Left->getPreviousNonComment();
@@ -457,7 +457,7 @@ private:
if (Contexts.back().ColonIsDictLiteral) {
Tok->Type = TT_DictLiteral;
} else if (Contexts.back().ColonIsObjCMethodExpr ||
- Line.First->is(TT_ObjCMethodSpecifier)) {
+ Line.startsWith(TT_ObjCMethodSpecifier)) {
Tok->Type = TT_ObjCMethodExpr;
Tok->Previous->Type = TT_SelectorName;
if (Tok->Previous->ColumnWidth >
@@ -503,7 +503,7 @@ private:
if (!parseParens())
return false;
if (Line.MustBeDeclaration && Contexts.size() == 1 &&
- !Contexts.back().IsExpression && Line.First->isNot(TT_ObjCProperty) &&
+ !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
(!Tok->Previous ||
!Tok->Previous->isOneOf(tok::kw_decltype, TT_LeadingJavaAnnotation)))
Line.MightBeFunctionDecl = true;
@@ -558,7 +558,7 @@ private:
break;
case tok::question:
if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next &&
- Tok->Next->isOneOf(tok::semi, tok::colon, tok::r_paren,
+ Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
tok::r_brace)) {
// Question marks before semicolons, colons, etc. indicate optional
// types (fields, parameters), e.g.
@@ -581,7 +581,7 @@ private:
if (Contexts.back().InCtorInitializer)
Tok->Type = TT_CtorInitializerComma;
else if (Contexts.back().FirstStartOfName &&
- (Contexts.size() == 1 || Line.First->is(tok::kw_for))) {
+ (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) {
Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
Line.IsMultiVariableDeclStmt = true;
}
@@ -724,7 +724,7 @@ public:
if (ImportStatement)
return LT_ImportStatement;
- if (Line.First->is(TT_ObjCMethodSpecifier)) {
+ if (Line.startsWith(TT_ObjCMethodSpecifier)) {
if (Contexts.back().FirstObjCSelectorName)
Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
Contexts.back().LongestObjCSelectorName;
@@ -820,7 +820,7 @@ private:
!Line.First->isOneOf(tok::kw_template, tok::kw_using) &&
(!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
Contexts.back().IsExpression = true;
- if (!Line.First->is(TT_UnaryOperator)) {
+ if (!Line.startsWith(TT_UnaryOperator)) {
for (FormatToken *Previous = Current.Previous;
Previous && !Previous->isOneOf(tok::comma, tok::semi);
Previous = Previous->Previous) {
@@ -870,7 +870,7 @@ private:
Contexts.back().InCtorInitializer = true;
} else if (Current.is(tok::kw_new)) {
Contexts.back().CanBeExpression = false;
- } else if (Current.is(tok::semi) || Current.is(tok::exclaim)) {
+ } else if (Current.isOneOf(tok::semi, tok::exclaim)) {
// This should be the condition or increment in a for-loop.
Contexts.back().IsExpression = true;
}
@@ -1081,7 +1081,7 @@ private:
// there is also an identifier before the ().
else if (LeftOfParens && Tok.Next &&
(LeftOfParens->Tok.getIdentifierInfo() == nullptr ||
- LeftOfParens->is(tok::kw_return)) &&
+ LeftOfParens->isOneOf(tok::kw_return, tok::kw_case)) &&
!LeftOfParens->isOneOf(TT_OverloadedOperator, tok::at,
TT_TemplateCloser)) {
if (Tok.Next->isOneOf(tok::identifier, tok::numeric_constant)) {
@@ -1441,11 +1441,11 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
ExpressionParser ExprParser(Style, Keywords, Line);
ExprParser.parse();
- if (Line.First->is(TT_ObjCMethodSpecifier))
+ if (Line.startsWith(TT_ObjCMethodSpecifier))
Line.Type = LT_ObjCMethodDecl;
- else if (Line.First->is(TT_ObjCDecl))
+ else if (Line.startsWith(TT_ObjCDecl))
Line.Type = LT_ObjCDecl;
- else if (Line.First->is(TT_ObjCProperty))
+ else if (Line.startsWith(TT_ObjCProperty))
Line.Type = LT_ObjCProperty;
Line.First->SpacesRequiredBefore = 1;
@@ -1638,7 +1638,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
Right.is(tok::kw_operator)) {
- if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
+ if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
return 3;
if (Left.is(TT_StartOfName))
return 110;
@@ -1674,8 +1674,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
(!Right.Next || Right.Next->isNot(tok::l_paren))) {
// Moving trailing annotations to the next line is fine for ObjC method
// declarations.
- if (Line.First->is(TT_ObjCMethodSpecifier))
-
+ if (Line.startsWith(TT_ObjCMethodSpecifier))
return 10;
// Generally, breaking before a trailing annotation is bad unless it is
// function-like. It seems to be especially preferable to keep standard
@@ -1687,7 +1686,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
}
// In for-loops, prefer breaking at ',' and ';'.
- if (Line.First->is(tok::kw_for) && Left.is(tok::equal))
+ if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
return 4;
// In Objective-C method expressions, prefer breaking before "param:" over
@@ -1800,6 +1799,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
return true;
if (Left.is(TT_PointerOrReference))
return Right.Tok.isLiteral() || Right.is(TT_BlockComment) ||
+ (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
(!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
tok::l_paren) &&
(Style.PointerAlignment != FormatStyle::PAS_Right &&
@@ -1844,7 +1844,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
tok::kw_new, tok::kw_delete) &&
(!Left.Previous || Left.Previous->isNot(tok::period))))) ||
(Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
- (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() || Left.is(tok::r_paren)) &&
+ (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
+ Left.is(tok::r_paren)) &&
Line.Type != LT_PreprocessorDirective);
}
if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
@@ -1989,7 +1990,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
return false;
if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
- Line.First->is(tok::hash))
+ Line.startsWith(tok::hash))
return true;
if (Right.is(TT_TrailingUnaryOperator))
return false;
@@ -2010,6 +2011,39 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
if (Right.NewlinesBefore > 1)
return true;
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ // FIXME: This might apply to other languages and token kinds.
+ if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous &&
+ Left.Previous->is(tok::char_constant))
+ return true;
+ if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
+ Left.Previous && Left.Previous->is(tok::equal) &&
+ Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
+ tok::kw_const) &&
+ // kw_var is a pseudo-token that's a tok::identifier, so matches above.
+ !Line.startsWith(Keywords.kw_var))
+ // Object literals on the top level of a file are treated as "enum-style".
+ // Each key/value pair is put on a separate line, instead of bin-packing.
+ return true;
+ if (Left.is(tok::l_brace) && Line.Level == 0 &&
+ (Line.startsWith(tok::kw_enum) ||
+ Line.startsWith(tok::kw_export, tok::kw_enum)))
+ // JavaScript top-level enum key/value pairs are put on separate lines
+ // instead of bin-packing.
+ return true;
+ if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
+ !Left.Children.empty())
+ // Support AllowShortFunctionsOnASingleLine for JavaScript.
+ return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
+ (Left.NestingLevel == 0 && Line.Level == 0 &&
+ Style.AllowShortFunctionsOnASingleLine ==
+ FormatStyle::SFS_Inline);
+ } else if (Style.Language == FormatStyle::LK_Java) {
+ if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
+ Right.Next->is(tok::string_literal))
+ return true;
+ }
+
// If the last token before a '}' is a comma or a trailing comment, the
// intention is to insert a line break after it in order to make shuffling
// around entries easier.
@@ -2030,7 +2064,7 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
Left.isNot(TT_CtorInitializerColon) &&
(Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
if (Left.isTrailingComment())
- return true;
+ return true;
if (Left.isStringLiteral() &&
(Right.isStringLiteral() || Right.is(TT_ObjCStringLiteral)))
return true;
@@ -2060,12 +2094,6 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
return true;
if (Right.is(TT_InlineASMBrace))
return Right.HasUnescapedNewline;
- if (Style.Language == FormatStyle::LK_JavaScript && Right.is(tok::r_brace) &&
- Left.is(tok::l_brace) && !Left.Children.empty())
- // Support AllowShortFunctionsOnASingleLine for JavaScript.
- return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
- (Left.NestingLevel == 0 && Line.Level == 0 &&
- Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline);
if (isAllmanBrace(Left) || isAllmanBrace(Right))
return Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
Style.BreakBeforeBraces == FormatStyle::BS_GNU;
@@ -2082,26 +2110,6 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
Line.Last->is(tok::l_brace))
return true;
- if (Style.Language == FormatStyle::LK_JavaScript) {
- // FIXME: This might apply to other languages and token kinds.
- if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous &&
- Left.Previous->is(tok::char_constant))
- return true;
- if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) &&
- Left.NestingLevel == 0 && Left.Previous &&
- Left.Previous->is(tok::equal) &&
- Line.First->isOneOf(tok::identifier, Keywords.kw_import,
- tok::kw_export) &&
- // kw_var is a pseudo-token that's a tok::identifier, so matches above.
- !Line.First->is(Keywords.kw_var))
- // Enum style object literal.
- return true;
- } else if (Style.Language == FormatStyle::LK_Java) {
- if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
- Right.Next->is(tok::string_literal))
- return true;
- }
-
return false;
}
diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h
index a948cdb..b8a6be0 100644
--- a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h
+++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h
@@ -59,7 +59,7 @@ public:
I->Tok->Previous = Current;
Current = Current->Next;
Current->Children.clear();
- for (const auto& Child : Node.Children) {
+ for (const auto &Child : Node.Children) {
Children.push_back(new AnnotatedLine(Child));
Current->Children.push_back(Children.back());
}
@@ -80,6 +80,12 @@ public:
}
}
+ /// \c true if this line starts with the given tokens in order, ignoring
+ /// comments.
+ template <typename... Ts> bool startsWith(Ts... Tokens) const {
+ return startsWith(First, Tokens...);
+ }
+
FormatToken *First;
FormatToken *Last;
@@ -107,6 +113,18 @@ private:
// Disallow copying.
AnnotatedLine(const AnnotatedLine &) = delete;
void operator=(const AnnotatedLine &) = delete;
+
+ template <typename A, typename... Ts>
+ bool startsWith(FormatToken *Tok, A K1) const {
+ while (Tok && Tok->is(tok::comment))
+ Tok = Tok->Next;
+ return Tok && Tok->is(K1);
+ }
+
+ template <typename A, typename... Ts>
+ bool startsWith(FormatToken *Tok, A K1, Ts... Tokens) const {
+ return startsWith(Tok, K1) && startsWith(Tok->Next, Tokens...);
+ }
};
/// \brief Determines extra information about the tokens comprising an
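
The new startsWith helpers let callers ask whether a line begins with a given token sequence while skipping leading comments, which is what the Line.startsWith(...) call sites in TokenAnnotator.cpp and UnwrappedLineFormatter.cpp rely on. A standalone sketch of the same idea, with a hypothetical Token type and written iteratively rather than with variadic templates:

    #include <initializer_list>

    struct Token {
      bool IsComment;
      int Kind;
      Token *Next;
    };

    // True if the list beginning at Tok starts with Kinds, ignoring comments.
    static bool startsWithKinds(const Token *Tok,
                                std::initializer_list<int> Kinds) {
      for (int Kind : Kinds) {
        while (Tok && Tok->IsComment) // Skip over comment tokens.
          Tok = Tok->Next;
        if (!Tok || Tok->Kind != Kind)
          return false;
        Tok = Tok->Next;              // Advance past the matched token.
      }
      return true;
    }
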
diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp
index cbf8c6c..b6784b3 100644
--- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp
+++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -21,7 +21,7 @@ namespace {
bool startsExternCBlock(const AnnotatedLine &Line) {
const FormatToken *Next = Line.First->getNextNonComment();
const FormatToken *NextNext = Next ? Next->getNextNonComment() : nullptr;
- return Line.First->is(tok::kw_extern) && Next && Next->isStringLiteral() &&
+ return Line.startsWith(tok::kw_extern) && Next && Next->isStringLiteral() &&
NextNext && NextNext->is(tok::l_brace);
}
@@ -51,11 +51,13 @@ public:
/// next.
void nextLine(const AnnotatedLine &Line) {
Offset = getIndentOffset(*Line.First);
+ // Update the indent level cache size so that we can rely on it
+    // having the right size in adjustToUnmodifiedLine.
+ while (IndentForLevel.size() <= Line.Level)
+ IndentForLevel.push_back(-1);
if (Line.InPPDirective) {
Indent = Line.Level * Style.IndentWidth + AdditionalIndent;
} else {
- while (IndentForLevel.size() <= Line.Level)
- IndentForLevel.push_back(-1);
IndentForLevel.resize(Line.Level + 1);
Indent = getIndent(IndentForLevel, Line.Level);
}
@@ -72,7 +74,7 @@ public:
unsigned LevelIndent = Line.First->OriginalColumn;
if (static_cast<int>(LevelIndent) - Offset >= 0)
LevelIndent -= Offset;
- if ((Line.First->isNot(tok::comment) || IndentForLevel[Line.Level] == -1) &&
+ if ((!Line.First->is(tok::comment) || IndentForLevel[Line.Level] == -1) &&
!Line.InPPDirective)
IndentForLevel[Line.Level] = LevelIndent;
}
@@ -187,7 +189,7 @@ private:
// If necessary, change to something smarter.
bool MergeShortFunctions =
Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All ||
- (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty &&
+ (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
I[1]->First->is(tok::r_brace)) ||
(Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline &&
TheLine->Level != 0);
@@ -278,7 +280,7 @@ private:
TT_LineComment))
return 0;
// Only inline simple if's (no nested if or else).
- if (I + 2 != E && Line.First->is(tok::kw_if) &&
+ if (I + 2 != E && Line.startsWith(tok::kw_if) &&
I[2]->First->is(tok::kw_else))
return 0;
return 1;
@@ -332,12 +334,11 @@ private:
return 0;
if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try,
tok::kw___try, tok::kw_catch, tok::kw___finally,
- tok::kw_for, tok::r_brace) ||
- Line.First->is(Keywords.kw___except)) {
+ tok::kw_for, tok::r_brace, Keywords.kw___except)) {
if (!Style.AllowShortBlocksOnASingleLine)
return 0;
if (!Style.AllowShortIfStatementsOnASingleLine &&
- Line.First->is(tok::kw_if))
+ Line.startsWith(tok::kw_if))
return 0;
if (!Style.AllowShortLoopsOnASingleLine &&
Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for))
@@ -360,7 +361,7 @@ private:
Tok->SpacesRequiredBefore = 0;
Tok->CanBreakBefore = true;
return 1;
- } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace) &&
+ } else if (Limit != 0 && !Line.startsWith(tok::kw_namespace) &&
!startsExternCBlock(Line)) {
// We don't merge short records.
if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,
@@ -444,9 +445,9 @@ private:
const FormatStyle &Style;
const AdditionalKeywords &Keywords;
- const SmallVectorImpl<AnnotatedLine*>::const_iterator End;
+ const SmallVectorImpl<AnnotatedLine *>::const_iterator End;
- SmallVectorImpl<AnnotatedLine*>::const_iterator Next;
+ SmallVectorImpl<AnnotatedLine *>::const_iterator Next;
};
static void markFinalized(FormatToken *Tok) {
diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h
index da9aa1c..478617d 100644
--- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h
+++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h
@@ -52,7 +52,8 @@ private:
/// \brief Returns the column limit for a line, taking into account whether we
/// need an escaped newline due to a continued preprocessor directive.
- unsigned getColumnLimit(bool InPPDirective, const AnnotatedLine *NextLine) const;
+ unsigned getColumnLimit(bool InPPDirective,
+ const AnnotatedLine *NextLine) const;
// Cache to store the penalty of formatting a vector of AnnotatedLines
// starting from a specific additional offset. Improves performance if there
diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp
index 6ad4329..ea1ca39 100644
--- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp
@@ -251,7 +251,6 @@ void UnwrappedLineParser::parse() {
assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
}
} while (!PPLevelBranchIndex.empty());
-
}
void UnwrappedLineParser::parseFile() {
@@ -774,7 +773,15 @@ void UnwrappedLineParser::parseStructuralElement() {
parseBracedList();
break;
case tok::kw_enum:
+ // parseEnum falls through and does not yet add an unwrapped line as an
+ // enum definition can start a structural element.
parseEnum();
+ // This does not apply for Java and JavaScript.
+ if (Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript) {
+ addUnwrappedLine();
+ return;
+ }
break;
case tok::kw_typedef:
nextToken();
@@ -785,9 +792,15 @@ void UnwrappedLineParser::parseStructuralElement() {
case tok::kw_struct:
case tok::kw_union:
case tok::kw_class:
+ // parseRecord falls through and does not yet add an unwrapped line as a
+ // record declaration or definition can start a structural element.
parseRecord();
- // A record declaration or definition is always the start of a structural
- // element.
+ // This does not apply for Java and JavaScript.
+ if (Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript) {
+ addUnwrappedLine();
+ return;
+ }
break;
case tok::period:
nextToken();
@@ -848,6 +861,7 @@ void UnwrappedLineParser::parseStructuralElement() {
Style.Language == FormatStyle::LK_Java) &&
FormatTok->is(Keywords.kw_interface)) {
parseRecord();
+ addUnwrappedLine();
break;
}
@@ -872,8 +886,7 @@ void UnwrappedLineParser::parseStructuralElement() {
? FormatTok->NewlinesBefore > 0
: CommentsBeforeNextToken.front()->NewlinesBefore > 0;
- if (FollowedByNewline &&
- (Text.size() >= 5 || FunctionLike) &&
+ if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
addUnwrappedLine();
return;
@@ -1033,7 +1046,7 @@ void UnwrappedLineParser::tryToParseJSFunction() {
if (FormatTok->is(tok::l_brace))
tryToParseBracedList();
else
- while(FormatTok->isNot(tok::l_brace) && !eof())
+ while (FormatTok->isNot(tok::l_brace) && !eof())
nextToken();
}
@@ -1066,7 +1079,7 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
nextToken();
// Fat arrows can be followed by simple expressions or by child blocks
// in curly braces.
- if (FormatTok->is(tok::l_brace)){
+ if (FormatTok->is(tok::l_brace)) {
parseChildBlock();
continue;
}
@@ -1475,6 +1488,7 @@ void UnwrappedLineParser::parseEnum() {
// Eat up enum class ...
if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
nextToken();
+
while (FormatTok->Tok.getIdentifierInfo() ||
FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
tok::greater, tok::comma, tok::question)) {
@@ -1482,8 +1496,14 @@ void UnwrappedLineParser::parseEnum() {
// We can have macros or attributes in between 'enum' and the enum name.
if (FormatTok->is(tok::l_paren))
parseParens();
- if (FormatTok->is(tok::identifier))
+ if (FormatTok->is(tok::identifier)) {
nextToken();
+ // If there are two identifiers in a row, this is likely an elaborate
+ // return type. In Java, this can be "implements", etc.
+ if (Style.Language == FormatStyle::LK_Cpp &&
+ FormatTok->is(tok::identifier))
+ return;
+ }
}
// Just a declaration or something is wrong.
@@ -1505,8 +1525,8 @@ void UnwrappedLineParser::parseEnum() {
addUnwrappedLine();
}
- // We fall through to parsing a structural element afterwards, so that in
- // enum A {} n, m;
+ // There is no addUnwrappedLine() here so that we fall through to parsing a
+ // structural element afterwards. Thus, in "enum A {} n, m;",
// "} n, m;" will end up in one unwrapped line.
}
@@ -1576,7 +1596,6 @@ void UnwrappedLineParser::parseRecord() {
const FormatToken &InitialToken = *FormatTok;
nextToken();
-
// The actual identifier can be a nested name specifier, and in macros
// it is often token-pasted.
while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
@@ -1623,13 +1642,9 @@ void UnwrappedLineParser::parseRecord() {
parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
/*MunchSemi=*/false);
}
- // We fall through to parsing a structural element afterwards, so
- // class A {} n, m;
- // will end up in one unwrapped line.
- // This does not apply for Java and JavaScript.
- if (Style.Language == FormatStyle::LK_Java ||
- Style.Language == FormatStyle::LK_JavaScript)
- addUnwrappedLine();
+ // There is no addUnwrappedLine() here so that we fall through to parsing a
+ // structural element afterwards. Thus, in "class A {} n, m;",
+ // "} n, m;" will end up in one unwrapped line.
}
void UnwrappedLineParser::parseObjCProtocolList() {
@@ -1722,7 +1737,8 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
return;
}
- if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, Keywords.kw_var))
+ if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum,
+ Keywords.kw_var))
return; // Fall through to parsing the corresponding structure.
if (FormatTok->is(tok::l_brace)) {
diff --git a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h
index 4bfc813..d973838 100644
--- a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h
+++ b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h
@@ -167,9 +167,11 @@ private:
/// \brief Align consecutive assignments over all \c Changes.
void alignConsecutiveAssignments();
- /// \brief Align consecutive assignments from change \p Start to change \p End at
+ /// \brief Align consecutive assignments from change \p Start to change \p End
+ /// at
/// the specified \p Column.
- void alignConsecutiveAssignments(unsigned Start, unsigned End, unsigned Column);
+ void alignConsecutiveAssignments(unsigned Start, unsigned End,
+ unsigned Column);
/// \brief Align trailing comments over all \c Changes.
void alignTrailingComments();
diff --git a/contrib/llvm/tools/clang/lib/Frontend/ASTMerge.cpp b/contrib/llvm/tools/clang/lib/Frontend/ASTMerge.cpp
index b84df94..f6f1551 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/ASTMerge.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/ASTMerge.cpp
@@ -45,8 +45,10 @@ void ASTMergeAction::ExecuteAction() {
new ForwardingDiagnosticConsumer(
*CI.getDiagnostics().getClient()),
/*ShouldOwnClient=*/true));
- std::unique_ptr<ASTUnit> Unit = ASTUnit::LoadFromASTFile(
- ASTFiles[I], Diags, CI.getFileSystemOpts(), false);
+ std::unique_ptr<ASTUnit> Unit =
+ ASTUnit::LoadFromASTFile(ASTFiles[I], CI.getPCHContainerOperations(),
+ Diags, CI.getFileSystemOpts(), false);
+
if (!Unit)
continue;
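
The hunk above starts threading a std::shared_ptr<PCHContainerOperations> from the compiler instance into ASTUnit::LoadFromASTFile. A minimal sketch of the same constructor/parameter injection pattern, using hypothetical ContainerOps and Loader types rather than the real clang classes:

    #include <cassert>
    #include <memory>
    #include <string>

    // Hypothetical stand-ins: a service object describing how serialized ASTs
    // are containerized, and a loader that needs it. The point is the plumbing:
    // the owner keeps a shared_ptr and every entry point takes the same
    // shared_ptr, so one instance is shared by everything it creates.
    struct ContainerOps {
      std::string formatName() const { return "raw"; }
    };

    class Loader {
      std::shared_ptr<ContainerOps> Ops;

    public:
      explicit Loader(std::shared_ptr<ContainerOps> Ops) : Ops(std::move(Ops)) {}

      // Mirrors the LoadFromASTFile-style change: the entry point forwards the
      // shared service instead of constructing readers that know nothing about
      // the container format.
      bool load(const std::string &Path) const {
        assert(Ops && "container operations must be injected");
        return !Path.empty() && Ops->formatName() == "raw";
      }
    };

    int main() {
      auto Ops = std::make_shared<ContainerOps>();   // owned once ...
      Loader A(Ops), B(Ops);                         // ... shared by every client
      return A.load("a.pch") && B.load("b.pch") ? 0 : 1;
    }
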
diff --git a/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp b/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp
index 4fd330d4..01c56bd 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp
@@ -648,7 +648,9 @@ void ASTUnit::ConfigureDiags(IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
}
std::unique_ptr<ASTUnit> ASTUnit::LoadFromASTFile(
- const std::string &Filename, IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
+ const std::string &Filename,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
const FileSystemOptions &FileSystemOpts, bool OnlyLocalDecls,
ArrayRef<RemappedFile> RemappedFiles, bool CaptureDiagnostics,
bool AllowPCHWithCompilerErrors, bool UserFilesAreVolatile) {
@@ -705,10 +707,10 @@ std::unique_ptr<ASTUnit> ASTUnit::LoadFromASTFile(
bool disableValid = false;
if (::getenv("LIBCLANG_DISABLE_PCH_VALIDATION"))
disableValid = true;
- AST->Reader = new ASTReader(PP, Context,
- /*isysroot=*/"",
- /*DisableValidation=*/disableValid,
- AllowPCHWithCompilerErrors);
+ AST->Reader = new ASTReader(PP, Context, *PCHContainerOps,
+ /*isysroot=*/"",
+ /*DisableValidation=*/disableValid,
+ AllowPCHWithCompilerErrors);
AST->Reader->setListener(llvm::make_unique<ASTInfoCollector>(
*AST->PP, Context, AST->ASTFileLangOpts, AST->TargetOpts, AST->Target,
@@ -916,13 +918,16 @@ class PrecompilePreambleConsumer : public PCHGenerator {
unsigned &Hash;
std::vector<Decl *> TopLevelDecls;
PrecompilePreambleAction *Action;
+ raw_ostream *Out;
public:
PrecompilePreambleConsumer(ASTUnit &Unit, PrecompilePreambleAction *Action,
const Preprocessor &PP, StringRef isysroot,
raw_ostream *Out)
- : PCHGenerator(PP, "", nullptr, isysroot, Out, /*AllowASTWithErrors=*/true),
- Unit(Unit), Hash(Unit.getCurrentTopLevelHashValue()), Action(Action) {
+ : PCHGenerator(PP, "", nullptr, isysroot, std::make_shared<PCHBuffer>(),
+ /*AllowASTWithErrors=*/true),
+ Unit(Unit), Hash(Unit.getCurrentTopLevelHashValue()), Action(Action),
+ Out(Out) {
Hash = 0;
}
@@ -943,6 +948,14 @@ public:
void HandleTranslationUnit(ASTContext &Ctx) override {
PCHGenerator::HandleTranslationUnit(Ctx);
if (hasEmittedPCH()) {
+ // Write the generated bitstream to "Out".
+ *Out << getPCH();
+ // Make sure it hits disk now.
+ Out->flush();
+ // Free the buffer.
+ llvm::SmallVector<char, 0> Empty;
+ getPCH() = std::move(Empty);
+
// Translate the top-level declarations we captured during
// parsing into declaration IDs in the precompiled
// preamble. This will allow us to deserialize those top-level
@@ -1015,14 +1028,16 @@ static void checkAndSanitizeDiags(SmallVectorImpl<StoredDiagnostic> &
///
/// \returns True if a failure occurred that causes the ASTUnit not to
/// contain any translation-unit information, false otherwise.
-bool ASTUnit::Parse(std::unique_ptr<llvm::MemoryBuffer> OverrideMainBuffer) {
+bool ASTUnit::Parse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ std::unique_ptr<llvm::MemoryBuffer> OverrideMainBuffer) {
SavedMainFileBuffer.reset();
if (!Invocation)
return true;
// Create the compiler instance to use for building the AST.
- std::unique_ptr<CompilerInstance> Clang(new CompilerInstance());
+ std::unique_ptr<CompilerInstance> Clang(
+ new CompilerInstance(PCHContainerOps));
// Recover resources if we crash before exiting this method.
llvm::CrashRecoveryContextCleanupRegistrar<CompilerInstance>
@@ -1323,6 +1338,7 @@ makeStandaloneDiagnostic(const LangOptions &LangOpts,
/// Otherwise, returns a NULL pointer.
std::unique_ptr<llvm::MemoryBuffer>
ASTUnit::getMainBufferWithPrecompiledPreamble(
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
const CompilerInvocation &PreambleInvocationIn, bool AllowRebuild,
unsigned MaxLines) {
@@ -1487,7 +1503,8 @@ ASTUnit::getMainBufferWithPrecompiledPreamble(
PreprocessorOpts.PrecompiledPreambleBytes.second = false;
// Create the compiler instance to use for building the precompiled preamble.
- std::unique_ptr<CompilerInstance> Clang(new CompilerInstance());
+ std::unique_ptr<CompilerInstance> Clang(
+ new CompilerInstance(PCHContainerOps));
// Recover resources if we crash before exiting this method.
llvm::CrashRecoveryContextCleanupRegistrar<CompilerInstance>
@@ -1702,12 +1719,13 @@ ASTUnit *ASTUnit::create(CompilerInvocation *CI,
}
ASTUnit *ASTUnit::LoadFromCompilerInvocationAction(
- CompilerInvocation *CI, IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
- ASTFrontendAction *Action, ASTUnit *Unit, bool Persistent,
- StringRef ResourceFilesPath, bool OnlyLocalDecls, bool CaptureDiagnostics,
- bool PrecompilePreamble, bool CacheCodeCompletionResults,
- bool IncludeBriefCommentsInCodeCompletion, bool UserFilesAreVolatile,
- std::unique_ptr<ASTUnit> *ErrAST) {
+ CompilerInvocation *CI,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ IntrusiveRefCntPtr<DiagnosticsEngine> Diags, ASTFrontendAction *Action,
+ ASTUnit *Unit, bool Persistent, StringRef ResourceFilesPath,
+ bool OnlyLocalDecls, bool CaptureDiagnostics, bool PrecompilePreamble,
+ bool CacheCodeCompletionResults, bool IncludeBriefCommentsInCodeCompletion,
+ bool UserFilesAreVolatile, std::unique_ptr<ASTUnit> *ErrAST) {
assert(CI && "A CompilerInvocation is required");
std::unique_ptr<ASTUnit> OwnAST;
@@ -1746,7 +1764,8 @@ ASTUnit *ASTUnit::LoadFromCompilerInvocationAction(
ProcessWarningOptions(AST->getDiagnostics(), CI->getDiagnosticOpts());
// Create the compiler instance to use for building the AST.
- std::unique_ptr<CompilerInstance> Clang(new CompilerInstance());
+ std::unique_ptr<CompilerInstance> Clang(
+ new CompilerInstance(PCHContainerOps));
// Recover resources if we crash before exiting this method.
llvm::CrashRecoveryContextCleanupRegistrar<CompilerInstance>
@@ -1841,7 +1860,9 @@ ASTUnit *ASTUnit::LoadFromCompilerInvocationAction(
return AST;
}
-bool ASTUnit::LoadFromCompilerInvocation(bool PrecompilePreamble) {
+bool ASTUnit::LoadFromCompilerInvocation(
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ bool PrecompilePreamble) {
if (!Invocation)
return true;
@@ -1853,7 +1874,8 @@ bool ASTUnit::LoadFromCompilerInvocation(bool PrecompilePreamble) {
std::unique_ptr<llvm::MemoryBuffer> OverrideMainBuffer;
if (PrecompilePreamble) {
PreambleRebuildCounter = 2;
- OverrideMainBuffer = getMainBufferWithPrecompiledPreamble(*Invocation);
+ OverrideMainBuffer =
+ getMainBufferWithPrecompiledPreamble(PCHContainerOps, *Invocation);
}
SimpleTimer ParsingTimer(WantTiming);
@@ -1863,12 +1885,14 @@ bool ASTUnit::LoadFromCompilerInvocation(bool PrecompilePreamble) {
llvm::CrashRecoveryContextCleanupRegistrar<llvm::MemoryBuffer>
MemBufferCleanup(OverrideMainBuffer.get());
- return Parse(std::move(OverrideMainBuffer));
+ return Parse(PCHContainerOps, std::move(OverrideMainBuffer));
}
std::unique_ptr<ASTUnit> ASTUnit::LoadFromCompilerInvocation(
- CompilerInvocation *CI, IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
- bool OnlyLocalDecls, bool CaptureDiagnostics, bool PrecompilePreamble,
+ CompilerInvocation *CI,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ IntrusiveRefCntPtr<DiagnosticsEngine> Diags, bool OnlyLocalDecls,
+ bool CaptureDiagnostics, bool PrecompilePreamble,
TranslationUnitKind TUKind, bool CacheCodeCompletionResults,
bool IncludeBriefCommentsInCodeCompletion, bool UserFilesAreVolatile) {
// Create the AST unit.
@@ -1897,13 +1921,14 @@ std::unique_ptr<ASTUnit> ASTUnit::LoadFromCompilerInvocation(
llvm::CrashRecoveryContextReleaseRefCleanup<DiagnosticsEngine> >
DiagCleanup(Diags.get());
- if (AST->LoadFromCompilerInvocation(PrecompilePreamble))
+ if (AST->LoadFromCompilerInvocation(PCHContainerOps, PrecompilePreamble))
return nullptr;
return AST;
}
ASTUnit *ASTUnit::LoadFromCommandLine(
const char **ArgBegin, const char **ArgEnd,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
IntrusiveRefCntPtr<DiagnosticsEngine> Diags, StringRef ResourceFilesPath,
bool OnlyLocalDecls, bool CaptureDiagnostics,
ArrayRef<RemappedFile> RemappedFiles, bool RemappedFilesKeepOriginalName,
@@ -1975,7 +2000,7 @@ ASTUnit *ASTUnit::LoadFromCommandLine(
llvm::CrashRecoveryContextCleanupRegistrar<ASTUnit>
ASTUnitCleanup(AST.get());
- if (AST->LoadFromCompilerInvocation(PrecompilePreamble)) {
+ if (AST->LoadFromCompilerInvocation(PCHContainerOps, PrecompilePreamble)) {
// Some error occurred, if caller wants to examine diagnostics, pass it the
// ASTUnit.
if (ErrAST) {
@@ -1988,7 +2013,8 @@ ASTUnit *ASTUnit::LoadFromCommandLine(
return AST.release();
}
-bool ASTUnit::Reparse(ArrayRef<RemappedFile> RemappedFiles) {
+bool ASTUnit::Reparse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ ArrayRef<RemappedFile> RemappedFiles) {
if (!Invocation)
return true;
@@ -2012,8 +2038,9 @@ bool ASTUnit::Reparse(ArrayRef<RemappedFile> RemappedFiles) {
// build a precompiled preamble, do so now.
std::unique_ptr<llvm::MemoryBuffer> OverrideMainBuffer;
if (!getPreambleFile(this).empty() || PreambleRebuildCounter > 0)
- OverrideMainBuffer = getMainBufferWithPrecompiledPreamble(*Invocation);
-
+ OverrideMainBuffer =
+ getMainBufferWithPrecompiledPreamble(PCHContainerOps, *Invocation);
+
// Clear out the diagnostics state.
getDiagnostics().Reset();
ProcessWarningOptions(getDiagnostics(), Invocation->getDiagnosticOpts());
@@ -2021,7 +2048,7 @@ bool ASTUnit::Reparse(ArrayRef<RemappedFile> RemappedFiles) {
getDiagnostics().setNumWarnings(NumWarningsInPreamble);
// Parse the sources
- bool Result = Parse(std::move(OverrideMainBuffer));
+ bool Result = Parse(PCHContainerOps, std::move(OverrideMainBuffer));
// If we're caching global code-completion results, and the top-level
// declarations have changed, clear out the code-completion cache.
@@ -2273,18 +2300,15 @@ void AugmentedCodeCompleteConsumer::ProcessCodeCompleteResults(Sema &S,
AllResults.size());
}
-
-
-void ASTUnit::CodeComplete(StringRef File, unsigned Line, unsigned Column,
- ArrayRef<RemappedFile> RemappedFiles,
- bool IncludeMacros,
- bool IncludeCodePatterns,
- bool IncludeBriefComments,
- CodeCompleteConsumer &Consumer,
- DiagnosticsEngine &Diag, LangOptions &LangOpts,
- SourceManager &SourceMgr, FileManager &FileMgr,
- SmallVectorImpl<StoredDiagnostic> &StoredDiagnostics,
- SmallVectorImpl<const llvm::MemoryBuffer *> &OwnedBuffers) {
+void ASTUnit::CodeComplete(
+ StringRef File, unsigned Line, unsigned Column,
+ ArrayRef<RemappedFile> RemappedFiles, bool IncludeMacros,
+ bool IncludeCodePatterns, bool IncludeBriefComments,
+ CodeCompleteConsumer &Consumer,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ DiagnosticsEngine &Diag, LangOptions &LangOpts, SourceManager &SourceMgr,
+ FileManager &FileMgr, SmallVectorImpl<StoredDiagnostic> &StoredDiagnostics,
+ SmallVectorImpl<const llvm::MemoryBuffer *> &OwnedBuffers) {
if (!Invocation)
return;
@@ -2318,7 +2342,8 @@ void ASTUnit::CodeComplete(StringRef File, unsigned Line, unsigned Column,
LangOpts.SpellChecking = false;
CCInvocation->getDiagnosticOpts().IgnoreWarnings = true;
- std::unique_ptr<CompilerInstance> Clang(new CompilerInstance());
+ std::unique_ptr<CompilerInstance> Clang(
+ new CompilerInstance(PCHContainerOps));
// Recover resources if we crash before exiting this method.
llvm::CrashRecoveryContextCleanupRegistrar<CompilerInstance>
@@ -2389,7 +2414,7 @@ void ASTUnit::CodeComplete(StringRef File, unsigned Line, unsigned Column,
if (!llvm::sys::fs::getUniqueID(MainPath, MainID)) {
if (CompleteFileID == MainID && Line > 1)
OverrideMainBuffer = getMainBufferWithPrecompiledPreamble(
- *CCInvocation, false, Line - 1);
+ PCHContainerOps, *CCInvocation, false, Line - 1);
}
}
}
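
The preamble consumer above now writes getPCH() to the output stream and then frees the buffer by move-assigning an empty SmallVector. The same capacity-release idiom with std::vector, under the assumption that clear() alone would keep the allocation around:

    #include <cassert>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<char> Buffer(1 << 20, 'x');          // stands in for the serialized AST
      std::string Sink(Buffer.begin(), Buffer.end());  // pretend this is the write to disk

      // clear() would keep the heap allocation alive; moving an empty vector in
      // (or swapping with one) releases it, which is what the hunk above does
      // with llvm::SmallVector<char, 0>.
      std::vector<char> Empty;
      Buffer = std::move(Empty);

      assert(Buffer.empty());   // contents are gone; on typical implementations
                                // the old allocation is freed as well
      return Sink.size() == (1u << 20) ? 0 : 1;
    }
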
diff --git a/contrib/llvm/tools/clang/lib/Frontend/ChainedIncludesSource.cpp b/contrib/llvm/tools/clang/lib/Frontend/ChainedIncludesSource.cpp
index f3677f8..be30d43 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/ChainedIncludesSource.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/ChainedIncludesSource.cpp
@@ -80,8 +80,9 @@ createASTReader(CompilerInstance &CI, StringRef pchFile,
ASTDeserializationListener *deserialListener = nullptr) {
Preprocessor &PP = CI.getPreprocessor();
std::unique_ptr<ASTReader> Reader;
- Reader.reset(new ASTReader(PP, CI.getASTContext(), /*isysroot=*/"",
- /*DisableValidation=*/true));
+ Reader.reset(new ASTReader(PP, CI.getASTContext(),
+ *CI.getPCHContainerOperations(),
+ /*isysroot=*/"", /*DisableValidation=*/true));
for (unsigned ti = 0; ti < bufNames.size(); ++ti) {
StringRef sr(bufNames[ti]);
Reader->addInMemoryBuffer(sr, std::move(MemBufs[ti]));
@@ -145,7 +146,8 @@ IntrusiveRefCntPtr<ExternalSemaSource> clang::createChainedIncludesSource(
IntrusiveRefCntPtr<DiagnosticsEngine> Diags(
new DiagnosticsEngine(DiagID, &CI.getDiagnosticOpts(), DiagClient));
- std::unique_ptr<CompilerInstance> Clang(new CompilerInstance());
+ std::unique_ptr<CompilerInstance> Clang(
+ new CompilerInstance(CI.getPCHContainerOperations()));
Clang->setInvocation(CInvok.release());
Clang->setDiagnostics(Diags.get());
Clang->setTarget(TargetInfo::CreateTargetInfo(
@@ -157,11 +159,9 @@ IntrusiveRefCntPtr<ExternalSemaSource> clang::createChainedIncludesSource(
&Clang->getPreprocessor());
Clang->createASTContext();
- SmallVector<char, 256> serialAST;
- llvm::raw_svector_ostream OS(serialAST);
- auto consumer =
- llvm::make_unique<PCHGenerator>(Clang->getPreprocessor(), "-", nullptr,
- /*isysroot=*/"", &OS);
+ auto Buffer = std::make_shared<PCHBuffer>();
+ auto consumer = llvm::make_unique<PCHGenerator>(
+ Clang->getPreprocessor(), "-", nullptr, /*isysroot=*/"", Buffer);
Clang->getASTContext().setASTMutationListener(
consumer->GetASTMutationListener());
Clang->setASTConsumer(std::move(consumer));
@@ -198,7 +198,11 @@ IntrusiveRefCntPtr<ExternalSemaSource> clang::createChainedIncludesSource(
ParseAST(Clang->getSema());
Clang->getDiagnosticClient().EndSourceFile();
- SerialBufs.push_back(llvm::MemoryBuffer::getMemBufferCopy(OS.str()));
+ assert(Buffer->IsComplete && "serialization did not complete");
+ auto &serialAST = Buffer->Data;
+ SerialBufs.push_back(llvm::MemoryBuffer::getMemBufferCopy(
+ StringRef(serialAST.data(), serialAST.size())));
+ serialAST.clear();
source->CIs.push_back(Clang.release());
}
diff --git a/contrib/llvm/tools/clang/lib/Frontend/CompilerInstance.cpp b/contrib/llvm/tools/clang/lib/Frontend/CompilerInstance.cpp
index aef3905..6b0fed67 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/CompilerInstance.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/CompilerInstance.cpp
@@ -51,12 +51,13 @@
using namespace clang;
-CompilerInstance::CompilerInstance(bool BuildingModule)
- : ModuleLoader(BuildingModule),
- Invocation(new CompilerInvocation()), ModuleManager(nullptr),
- BuildGlobalModuleIndex(false), HaveFullGlobalModuleIndex(false),
- ModuleBuildFailed(false) {
-}
+CompilerInstance::CompilerInstance(
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ bool BuildingModule)
+ : ModuleLoader(BuildingModule), Invocation(new CompilerInvocation()),
+ ModuleManager(nullptr), ThePCHContainerOperations(PCHContainerOps),
+ BuildGlobalModuleIndex(false), HaveFullGlobalModuleIndex(false),
+ ModuleBuildFailed(false) {}
CompilerInstance::~CompilerInstance() {
assert(OutputFiles.empty() && "Still output files in flight?");
@@ -321,7 +322,8 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) {
PP->getFileManager(), PPOpts);
// Predefine macros and configure the preprocessor.
- InitializePreprocessor(*PP, PPOpts, getFrontendOpts());
+ InitializePreprocessor(*PP, PPOpts, *getPCHContainerOperations(),
+ getFrontendOpts());
// Initialize the header search object.
ApplyHeaderSearchOptions(PP->getHeaderSearchInfo(), getHeaderSearchOpts(),
@@ -397,22 +399,24 @@ void CompilerInstance::createPCHExternalASTSource(
ModuleManager = createPCHExternalASTSource(
Path, getHeaderSearchOpts().Sysroot, DisablePCHValidation,
AllowPCHWithCompilerErrors, getPreprocessor(), getASTContext(),
- DeserializationListener, OwnDeserializationListener, Preamble,
+ *getPCHContainerOperations(), DeserializationListener,
+ OwnDeserializationListener, Preamble,
getFrontendOpts().UseGlobalModuleIndex);
}
IntrusiveRefCntPtr<ASTReader> CompilerInstance::createPCHExternalASTSource(
StringRef Path, const std::string &Sysroot, bool DisablePCHValidation,
bool AllowPCHWithCompilerErrors, Preprocessor &PP, ASTContext &Context,
+ const PCHContainerOperations &PCHContainerOps,
void *DeserializationListener, bool OwnDeserializationListener,
bool Preamble, bool UseGlobalModuleIndex) {
HeaderSearchOptions &HSOpts = PP.getHeaderSearchInfo().getHeaderSearchOpts();
- IntrusiveRefCntPtr<ASTReader> Reader(
- new ASTReader(PP, Context, Sysroot.empty() ? "" : Sysroot.c_str(),
- DisablePCHValidation, AllowPCHWithCompilerErrors,
- /*AllowConfigurationMismatch*/ false,
- HSOpts.ModulesValidateSystemHeaders, UseGlobalModuleIndex));
+ IntrusiveRefCntPtr<ASTReader> Reader(new ASTReader(
+ PP, Context, PCHContainerOps, Sysroot.empty() ? "" : Sysroot.c_str(),
+ DisablePCHValidation, AllowPCHWithCompilerErrors,
+ /*AllowConfigurationMismatch*/ false, HSOpts.ModulesValidateSystemHeaders,
+ UseGlobalModuleIndex));
// We need the external source to be set up before we read the AST, because
// eagerly-deserialized declarations may use it.
@@ -918,7 +922,8 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance,
// Construct a compiler instance that will be used to actually create the
// module.
- CompilerInstance Instance(/*BuildingModule=*/true);
+ CompilerInstance Instance(ImportingInstance.getPCHContainerOperations(),
+ /*BuildingModule=*/true);
Instance.setInvocation(&*Invocation);
Instance.createDiagnostics(new ForwardingDiagnosticConsumer(
@@ -1232,13 +1237,13 @@ void CompilerInstance::createModuleManager() {
HeaderSearchOptions &HSOpts = getHeaderSearchOpts();
std::string Sysroot = HSOpts.Sysroot;
const PreprocessorOptions &PPOpts = getPreprocessorOpts();
- ModuleManager = new ASTReader(getPreprocessor(), *Context,
- Sysroot.empty() ? "" : Sysroot.c_str(),
- PPOpts.DisablePCHValidation,
- /*AllowASTWithCompilerErrors=*/false,
- /*AllowConfigurationMismatch=*/false,
- HSOpts.ModulesValidateSystemHeaders,
- getFrontendOpts().UseGlobalModuleIndex);
+ ModuleManager = new ASTReader(
+ getPreprocessor(), *Context, *getPCHContainerOperations(),
+ Sysroot.empty() ? "" : Sysroot.c_str(), PPOpts.DisablePCHValidation,
+ /*AllowASTWithCompilerErrors=*/false,
+ /*AllowConfigurationMismatch=*/false,
+ HSOpts.ModulesValidateSystemHeaders,
+ getFrontendOpts().UseGlobalModuleIndex);
if (hasASTConsumer()) {
ModuleManager->setDeserializationListener(
getASTConsumer().GetASTDeserializationListener());
@@ -1279,6 +1284,7 @@ bool CompilerInstance::loadModuleFile(StringRef FileName) {
ModuleFileStack.push_back(FileName);
ModuleNameStack.push_back(StringRef());
if (ASTReader::readASTFileControlBlock(FileName, CI.getFileManager(),
+ *CI.getPCHContainerOperations(),
*this)) {
CI.getDiagnostics().Report(
SourceLocation(), CI.getFileManager().getBufferForFile(FileName)
@@ -1644,8 +1650,8 @@ GlobalModuleIndex *CompilerInstance::loadGlobalModuleIndex(
llvm::sys::fs::create_directories(
getPreprocessor().getHeaderSearchInfo().getModuleCachePath());
GlobalModuleIndex::writeIndex(
- getFileManager(),
- getPreprocessor().getHeaderSearchInfo().getModuleCachePath());
+ getFileManager(), *getPCHContainerOperations(),
+ getPreprocessor().getHeaderSearchInfo().getModuleCachePath());
ModuleManager->resetForReload();
ModuleManager->loadGlobalIndex();
GlobalIndex = ModuleManager->getGlobalIndex();
@@ -1672,8 +1678,8 @@ GlobalModuleIndex *CompilerInstance::loadGlobalModuleIndex(
}
if (RecreateIndex) {
GlobalModuleIndex::writeIndex(
- getFileManager(),
- getPreprocessor().getHeaderSearchInfo().getModuleCachePath());
+ getFileManager(), *getPCHContainerOperations(),
+ getPreprocessor().getHeaderSearchInfo().getModuleCachePath());
ModuleManager->resetForReload();
ModuleManager->loadGlobalIndex();
GlobalIndex = ModuleManager->getGlobalIndex();
diff --git a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp
index 4f7d15f..8546763 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp
@@ -321,6 +321,29 @@ GenerateOptimizationRemarkRegex(DiagnosticsEngine &Diags, ArgList &Args,
return Pattern;
}
+static bool parseDiagnosticLevelMask(StringRef FlagName,
+ const std::vector<std::string> &Levels,
+ DiagnosticsEngine *Diags,
+ DiagnosticLevelMask &M) {
+ bool Success = true;
+ for (const auto &Level : Levels) {
+ DiagnosticLevelMask const PM =
+ llvm::StringSwitch<DiagnosticLevelMask>(Level)
+ .Case("note", DiagnosticLevelMask::Note)
+ .Case("remark", DiagnosticLevelMask::Remark)
+ .Case("warning", DiagnosticLevelMask::Warning)
+ .Case("error", DiagnosticLevelMask::Error)
+ .Default(DiagnosticLevelMask::None);
+ if (PM == DiagnosticLevelMask::None) {
+ Success = false;
+ if (Diags)
+ Diags->Report(diag::err_drv_invalid_value) << FlagName << Level;
+ }
+ M = M | PM;
+ }
+ return Success;
+}
+
static void parseSanitizerKinds(StringRef FlagName,
const std::vector<std::string> &Sanitizers,
DiagnosticsEngine &Diags, SanitizerSet &S) {
@@ -532,8 +555,6 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Args.hasArg(OPT_fsanitize_coverage_8bit_counters);
Opts.SanitizeMemoryTrackOrigins =
getLastArgIntValue(Args, OPT_fsanitize_memory_track_origins_EQ, 0, Diags);
- Opts.SanitizeUndefinedTrapOnError =
- Args.hasArg(OPT_fsanitize_undefined_trap_on_error);
Opts.SSPBufferSize =
getLastArgIntValue(Args, OPT_stack_protector_buffer_size, 8, Diags);
Opts.StackRealignment = Args.hasArg(OPT_mstackrealign);
@@ -643,6 +664,9 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
parseSanitizerKinds("-fsanitize-recover=",
Args.getAllArgValues(OPT_fsanitize_recover_EQ), Diags,
Opts.SanitizeRecover);
+ parseSanitizerKinds("-fsanitize-trap=",
+ Args.getAllArgValues(OPT_fsanitize_trap_EQ), Diags,
+ Opts.SanitizeTrap);
Opts.CudaGpuBinaryFileNames =
Args.getAllArgValues(OPT_fcuda_include_gpubinary);
@@ -748,11 +772,18 @@ bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args,
<< Args.getLastArg(OPT_fdiagnostics_format)->getAsString(Args)
<< Format;
}
-
+
Opts.ShowSourceRanges = Args.hasArg(OPT_fdiagnostics_print_source_range_info);
Opts.ShowParseableFixits = Args.hasArg(OPT_fdiagnostics_parseable_fixits);
Opts.ShowPresumedLoc = !Args.hasArg(OPT_fno_diagnostics_use_presumed_location);
Opts.VerifyDiagnostics = Args.hasArg(OPT_verify);
+ DiagnosticLevelMask DiagMask = DiagnosticLevelMask::None;
+ Success &= parseDiagnosticLevelMask("-verify-ignore-unexpected=",
+ Args.getAllArgValues(OPT_verify_ignore_unexpected_EQ),
+ Diags, DiagMask);
+ if (Args.hasArg(OPT_verify_ignore_unexpected))
+ DiagMask = DiagnosticLevelMask::All;
+ Opts.setVerifyIgnoreUnexpected(DiagMask);
Opts.ElideType = !Args.hasArg(OPT_fno_elide_type);
Opts.ShowTemplateTree = Args.hasArg(OPT_fdiagnostics_show_template_tree);
Opts.ErrorLimit = getLastArgIntValue(Args, OPT_ferror_limit, 0, Diags);
@@ -1065,8 +1096,7 @@ static void ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args) {
Opts.ModuleCachePath = Args.getLastArgValue(OPT_fmodules_cache_path);
Opts.ModuleUserBuildPath = Args.getLastArgValue(OPT_fmodules_user_build_path);
Opts.DisableModuleHash = Args.hasArg(OPT_fdisable_module_hash);
- // -fmodules implies -fmodule-maps
- Opts.ModuleMaps = Args.hasArg(OPT_fmodule_maps) || Args.hasArg(OPT_fmodules);
+ Opts.ImplicitModuleMaps = Args.hasArg(OPT_fimplicit_module_maps);
Opts.ModuleMapFileHomeIsCwd = Args.hasArg(OPT_fmodule_map_file_home_is_cwd);
Opts.ModuleCachePruneInterval =
getLastArgIntValue(Args, OPT_fmodules_prune_interval, 7 * 24 * 60 * 60);
@@ -1495,8 +1525,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
!Args.hasArg(OPT_fno_modules_search_all) &&
Args.hasArg(OPT_fmodules_search_all);
Opts.ModulesErrorRecovery = !Args.hasArg(OPT_fno_modules_error_recovery);
- Opts.ModulesImplicitMaps = Args.hasFlag(OPT_fmodules_implicit_maps,
- OPT_fno_modules_implicit_maps, true);
Opts.ImplicitModules = !Args.hasArg(OPT_fno_implicit_modules);
Opts.CharIsSigned = Opts.OpenCL || !Args.hasArg(OPT_fno_signed_char);
Opts.WChar = Opts.CPlusPlus && !Args.hasArg(OPT_fno_wchar);
@@ -1803,7 +1831,7 @@ static void ParseTargetArgs(TargetOptions &Opts, ArgList &Args) {
Opts.FeaturesAsWritten = Args.getAllArgValues(OPT_target_feature);
Opts.LinkerVersion = Args.getLastArgValue(OPT_target_linker_version);
Opts.Triple = llvm::Triple::normalize(Args.getLastArgValue(OPT_triple));
-
+ Opts.Reciprocals = Args.getAllArgValues(OPT_mrecip_EQ);
// Use the default target triple if unspecified.
if (Opts.Triple.empty())
Opts.Triple = llvm::sys::getDefaultTargetTriple();
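
parseDiagnosticLevelMask above folds each recognized level name into a bitmask with operator| and reports failure for unknown names. A self-contained sketch of that pattern, substituting a hypothetical LevelMask enum and plain string comparisons for llvm::StringSwitch:

    #include <cassert>
    #include <string>
    #include <vector>

    // Hypothetical bitmask enum in the spirit of DiagnosticLevelMask.
    enum class LevelMask : unsigned {
      None = 0,
      Note = 1 << 0,
      Remark = 1 << 1,
      Warning = 1 << 2,
      Error = 1 << 3,
    };

    inline LevelMask operator|(LevelMask A, LevelMask B) {
      return static_cast<LevelMask>(static_cast<unsigned>(A) |
                                    static_cast<unsigned>(B));
    }
    inline bool operator&(LevelMask A, LevelMask B) {
      return static_cast<unsigned>(A) & static_cast<unsigned>(B);
    }

    // Accumulates every recognized name into M; returns false if any name was
    // unknown, mirroring the Success flag in the hunk above.
    bool parseLevelMask(const std::vector<std::string> &Levels, LevelMask &M) {
      bool Success = true;
      for (const std::string &L : Levels) {
        LevelMask Bit = L == "note"      ? LevelMask::Note
                        : L == "remark"  ? LevelMask::Remark
                        : L == "warning" ? LevelMask::Warning
                        : L == "error"   ? LevelMask::Error
                                         : LevelMask::None;
        if (Bit == LevelMask::None)
          Success = false;   // unknown name: keep parsing, report failure overall
        M = M | Bit;
      }
      return Success;
    }

    int main() {
      LevelMask M = LevelMask::None;
      bool OK = parseLevelMask({"note", "error"}, M);
      assert(OK && (M & LevelMask::Note) && (M & LevelMask::Error));
      assert(!(M & LevelMask::Warning));
      return 0;
    }
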
diff --git a/contrib/llvm/tools/clang/lib/Frontend/FrontendAction.cpp b/contrib/llvm/tools/clang/lib/Frontend/FrontendAction.cpp
index 9bba755..db9dd3b 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/FrontendAction.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/FrontendAction.cpp
@@ -191,7 +191,8 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
IntrusiveRefCntPtr<DiagnosticsEngine> Diags(&CI.getDiagnostics());
std::unique_ptr<ASTUnit> AST =
- ASTUnit::LoadFromASTFile(InputFile, Diags, CI.getFileSystemOpts());
+ ASTUnit::LoadFromASTFile(InputFile, CI.getPCHContainerOperations(),
+ Diags, CI.getFileSystemOpts());
if (!AST)
goto failure;
@@ -271,11 +272,10 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
for (llvm::sys::fs::directory_iterator Dir(DirNative, EC), DirEnd;
Dir != DirEnd && !EC; Dir.increment(EC)) {
// Check whether this is an acceptable AST file.
- if (ASTReader::isAcceptableASTFile(Dir->path(), FileMgr,
- CI.getLangOpts(),
- CI.getTargetOpts(),
- CI.getPreprocessorOpts(),
- SpecificModuleCachePath)) {
+ if (ASTReader::isAcceptableASTFile(
+ Dir->path(), FileMgr, *CI.getPCHContainerOperations(),
+ CI.getLangOpts(), CI.getTargetOpts(), CI.getPreprocessorOpts(),
+ SpecificModuleCachePath)) {
PPOpts.ImplicitPCHInclude = Dir->path();
Found = true;
break;
@@ -443,8 +443,8 @@ bool FrontendAction::Execute() {
if (CI.shouldBuildGlobalModuleIndex() && CI.hasFileManager() &&
CI.hasPreprocessor()) {
GlobalModuleIndex::writeIndex(
- CI.getFileManager(),
- CI.getPreprocessor().getHeaderSearchInfo().getModuleCachePath());
+ CI.getFileManager(), *CI.getPCHContainerOperations(),
+ CI.getPreprocessor().getHeaderSearchInfo().getModuleCachePath());
}
return true;
diff --git a/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp b/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp
index 46cdeeb..6f202a1 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp
@@ -14,6 +14,7 @@
#include "clang/Frontend/ASTUnit.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendDiagnostic.h"
+#include "clang/Frontend/MultiplexConsumer.h"
#include "clang/Frontend/Utils.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Pragma.h"
@@ -79,18 +80,28 @@ std::unique_ptr<ASTConsumer>
GeneratePCHAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
std::string Sysroot;
std::string OutputFile;
- raw_ostream *OS =
+ raw_pwrite_stream *OS =
ComputeASTConsumerArguments(CI, InFile, Sysroot, OutputFile);
if (!OS)
return nullptr;
if (!CI.getFrontendOpts().RelocatablePCH)
Sysroot.clear();
- return llvm::make_unique<PCHGenerator>(CI.getPreprocessor(), OutputFile,
- nullptr, Sysroot, OS);
+
+ auto Buffer = std::make_shared<PCHBuffer>();
+ std::vector<std::unique_ptr<ASTConsumer>> Consumers;
+ Consumers.push_back(llvm::make_unique<PCHGenerator>(
+ CI.getPreprocessor(), OutputFile, nullptr, Sysroot, Buffer));
+ Consumers.push_back(
+ CI.getPCHContainerOperations()->CreatePCHContainerGenerator(
+ CI.getDiagnostics(), CI.getHeaderSearchOpts(),
+ CI.getPreprocessorOpts(), CI.getTargetOpts(), CI.getLangOpts(),
+ InFile, OutputFile, OS, Buffer));
+
+ return llvm::make_unique<MultiplexConsumer>(std::move(Consumers));
}
-raw_ostream *GeneratePCHAction::ComputeASTConsumerArguments(
+raw_pwrite_stream *GeneratePCHAction::ComputeASTConsumerArguments(
CompilerInstance &CI, StringRef InFile, std::string &Sysroot,
std::string &OutputFile) {
Sysroot = CI.getHeaderSearchOpts().Sysroot;
@@ -102,7 +113,7 @@ raw_ostream *GeneratePCHAction::ComputeASTConsumerArguments(
// We use createOutputFile here because this is exposed via libclang, and we
// must disable the RemoveFileOnSignal behavior.
// We use a temporary to avoid race conditions.
- raw_ostream *OS =
+ raw_pwrite_stream *OS =
CI.createOutputFile(CI.getFrontendOpts().OutputFile, /*Binary=*/true,
/*RemoveFileOnSignal=*/false, InFile,
/*Extension=*/"", /*useTemporary=*/true);
@@ -118,13 +129,21 @@ GenerateModuleAction::CreateASTConsumer(CompilerInstance &CI,
StringRef InFile) {
std::string Sysroot;
std::string OutputFile;
- raw_ostream *OS =
+ raw_pwrite_stream *OS =
ComputeASTConsumerArguments(CI, InFile, Sysroot, OutputFile);
if (!OS)
return nullptr;
- return llvm::make_unique<PCHGenerator>(CI.getPreprocessor(), OutputFile,
- Module, Sysroot, OS);
+ auto Buffer = std::make_shared<PCHBuffer>();
+ std::vector<std::unique_ptr<ASTConsumer>> Consumers;
+ Consumers.push_back(llvm::make_unique<PCHGenerator>(
+ CI.getPreprocessor(), OutputFile, Module, Sysroot, Buffer));
+ Consumers.push_back(
+ CI.getPCHContainerOperations()->CreatePCHContainerGenerator(
+ CI.getDiagnostics(), CI.getHeaderSearchOpts(),
+ CI.getPreprocessorOpts(), CI.getTargetOpts(), CI.getLangOpts(),
+ InFile, OutputFile, OS, Buffer));
+ return llvm::make_unique<MultiplexConsumer>(std::move(Consumers));
}
static SmallVectorImpl<char> &
@@ -348,7 +367,7 @@ bool GenerateModuleAction::BeginSourceFileAction(CompilerInstance &CI,
return true;
}
-raw_ostream *GenerateModuleAction::ComputeASTConsumerArguments(
+raw_pwrite_stream *GenerateModuleAction::ComputeASTConsumerArguments(
CompilerInstance &CI, StringRef InFile, std::string &Sysroot,
std::string &OutputFile) {
// If no output file was provided, figure out where this module would go
@@ -363,7 +382,7 @@ raw_ostream *GenerateModuleAction::ComputeASTConsumerArguments(
// We use createOutputFile here because this is exposed via libclang, and we
// must disable the RemoveFileOnSignal behavior.
// We use a temporary to avoid race conditions.
- raw_ostream *OS =
+ raw_pwrite_stream *OS =
CI.createOutputFile(CI.getFrontendOpts().OutputFile, /*Binary=*/true,
/*RemoveFileOnSignal=*/false, InFile,
/*Extension=*/"", /*useTemporary=*/true,
@@ -395,13 +414,13 @@ void VerifyPCHAction::ExecuteAction() {
CompilerInstance &CI = getCompilerInstance();
bool Preamble = CI.getPreprocessorOpts().PrecompiledPreambleBytes.first != 0;
const std::string &Sysroot = CI.getHeaderSearchOpts().Sysroot;
- std::unique_ptr<ASTReader> Reader(
- new ASTReader(CI.getPreprocessor(), CI.getASTContext(),
- Sysroot.empty() ? "" : Sysroot.c_str(),
- /*DisableValidation*/ false,
- /*AllowPCHWithCompilerErrors*/ false,
- /*AllowConfigurationMismatch*/ true,
- /*ValidateSystemInputs*/ true));
+ std::unique_ptr<ASTReader> Reader(new ASTReader(
+ CI.getPreprocessor(), CI.getASTContext(), *CI.getPCHContainerOperations(),
+ Sysroot.empty() ? "" : Sysroot.c_str(),
+ /*DisableValidation*/ false,
+ /*AllowPCHWithCompilerErrors*/ false,
+ /*AllowConfigurationMismatch*/ true,
+ /*ValidateSystemInputs*/ true));
Reader->ReadAST(getCurrentFile(),
Preamble ? serialization::MK_Preamble
@@ -550,9 +569,9 @@ void DumpModuleInfoAction::ExecuteAction() {
Out << "Information for module file '" << getCurrentFile() << "':\n";
DumpModuleInfoListener Listener(Out);
- ASTReader::readASTFileControlBlock(getCurrentFile(),
- getCompilerInstance().getFileManager(),
- Listener);
+ ASTReader::readASTFileControlBlock(
+ getCurrentFile(), getCompilerInstance().getFileManager(),
+ *getCompilerInstance().getPCHContainerOperations(), Listener);
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp b/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp
index dfc46f4..2417146 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp
@@ -97,10 +97,11 @@ static void AddImplicitIncludePTH(MacroBuilder &Builder, Preprocessor &PP,
/// \brief Add an implicit \#include using the original file used to generate
/// a PCH file.
static void AddImplicitIncludePCH(MacroBuilder &Builder, Preprocessor &PP,
+ const PCHContainerOperations &PCHContainerOps,
StringRef ImplicitIncludePCH) {
std::string OriginalFile =
- ASTReader::getOriginalSourceFile(ImplicitIncludePCH, PP.getFileManager(),
- PP.getDiagnostics());
+ ASTReader::getOriginalSourceFile(ImplicitIncludePCH, PP.getFileManager(),
+ PCHContainerOps, PP.getDiagnostics());
if (OriginalFile.empty())
return;
@@ -891,9 +892,10 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
/// InitializePreprocessor - Initialize the preprocessor getting it and the
/// environment ready to process a single file. This returns true on error.
///
-void clang::InitializePreprocessor(Preprocessor &PP,
- const PreprocessorOptions &InitOpts,
- const FrontendOptions &FEOpts) {
+void clang::InitializePreprocessor(
+ Preprocessor &PP, const PreprocessorOptions &InitOpts,
+ const PCHContainerOperations &PCHContainerOps,
+ const FrontendOptions &FEOpts) {
const LangOptions &LangOpts = PP.getLangOpts();
std::string PredefineBuffer;
PredefineBuffer.reserve(4080);
@@ -952,7 +954,8 @@ void clang::InitializePreprocessor(Preprocessor &PP,
// Process -include-pch/-include-pth directives.
if (!InitOpts.ImplicitPCHInclude.empty())
- AddImplicitIncludePCH(Builder, PP, InitOpts.ImplicitPCHInclude);
+ AddImplicitIncludePCH(Builder, PP, PCHContainerOps,
+ InitOpts.ImplicitPCHInclude);
if (!InitOpts.ImplicitPTHInclude.empty())
AddImplicitIncludePTH(Builder, PP, InitOpts.ImplicitPTHInclude);
diff --git a/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp b/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp
index 219e949..a53f4d2 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp
@@ -33,11 +33,13 @@ public:
void ReaderInitialized(ASTReader *Reader) override;
void IdentifierRead(serialization::IdentID ID,
IdentifierInfo *II) override;
+ void MacroRead(serialization::MacroID ID, MacroInfo *MI) override;
void TypeRead(serialization::TypeIdx Idx, QualType T) override;
void DeclRead(serialization::DeclID ID, const Decl *D) override;
void SelectorRead(serialization::SelectorID iD, Selector Sel) override;
void MacroDefinitionRead(serialization::PreprocessedEntityID,
MacroDefinitionRecord *MD) override;
+ void ModuleRead(serialization::SubmoduleID ID, Module *Mod) override;
private:
std::vector<ASTDeserializationListener *> Listeners;
@@ -60,6 +62,12 @@ void MultiplexASTDeserializationListener::IdentifierRead(
Listeners[i]->IdentifierRead(ID, II);
}
+void MultiplexASTDeserializationListener::MacroRead(
+ serialization::MacroID ID, MacroInfo *MI) {
+ for (auto &Listener : Listeners)
+ Listener->MacroRead(ID, MI);
+}
+
void MultiplexASTDeserializationListener::TypeRead(
serialization::TypeIdx Idx, QualType T) {
for (size_t i = 0, e = Listeners.size(); i != e; ++i)
@@ -84,6 +92,12 @@ void MultiplexASTDeserializationListener::MacroDefinitionRead(
Listeners[i]->MacroDefinitionRead(ID, MD);
}
+void MultiplexASTDeserializationListener::ModuleRead(
+ serialization::SubmoduleID ID, Module *Mod) {
+ for (auto &Listener : Listeners)
+ Listener->ModuleRead(ID, Mod);
+}
+
// This ASTMutationListener forwards its notifications to a set of
// child listeners.
class MultiplexASTMutationListener : public ASTMutationListener {
@@ -99,6 +113,7 @@ public:
const VarTemplateSpecializationDecl *D) override;
void AddedCXXTemplateSpecialization(const FunctionTemplateDecl *TD,
const FunctionDecl *D) override;
+ void ResolvedExceptionSpec(const FunctionDecl *FD) override;
void DeducedReturnType(const FunctionDecl *FD, QualType ReturnType) override;
void ResolvedOperatorDelete(const CXXDestructorDecl *DD,
const FunctionDecl *Delete) override;
@@ -106,6 +121,7 @@ public:
void StaticDataMemberInstantiated(const VarDecl *D) override;
void AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD,
const ObjCInterfaceDecl *IFD) override;
+ void FunctionDefinitionInstantiated(const FunctionDecl *D) override;
void AddedObjCPropertyInClassExtension(const ObjCPropertyDecl *Prop,
const ObjCPropertyDecl *OrigProp,
const ObjCCategoryDecl *ClassExt) override;
@@ -153,6 +169,11 @@ void MultiplexASTMutationListener::AddedCXXTemplateSpecialization(
for (size_t i = 0, e = Listeners.size(); i != e; ++i)
Listeners[i]->AddedCXXTemplateSpecialization(TD, D);
}
+void MultiplexASTMutationListener::ResolvedExceptionSpec(
+ const FunctionDecl *FD) {
+ for (auto &Listener : Listeners)
+ Listener->ResolvedExceptionSpec(FD);
+}
void MultiplexASTMutationListener::DeducedReturnType(const FunctionDecl *FD,
QualType ReturnType) {
for (size_t i = 0, e = Listeners.size(); i != e; ++i)
@@ -179,6 +200,11 @@ void MultiplexASTMutationListener::AddedObjCCategoryToInterface(
for (size_t i = 0, e = Listeners.size(); i != e; ++i)
Listeners[i]->AddedObjCCategoryToInterface(CatD, IFD);
}
+void MultiplexASTMutationListener::FunctionDefinitionInstantiated(
+ const FunctionDecl *D) {
+ for (auto &Listener : Listeners)
+ Listener->FunctionDefinitionInstantiated(D);
+}
void MultiplexASTMutationListener::AddedObjCPropertyInClassExtension(
const ObjCPropertyDecl *Prop,
const ObjCPropertyDecl *OrigProp,
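
Each new override above (MacroRead, ModuleRead, ResolvedExceptionSpec, FunctionDefinitionInstantiated) has the same shape: forward the callback to every registered child listener. A generic sketch of that multiplexer pattern with hypothetical listener types:

    #include <iostream>
    #include <memory>
    #include <vector>

    // Hypothetical listener interface; the real code forwards the
    // ASTDeserialization and ASTMutation callbacks the same way.
    struct Listener {
      virtual ~Listener() = default;
      virtual void macroRead(int ID) = 0;
    };

    struct PrintingListener : Listener {
      void macroRead(int ID) override { std::cout << "macro " << ID << "\n"; }
    };

    // The multiplexer is itself a Listener and simply fans each call out to its
    // children, so callers never need to know how many consumers are attached.
    class MultiplexListener : public Listener {
      std::vector<std::unique_ptr<Listener>> Children;

    public:
      void add(std::unique_ptr<Listener> L) { Children.push_back(std::move(L)); }
      void macroRead(int ID) override {
        for (auto &Child : Children)
          Child->macroRead(ID);
      }
    };

    int main() {
      MultiplexListener M;
      M.add(std::make_unique<PrintingListener>());
      M.add(std::make_unique<PrintingListener>());
      M.macroRead(42);   // both children observe the event
      return 0;
    }
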
diff --git a/contrib/llvm/tools/clang/lib/Frontend/PCHContainerOperations.cpp b/contrib/llvm/tools/clang/lib/Frontend/PCHContainerOperations.cpp
new file mode 100644
index 0000000..fd3278b
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Frontend/PCHContainerOperations.cpp
@@ -0,0 +1,70 @@
+//===--- Frontend/PCHContainerOperations.cpp - PCH Containers ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines PCHContainerOperations and RawPCHContainerOperation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Frontend/PCHContainerOperations.h"
+#include "clang/AST/ASTConsumer.h"
+#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Support/raw_ostream.h"
+#include "clang/Lex/ModuleLoader.h"
+using namespace clang;
+
+PCHContainerOperations::~PCHContainerOperations() {}
+
+namespace {
+
+/// \brief A PCHContainerGenerator that writes out the PCH to a flat file.
+class PCHContainerGenerator : public ASTConsumer {
+ std::shared_ptr<PCHBuffer> Buffer;
+ raw_pwrite_stream *OS;
+
+public:
+ PCHContainerGenerator(DiagnosticsEngine &Diags,
+ const HeaderSearchOptions &HSO,
+ const PreprocessorOptions &PPO, const TargetOptions &TO,
+ const LangOptions &LO, const std::string &MainFileName,
+ const std::string &OutputFileName,
+ llvm::raw_pwrite_stream *OS,
+ std::shared_ptr<PCHBuffer> Buffer)
+ : Buffer(Buffer), OS(OS) {}
+
+ virtual ~PCHContainerGenerator() {}
+
+ void HandleTranslationUnit(ASTContext &Ctx) override {
+ if (Buffer->IsComplete) {
+ // Make sure it hits disk now.
+ *OS << Buffer->Data;
+ OS->flush();
+ }
+ // Free the space of the temporary buffer.
+ llvm::SmallVector<char, 0> Empty;
+ Buffer->Data = std::move(Empty);
+ }
+};
+}
+
+std::unique_ptr<ASTConsumer>
+RawPCHContainerOperations::CreatePCHContainerGenerator(
+ DiagnosticsEngine &Diags, const HeaderSearchOptions &HSO,
+ const PreprocessorOptions &PPO, const TargetOptions &TO,
+ const LangOptions &LO, const std::string &MainFileName,
+ const std::string &OutputFileName, llvm::raw_pwrite_stream *OS,
+ std::shared_ptr<PCHBuffer> Buffer) const {
+ return llvm::make_unique<PCHContainerGenerator>(
+ Diags, HSO, PPO, TO, LO, MainFileName, OutputFileName, OS, Buffer);
+}
+
+void RawPCHContainerOperations::ExtractPCH(
+ llvm::MemoryBufferRef Buffer, llvm::BitstreamReader &StreamFile) const {
+ StreamFile.init((const unsigned char *)Buffer.getBufferStart(),
+ (const unsigned char *)Buffer.getBufferEnd());
+}
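
The new file wires two consumers to one shared buffer: the PCH generator fills PCHBuffer::Data and marks it complete, and the container generator writes the bytes to the output stream and drops the storage. A minimal sketch of that handoff, with hypothetical Generator/ContainerWriter types standing in for the real consumers:

    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>

    // Hypothetical analogue of PCHBuffer: filled by one consumer, drained by another.
    struct SerializedBuffer {
      bool IsComplete = false;
      std::vector<char> Data;
    };

    // First consumer: produces the serialized bytes into the shared buffer.
    struct Generator {
      std::shared_ptr<SerializedBuffer> Buffer;
      void run(const std::string &Payload) {
        Buffer->Data.assign(Payload.begin(), Payload.end());
        Buffer->IsComplete = true;
      }
    };

    // Second consumer: writes the buffer to its stream only if generation
    // finished, then releases the memory, the same shape as the new
    // PCHContainerGenerator::HandleTranslationUnit above.
    struct ContainerWriter {
      std::shared_ptr<SerializedBuffer> Buffer;
      std::ostream *OS;
      void run() {
        if (Buffer->IsComplete) {
          OS->write(Buffer->Data.data(), Buffer->Data.size());
          OS->flush();
        }
        std::vector<char> Empty;
        Buffer->Data = std::move(Empty);   // free the temporary storage
      }
    };

    int main() {
      auto Buffer = std::make_shared<SerializedBuffer>();
      Generator Gen{Buffer};
      ContainerWriter Writer{Buffer, &std::cout};
      Gen.run("pretend PCH bytes");
      Writer.run();
      return 0;
    }
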
diff --git a/contrib/llvm/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/contrib/llvm/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index 6192554..a58c935 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -562,8 +562,13 @@ struct UnknownPragmaHandler : public PragmaHandler {
const char *Prefix;
PrintPPOutputPPCallbacks *Callbacks;
- UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
- : Prefix(prefix), Callbacks(callbacks) {}
+ // Set to true if tokens should be expanded
+ bool ShouldExpandTokens;
+
+ UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks,
+ bool RequireTokenExpansion)
+ : Prefix(prefix), Callbacks(callbacks),
+ ShouldExpandTokens(RequireTokenExpansion) {}
void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
Token &PragmaTok) override {
// Figure out what line we went to and insert the appropriate number of
@@ -571,16 +576,24 @@ struct UnknownPragmaHandler : public PragmaHandler {
Callbacks->startNewLineIfNeeded();
Callbacks->MoveToLine(PragmaTok.getLocation());
Callbacks->OS.write(Prefix, strlen(Prefix));
+
+ Token PrevToken;
+ Token PrevPrevToken;
+ PrevToken.startToken();
+ PrevPrevToken.startToken();
+
// Read and print all of the pragma tokens.
while (PragmaTok.isNot(tok::eod)) {
- if (PragmaTok.hasLeadingSpace())
+ if (PragmaTok.hasLeadingSpace() ||
+ Callbacks->AvoidConcat(PrevPrevToken, PrevToken, PragmaTok))
Callbacks->OS << ' ';
std::string TokSpell = PP.getSpelling(PragmaTok);
Callbacks->OS.write(&TokSpell[0], TokSpell.size());
- // Expand macros in pragmas with -fms-extensions. The assumption is that
- // the majority of pragmas in such a file will be Microsoft pragmas.
- if (PP.getLangOpts().MicrosoftExt)
+ PrevPrevToken = PrevToken;
+ PrevToken = PragmaTok;
+
+ if (ShouldExpandTokens)
PP.Lex(PragmaTok);
else
PP.LexUnexpandedToken(PragmaTok);
@@ -718,10 +731,29 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros, Opts.UseLineDirectives);
- PP.AddPragmaHandler(new UnknownPragmaHandler("#pragma", Callbacks));
- PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",Callbacks));
- PP.AddPragmaHandler("clang",
- new UnknownPragmaHandler("#pragma clang", Callbacks));
+
+ // Expand macros in pragmas with -fms-extensions. The assumption is that
+ // the majority of pragmas in such a file will be Microsoft pragmas.
+ PP.AddPragmaHandler(new UnknownPragmaHandler(
+ "#pragma", Callbacks,
+ /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
+ PP.AddPragmaHandler(
+ "GCC", new UnknownPragmaHandler(
+ "#pragma GCC", Callbacks,
+ /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
+ PP.AddPragmaHandler(
+ "clang", new UnknownPragmaHandler(
+ "#pragma clang", Callbacks,
+ /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
+
+ // The tokens after pragma omp need to be expanded.
+ //
+ // OpenMP [2.1, Directive format]
+ // Preprocessing tokens following the #pragma omp are subject to macro
+ // replacement.
+ PP.AddPragmaHandler("omp",
+ new UnknownPragmaHandler("#pragma omp", Callbacks,
+ /*RequireTokenExpansion=*/true));
PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks));
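
The pragma printer above now remembers the previous tokens and consults Callbacks->AvoidConcat so that re-emitted spellings do not paste into a different token. A much-simplified, self-contained sketch of that spacing decision; it only guards identifier and number pasting and is nothing like clang's full TokenConcatenation logic:

    #include <cctype>
    #include <iostream>
    #include <string>
    #include <vector>

    // Very rough stand-in for "would these two spellings lex as one token if
    // printed back to back?".
    static bool avoidConcat(const std::string &Prev, const std::string &Next) {
      if (Prev.empty() || Next.empty())
        return false;
      unsigned char A = Prev.back(), B = Next.front();
      return (std::isalnum(A) || A == '_') && (std::isalnum(B) || B == '_');
    }

    // Re-emit a token stream, inserting a space only where pasting would change it.
    static std::string rejoin(const std::vector<std::string> &Toks) {
      std::string Out;
      std::string Prev;
      for (const std::string &Tok : Toks) {
        if (!Out.empty() && avoidConcat(Prev, Tok))
          Out += ' ';
        Out += Tok;
        Prev = Tok;
      }
      return Out;
    }

    int main() {
      // "#pragma omp parallel for" style payload: identifiers need separating,
      // punctuation does not.
      std::cout << rejoin({"omp", "parallel", "for"}) << "\n";           // omp parallel for
      std::cout << rejoin({"pack", "(", "push", ",", "1", ")"}) << "\n"; // pack(push,1)
      return 0;
    }
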
diff --git a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FixItRewriter.cpp b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FixItRewriter.cpp
index a3e14f9..dc787ac 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FixItRewriter.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FixItRewriter.cpp
@@ -81,6 +81,13 @@ bool FixItRewriter::WriteFixedFiles(
RewritesReceiver Rec(Rewrite);
Editor.applyRewrites(Rec);
+ if (FixItOpts->InPlace) {
+ // Overwriting open files on Windows is tricky, but the rewriter can do it
+ // for us.
+ Rewrite.overwriteChangedFiles();
+ return false;
+ }
+
for (iterator I = buffer_begin(), E = buffer_end(); I != E; ++I) {
const FileEntry *Entry = Rewrite.getSourceMgr().getFileEntryForID(I->first);
int fd;
diff --git a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp
index 1b5eb28..dbc661b 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp
@@ -48,9 +48,10 @@ FixItAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
namespace {
class FixItRewriteInPlace : public FixItOptions {
public:
+ FixItRewriteInPlace() { InPlace = true; }
+
std::string RewriteFilename(const std::string &Filename, int &fd) override {
- fd = -1;
- return Filename;
+ llvm_unreachable("don't call RewriteFilename for inplace rewrites");
}
};
diff --git a/contrib/llvm/tools/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp b/contrib/llvm/tools/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp
index 910e394..55df936 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp
@@ -691,7 +691,8 @@ static unsigned CheckLists(DiagnosticsEngine &Diags, SourceManager &SourceMgr,
const char *Label,
DirectiveList &Left,
const_diag_iterator d2_begin,
- const_diag_iterator d2_end) {
+ const_diag_iterator d2_end,
+ bool IgnoreUnexpected) {
std::vector<Directive *> LeftOnly;
DiagList Right(d2_begin, d2_end);
@@ -727,7 +728,8 @@ static unsigned CheckLists(DiagnosticsEngine &Diags, SourceManager &SourceMgr,
}
// Now all that's left in Right are those that were not matched.
unsigned num = PrintExpected(Diags, SourceMgr, LeftOnly, Label);
- num += PrintUnexpected(Diags, &SourceMgr, Right.begin(), Right.end(), Label);
+ if (!IgnoreUnexpected)
+ num += PrintUnexpected(Diags, &SourceMgr, Right.begin(), Right.end(), Label);
return num;
}
@@ -745,21 +747,28 @@ static unsigned CheckResults(DiagnosticsEngine &Diags, SourceManager &SourceMgr,
// Seen \ Expected - set seen but not expected
unsigned NumProblems = 0;
+ const DiagnosticLevelMask DiagMask =
+ Diags.getDiagnosticOptions().getVerifyIgnoreUnexpected();
+
// See if there are error mismatches.
NumProblems += CheckLists(Diags, SourceMgr, "error", ED.Errors,
- Buffer.err_begin(), Buffer.err_end());
+ Buffer.err_begin(), Buffer.err_end(),
+ bool(DiagnosticLevelMask::Error & DiagMask));
// See if there are warning mismatches.
NumProblems += CheckLists(Diags, SourceMgr, "warning", ED.Warnings,
- Buffer.warn_begin(), Buffer.warn_end());
+ Buffer.warn_begin(), Buffer.warn_end(),
+ bool(DiagnosticLevelMask::Warning & DiagMask));
// See if there are remark mismatches.
NumProblems += CheckLists(Diags, SourceMgr, "remark", ED.Remarks,
- Buffer.remark_begin(), Buffer.remark_end());
+ Buffer.remark_begin(), Buffer.remark_end(),
+ bool(DiagnosticLevelMask::Remark & DiagMask));
// See if there are note mismatches.
NumProblems += CheckLists(Diags, SourceMgr, "note", ED.Notes,
- Buffer.note_begin(), Buffer.note_end());
+ Buffer.note_begin(), Buffer.note_end(),
+ bool(DiagnosticLevelMask::Note & DiagMask));
return NumProblems;
}
@@ -854,12 +863,20 @@ void VerifyDiagnosticConsumer::CheckDiagnostics() {
// Check that the expected diagnostics occurred.
NumErrors += CheckResults(Diags, *SrcManager, *Buffer, ED);
} else {
- NumErrors += (PrintUnexpected(Diags, nullptr, Buffer->err_begin(),
- Buffer->err_end(), "error") +
- PrintUnexpected(Diags, nullptr, Buffer->warn_begin(),
- Buffer->warn_end(), "warn") +
- PrintUnexpected(Diags, nullptr, Buffer->note_begin(),
- Buffer->note_end(), "note"));
+ const DiagnosticLevelMask DiagMask =
+ ~Diags.getDiagnosticOptions().getVerifyIgnoreUnexpected();
+ if (bool(DiagnosticLevelMask::Error & DiagMask))
+ NumErrors += PrintUnexpected(Diags, nullptr, Buffer->err_begin(),
+ Buffer->err_end(), "error");
+ if (bool(DiagnosticLevelMask::Warning & DiagMask))
+ NumErrors += PrintUnexpected(Diags, nullptr, Buffer->warn_begin(),
+ Buffer->warn_end(), "warn");
+ if (bool(DiagnosticLevelMask::Remark & DiagMask))
+ NumErrors += PrintUnexpected(Diags, nullptr, Buffer->remark_begin(),
+ Buffer->remark_end(), "remark");
+ if (bool(DiagnosticLevelMask::Note & DiagMask))
+ NumErrors += PrintUnexpected(Diags, nullptr, Buffer->note_begin(),
+ Buffer->note_end(), "note");
}
Diags.setClient(CurClient, Owner.release() != nullptr);
diff --git a/contrib/llvm/tools/clang/lib/Headers/Intrin.h b/contrib/llvm/tools/clang/lib/Headers/Intrin.h
index 727a55e..dd04e06 100644
--- a/contrib/llvm/tools/clang/lib/Headers/Intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/Intrin.h
@@ -39,6 +39,9 @@
#include <setjmp.h>
#endif
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -421,7 +424,7 @@ unsigned __int64 _shrx_u64(unsigned __int64, unsigned int);
* Multiply two 64-bit integers and obtain a 64-bit result.
* The low-half is returned directly and the high half is in an out parameter.
*/
-static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned __int64 DEFAULT_FN_ATTRS
_umul128(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand,
unsigned __int64 *_HighProduct) {
unsigned __int128 _FullProduct =
@@ -429,7 +432,7 @@ _umul128(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand,
*_HighProduct = _FullProduct >> 64;
return _FullProduct;
}
-static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned __int64 DEFAULT_FN_ATTRS
__umulh(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand) {
unsigned __int128 _FullProduct =
(unsigned __int128)_Multiplier * (unsigned __int128)_Multiplicand;
@@ -444,54 +447,54 @@ void __cdecl _xsaveopt64(void *, unsigned __int64);
/*----------------------------------------------------------------------------*\
|* Bit Twiddling
\*----------------------------------------------------------------------------*/
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_rotl8(unsigned char _Value, unsigned char _Shift) {
_Shift &= 0x7;
return _Shift ? (_Value << _Shift) | (_Value >> (8 - _Shift)) : _Value;
}
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_rotr8(unsigned char _Value, unsigned char _Shift) {
_Shift &= 0x7;
return _Shift ? (_Value >> _Shift) | (_Value << (8 - _Shift)) : _Value;
}
-static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned short DEFAULT_FN_ATTRS
_rotl16(unsigned short _Value, unsigned char _Shift) {
_Shift &= 0xf;
return _Shift ? (_Value << _Shift) | (_Value >> (16 - _Shift)) : _Value;
}
-static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned short DEFAULT_FN_ATTRS
_rotr16(unsigned short _Value, unsigned char _Shift) {
_Shift &= 0xf;
return _Shift ? (_Value >> _Shift) | (_Value << (16 - _Shift)) : _Value;
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
_rotl(unsigned int _Value, int _Shift) {
_Shift &= 0x1f;
return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value;
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
_rotr(unsigned int _Value, int _Shift) {
_Shift &= 0x1f;
return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value;
}
-static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long DEFAULT_FN_ATTRS
_lrotl(unsigned long _Value, int _Shift) {
_Shift &= 0x1f;
return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value;
}
-static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long DEFAULT_FN_ATTRS
_lrotr(unsigned long _Value, int _Shift) {
_Shift &= 0x1f;
return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value;
}
static
-__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
+__inline__ unsigned __int64 DEFAULT_FN_ATTRS
_rotl64(unsigned __int64 _Value, int _Shift) {
_Shift &= 0x3f;
return _Shift ? (_Value << _Shift) | (_Value >> (64 - _Shift)) : _Value;
}
static
-__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
+__inline__ unsigned __int64 DEFAULT_FN_ATTRS
_rotr64(unsigned __int64 _Value, int _Shift) {
_Shift &= 0x3f;
return _Shift ? (_Value >> _Shift) | (_Value << (64 - _Shift)) : _Value;
@@ -499,52 +502,52 @@ _rotr64(unsigned __int64 _Value, int _Shift) {
/*----------------------------------------------------------------------------*\
|* Bit Counting and Testing
\*----------------------------------------------------------------------------*/
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_BitScanForward(unsigned long *_Index, unsigned long _Mask) {
if (!_Mask)
return 0;
*_Index = __builtin_ctzl(_Mask);
return 1;
}
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_BitScanReverse(unsigned long *_Index, unsigned long _Mask) {
if (!_Mask)
return 0;
*_Index = 31 - __builtin_clzl(_Mask);
return 1;
}
-static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned short DEFAULT_FN_ATTRS
__popcnt16(unsigned short value) {
return __builtin_popcount((int)value);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__popcnt(unsigned int value) {
return __builtin_popcount(value);
}
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_bittest(long const *a, long b) {
return (*a >> b) & 1;
}
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_bittestandcomplement(long *a, long b) {
unsigned char x = (*a >> b) & 1;
*a = *a ^ (1 << b);
return x;
}
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_bittestandreset(long *a, long b) {
unsigned char x = (*a >> b) & 1;
*a = *a & ~(1 << b);
return x;
}
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_bittestandset(long *a, long b) {
unsigned char x = (*a >> b) & 1;
*a = *a | (1 << b);
return x;
}
#if defined(__i386__) || defined(__x86_64__)
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_interlockedbittestandset(long volatile *__BitBase, long __BitPos) {
unsigned char __Res;
__asm__ ("xor %0, %0\n"
@@ -556,14 +559,14 @@ _interlockedbittestandset(long volatile *__BitBase, long __BitPos) {
}
#endif
#ifdef __x86_64__
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask) {
if (!_Mask)
return 0;
*_Index = __builtin_ctzll(_Mask);
return 1;
}
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) {
if (!_Mask)
return 0;
@@ -571,33 +574,33 @@ _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) {
return 1;
}
static __inline__
-unsigned __int64 __attribute__((__always_inline__, __nodebug__))
+unsigned __int64 DEFAULT_FN_ATTRS
__popcnt64(unsigned __int64 value) {
return __builtin_popcountll(value);
}
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_bittest64(__int64 const *a, __int64 b) {
return (*a >> b) & 1;
}
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_bittestandcomplement64(__int64 *a, __int64 b) {
unsigned char x = (*a >> b) & 1;
*a = *a ^ (1ll << b);
return x;
}
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_bittestandreset64(__int64 *a, __int64 b) {
unsigned char x = (*a >> b) & 1;
*a = *a & ~(1ll << b);
return x;
}
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_bittestandset64(__int64 *a, __int64 b) {
unsigned char x = (*a >> b) & 1;
*a = *a | (1ll << b);
return x;
}
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
_interlockedbittestandset64(__int64 volatile *__BitBase, __int64 __BitPos) {
unsigned char __Res;
__asm__ ("xor %0, %0\n"
@@ -611,16 +614,16 @@ _interlockedbittestandset64(__int64 volatile *__BitBase, __int64 __BitPos) {
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange Add
\*----------------------------------------------------------------------------*/
-static __inline__ char __attribute__((__always_inline__, __nodebug__))
+static __inline__ char DEFAULT_FN_ATTRS
_InterlockedExchangeAdd8(char volatile *_Addend, char _Value) {
return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
}
-static __inline__ short __attribute__((__always_inline__, __nodebug__))
+static __inline__ short DEFAULT_FN_ATTRS
_InterlockedExchangeAdd16(short volatile *_Addend, short _Value) {
return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
}
#ifdef __x86_64__
-static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __int64 DEFAULT_FN_ATTRS
_InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) {
return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
}
@@ -628,20 +631,20 @@ _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) {
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange Sub
\*----------------------------------------------------------------------------*/
-static __inline__ char __attribute__((__always_inline__, __nodebug__))
+static __inline__ char DEFAULT_FN_ATTRS
_InterlockedExchangeSub8(char volatile *_Subend, char _Value) {
return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
}
-static __inline__ short __attribute__((__always_inline__, __nodebug__))
+static __inline__ short DEFAULT_FN_ATTRS
_InterlockedExchangeSub16(short volatile *_Subend, short _Value) {
return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
}
-static __inline__ long __attribute__((__always_inline__, __nodebug__))
+static __inline__ long DEFAULT_FN_ATTRS
_InterlockedExchangeSub(long volatile *_Subend, long _Value) {
return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
}
#ifdef __x86_64__
-static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __int64 DEFAULT_FN_ATTRS
_InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) {
return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
}
@@ -649,12 +652,12 @@ _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) {
/*----------------------------------------------------------------------------*\
|* Interlocked Increment
\*----------------------------------------------------------------------------*/
-static __inline__ short __attribute__((__always_inline__, __nodebug__))
+static __inline__ short DEFAULT_FN_ATTRS
_InterlockedIncrement16(short volatile *_Value) {
return __atomic_add_fetch(_Value, 1, 0);
}
#ifdef __x86_64__
-static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __int64 DEFAULT_FN_ATTRS
_InterlockedIncrement64(__int64 volatile *_Value) {
return __atomic_add_fetch(_Value, 1, 0);
}
@@ -662,12 +665,12 @@ _InterlockedIncrement64(__int64 volatile *_Value) {
/*----------------------------------------------------------------------------*\
|* Interlocked Decrement
\*----------------------------------------------------------------------------*/
-static __inline__ short __attribute__((__always_inline__, __nodebug__))
+static __inline__ short DEFAULT_FN_ATTRS
_InterlockedDecrement16(short volatile *_Value) {
return __atomic_sub_fetch(_Value, 1, 0);
}
#ifdef __x86_64__
-static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __int64 DEFAULT_FN_ATTRS
_InterlockedDecrement64(__int64 volatile *_Value) {
return __atomic_sub_fetch(_Value, 1, 0);
}
@@ -675,20 +678,20 @@ _InterlockedDecrement64(__int64 volatile *_Value) {
/*----------------------------------------------------------------------------*\
|* Interlocked And
\*----------------------------------------------------------------------------*/
-static __inline__ char __attribute__((__always_inline__, __nodebug__))
+static __inline__ char DEFAULT_FN_ATTRS
_InterlockedAnd8(char volatile *_Value, char _Mask) {
return __atomic_and_fetch(_Value, _Mask, 0);
}
-static __inline__ short __attribute__((__always_inline__, __nodebug__))
+static __inline__ short DEFAULT_FN_ATTRS
_InterlockedAnd16(short volatile *_Value, short _Mask) {
return __atomic_and_fetch(_Value, _Mask, 0);
}
-static __inline__ long __attribute__((__always_inline__, __nodebug__))
+static __inline__ long DEFAULT_FN_ATTRS
_InterlockedAnd(long volatile *_Value, long _Mask) {
return __atomic_and_fetch(_Value, _Mask, 0);
}
#ifdef __x86_64__
-static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __int64 DEFAULT_FN_ATTRS
_InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) {
return __atomic_and_fetch(_Value, _Mask, 0);
}
@@ -696,20 +699,20 @@ _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) {
/*----------------------------------------------------------------------------*\
|* Interlocked Or
\*----------------------------------------------------------------------------*/
-static __inline__ char __attribute__((__always_inline__, __nodebug__))
+static __inline__ char DEFAULT_FN_ATTRS
_InterlockedOr8(char volatile *_Value, char _Mask) {
return __atomic_or_fetch(_Value, _Mask, 0);
}
-static __inline__ short __attribute__((__always_inline__, __nodebug__))
+static __inline__ short DEFAULT_FN_ATTRS
_InterlockedOr16(short volatile *_Value, short _Mask) {
return __atomic_or_fetch(_Value, _Mask, 0);
}
-static __inline__ long __attribute__((__always_inline__, __nodebug__))
+static __inline__ long DEFAULT_FN_ATTRS
_InterlockedOr(long volatile *_Value, long _Mask) {
return __atomic_or_fetch(_Value, _Mask, 0);
}
#ifdef __x86_64__
-static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __int64 DEFAULT_FN_ATTRS
_InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) {
return __atomic_or_fetch(_Value, _Mask, 0);
}
@@ -717,20 +720,20 @@ _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) {
/*----------------------------------------------------------------------------*\
|* Interlocked Xor
\*----------------------------------------------------------------------------*/
-static __inline__ char __attribute__((__always_inline__, __nodebug__))
+static __inline__ char DEFAULT_FN_ATTRS
_InterlockedXor8(char volatile *_Value, char _Mask) {
return __atomic_xor_fetch(_Value, _Mask, 0);
}
-static __inline__ short __attribute__((__always_inline__, __nodebug__))
+static __inline__ short DEFAULT_FN_ATTRS
_InterlockedXor16(short volatile *_Value, short _Mask) {
return __atomic_xor_fetch(_Value, _Mask, 0);
}
-static __inline__ long __attribute__((__always_inline__, __nodebug__))
+static __inline__ long DEFAULT_FN_ATTRS
_InterlockedXor(long volatile *_Value, long _Mask) {
return __atomic_xor_fetch(_Value, _Mask, 0);
}
#ifdef __x86_64__
-static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __int64 DEFAULT_FN_ATTRS
_InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) {
return __atomic_xor_fetch(_Value, _Mask, 0);
}
@@ -738,18 +741,18 @@ _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) {
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange
\*----------------------------------------------------------------------------*/
-static __inline__ char __attribute__((__always_inline__, __nodebug__))
+static __inline__ char DEFAULT_FN_ATTRS
_InterlockedExchange8(char volatile *_Target, char _Value) {
__atomic_exchange(_Target, &_Value, &_Value, 0);
return _Value;
}
-static __inline__ short __attribute__((__always_inline__, __nodebug__))
+static __inline__ short DEFAULT_FN_ATTRS
_InterlockedExchange16(short volatile *_Target, short _Value) {
__atomic_exchange(_Target, &_Value, &_Value, 0);
return _Value;
}
#ifdef __x86_64__
-static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __int64 DEFAULT_FN_ATTRS
_InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) {
__atomic_exchange(_Target, &_Value, &_Value, 0);
return _Value;
@@ -758,19 +761,19 @@ _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) {
/*----------------------------------------------------------------------------*\
|* Interlocked Compare Exchange
\*----------------------------------------------------------------------------*/
-static __inline__ char __attribute__((__always_inline__, __nodebug__))
+static __inline__ char DEFAULT_FN_ATTRS
_InterlockedCompareExchange8(char volatile *_Destination,
char _Exchange, char _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
return _Comparand;
}
-static __inline__ short __attribute__((__always_inline__, __nodebug__))
+static __inline__ short DEFAULT_FN_ATTRS
_InterlockedCompareExchange16(short volatile *_Destination,
short _Exchange, short _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
return _Comparand;
}
-static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __int64 DEFAULT_FN_ATTRS
_InterlockedCompareExchange64(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
@@ -780,24 +783,24 @@ _InterlockedCompareExchange64(__int64 volatile *_Destination,
|* Barriers
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
_ReadWriteBarrier(void) {
__asm__ volatile ("" : : : "memory");
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
_ReadBarrier(void) {
__asm__ volatile ("" : : : "memory");
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
_WriteBarrier(void) {
__asm__ volatile ("" : : : "memory");
}
#endif
#ifdef __x86_64__
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__faststorefence(void) {
__asm__ volatile("lock orq $0, (%%rsp)" : : : "memory");
}
@@ -812,33 +815,33 @@ __faststorefence(void) {
(__offset))
#ifdef __i386__
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
__readfsbyte(unsigned long __offset) {
return *__ptr_to_addr_space(257, unsigned char, __offset);
}
-static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned __int64 DEFAULT_FN_ATTRS
__readfsqword(unsigned long __offset) {
return *__ptr_to_addr_space(257, unsigned __int64, __offset);
}
-static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned short DEFAULT_FN_ATTRS
__readfsword(unsigned long __offset) {
return *__ptr_to_addr_space(257, unsigned short, __offset);
}
#endif
#ifdef __x86_64__
-static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned char DEFAULT_FN_ATTRS
__readgsbyte(unsigned long __offset) {
return *__ptr_to_addr_space(256, unsigned char, __offset);
}
-static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long DEFAULT_FN_ATTRS
__readgsdword(unsigned long __offset) {
return *__ptr_to_addr_space(256, unsigned long, __offset);
}
-static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned __int64 DEFAULT_FN_ATTRS
__readgsqword(unsigned long __offset) {
return *__ptr_to_addr_space(256, unsigned __int64, __offset);
}
-static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned short DEFAULT_FN_ATTRS
__readgsword(unsigned long __offset) {
return *__ptr_to_addr_space(256, unsigned short, __offset);
}
@@ -848,44 +851,44 @@ __readgsword(unsigned long __offset) {
|* movs, stos
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) {
__asm__("rep movsb" : : "D"(__dst), "S"(__src), "c"(__n)
: "%edi", "%esi", "%ecx");
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) {
__asm__("rep movsl" : : "D"(__dst), "S"(__src), "c"(__n)
: "%edi", "%esi", "%ecx");
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) {
__asm__("rep movsh" : : "D"(__dst), "S"(__src), "c"(__n)
: "%edi", "%esi", "%ecx");
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__stosb(unsigned char *__dst, unsigned char __x, size_t __n) {
__asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n)
: "%edi", "%ecx");
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__stosd(unsigned long *__dst, unsigned long __x, size_t __n) {
__asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n)
: "%edi", "%ecx");
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__stosw(unsigned short *__dst, unsigned short __x, size_t __n) {
__asm__("rep stosh" : : "D"(__dst), "a"(__x), "c"(__n)
: "%edi", "%ecx");
}
#endif
#ifdef __x86_64__
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__movsq(unsigned long long *__dst, unsigned long long const *__src, size_t __n) {
__asm__("rep movsq" : : "D"(__dst), "S"(__src), "c"(__n)
: "%edi", "%esi", "%ecx");
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) {
__asm__("rep stosq" : : "D"(__dst), "a"(__x), "c"(__n)
: "%edi", "%ecx");
@@ -895,32 +898,32 @@ __stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) {
/*----------------------------------------------------------------------------*\
|* Misc
\*----------------------------------------------------------------------------*/
-static __inline__ void * __attribute__((__always_inline__, __nodebug__))
+static __inline__ void * DEFAULT_FN_ATTRS
_AddressOfReturnAddress(void) {
return (void*)((char*)__builtin_frame_address(0) + sizeof(void*));
}
-static __inline__ void * __attribute__((__always_inline__, __nodebug__))
+static __inline__ void * DEFAULT_FN_ATTRS
_ReturnAddress(void) {
return __builtin_return_address(0);
}
#if defined(__i386__) || defined(__x86_64__)
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__cpuid(int __info[4], int __level) {
__asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3])
: "a"(__level));
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__cpuidex(int __info[4], int __level, int __ecx) {
__asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3])
: "a"(__level), "c"(__ecx));
}
-static __inline__ unsigned __int64 __cdecl __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned __int64 __cdecl DEFAULT_FN_ATTRS
_xgetbv(unsigned int __xcr_no) {
unsigned int __eax, __edx;
__asm__ ("xgetbv" : "=a" (__eax), "=d" (__edx) : "c" (__xcr_no));
return ((unsigned __int64)__edx << 32) | __eax;
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__halt(void) {
__asm__ volatile ("hlt");
}
@@ -930,7 +933,7 @@ __halt(void) {
|* Privileged intrinsics
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
-static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned __int64 DEFAULT_FN_ATTRS
__readmsr(unsigned long __register) {
// Loads the contents of a 64-bit model specific register (MSR) specified in
// the ECX register into registers EDX:EAX. The EDX register is loaded with
@@ -944,14 +947,14 @@ __readmsr(unsigned long __register) {
return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax;
}
-static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long DEFAULT_FN_ATTRS
__readcr3(void) {
unsigned long __cr3_val;
__asm__ __volatile__ ("mov %%cr3, %0" : "=q"(__cr3_val) : : "memory");
return __cr3_val;
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
__writecr3(unsigned int __cr3_val) {
__asm__ ("mov %0, %%cr3" : : "q"(__cr3_val) : "memory");
}
@@ -961,5 +964,7 @@ __writecr3(unsigned int __cr3_val) {
}
#endif
+#undef DEFAULT_FN_ATTRS
+
#endif /* __INTRIN_H */
#endif /* _MSC_VER */
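Throughout Intrin.h the repeated per-function attribute list is replaced by a DEFAULT_FN_ATTRS macro that is defined once at the top of the header and removed again with #undef at the end. A condensed sketch of that pattern is shown below; my_header.h and my_rotl32 are hypothetical, with the body taken from the _rotl definition above.

/* my_header.h: hypothetical header showing the DEFAULT_FN_ATTRS pattern. */
#ifndef MY_HEADER_H
#define MY_HEADER_H

/* Define the default attributes once for every function in this file. */
#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))

static __inline__ unsigned int DEFAULT_FN_ATTRS
my_rotl32(unsigned int value, int shift) {
  shift &= 0x1f;
  return shift ? (value << shift) | (value >> (32 - shift)) : value;
}

/* Undefine the macro so it does not leak into code that includes this header. */
#undef DEFAULT_FN_ATTRS

#endif /* MY_HEADER_H */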
diff --git a/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_aes.h b/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_aes.h
index 2bfa027..17b3f1d 100644
--- a/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_aes.h
+++ b/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_aes.h
@@ -25,35 +25,34 @@
#include <emmintrin.h>
-#if !defined (__AES__)
-# error "AES instructions not enabled"
-#else
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes")))
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_aesenc_si128(__m128i __V, __m128i __R)
{
return (__m128i)__builtin_ia32_aesenc128(__V, __R);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_aesenclast_si128(__m128i __V, __m128i __R)
{
return (__m128i)__builtin_ia32_aesenclast128(__V, __R);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_aesdec_si128(__m128i __V, __m128i __R)
{
return (__m128i)__builtin_ia32_aesdec128(__V, __R);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_aesdeclast_si128(__m128i __V, __m128i __R)
{
return (__m128i)__builtin_ia32_aesdeclast128(__V, __R);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_aesimc_si128(__m128i __V)
{
return (__m128i)__builtin_ia32_aesimc128(__V);
@@ -62,6 +61,6 @@ _mm_aesimc_si128(__m128i __V)
#define _mm_aeskeygenassist_si128(C, R) \
__builtin_ia32_aeskeygenassist128((C), (R))
-#endif
+#undef DEFAULT_FN_ATTRS
#endif /* _WMMINTRIN_AES_H */
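Besides adopting the attribute macro, this header drops the #error guard for __AES__ and instead carries __target__("aes") on every intrinsic, so a diagnostic is issued only where such an intrinsic is actually used without the feature enabled. A small usage sketch follows; one_aes_round is hypothetical and assumes the AES extension is enabled (for example with -maes).

/* Usage sketch: perform one AES encryption round on a 128-bit state. */
#include <wmmintrin.h>

__m128i one_aes_round(__m128i state, __m128i round_key) {
  /* ShiftRows, SubBytes, MixColumns, then XOR with the round key */
  return _mm_aesenc_si128(state, round_key);
}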
diff --git a/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_pclmul.h b/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_pclmul.h
index 8d1f1b7..48a85d2 100644
--- a/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_pclmul.h
+++ b/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_pclmul.h
@@ -23,12 +23,8 @@
#ifndef _WMMINTRIN_PCLMUL_H
#define _WMMINTRIN_PCLMUL_H
-#if !defined (__PCLMUL__)
-# error "PCLMUL instruction is not enabled"
-#else
#define _mm_clmulepi64_si128(__X, __Y, __I) \
((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \
(__v2di)(__m128i)(__Y), (char)(__I)))
-#endif
#endif /* _WMMINTRIN_PCLMUL_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/adxintrin.h b/contrib/llvm/tools/clang/lib/Headers/adxintrin.h
index 9db8bcb..050dc8a 100644
--- a/contrib/llvm/tools/clang/lib/Headers/adxintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/adxintrin.h
@@ -28,9 +28,11 @@
#ifndef __ADXINTRIN_H
#define __ADXINTRIN_H
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
/* Intrinsics that are available only if __ADX__ defined */
-#ifdef __ADX__
-static __inline unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__("adx")))
_addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
unsigned int *__p)
{
@@ -38,17 +40,16 @@ _addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
}
#ifdef __x86_64__
-static __inline unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__("adx")))
_addcarryx_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p)
{
return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
}
#endif
-#endif
/* Intrinsics that are also available if __ADX__ undefined */
-static __inline unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline unsigned char DEFAULT_FN_ATTRS
_addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
unsigned int *__p)
{
@@ -56,7 +57,7 @@ _addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
}
#ifdef __x86_64__
-static __inline unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline unsigned char DEFAULT_FN_ATTRS
_addcarry_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p)
{
@@ -64,7 +65,7 @@ _addcarry_u64(unsigned char __cf, unsigned long long __x,
}
#endif
-static __inline unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline unsigned char DEFAULT_FN_ATTRS
_subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
unsigned int *__p)
{
@@ -72,7 +73,7 @@ _subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
}
#ifdef __x86_64__
-static __inline unsigned char __attribute__((__always_inline__, __nodebug__))
+static __inline unsigned char DEFAULT_FN_ATTRS
_subborrow_u64(unsigned char __cf, unsigned long long __x,
unsigned long long __y, unsigned long long *__p)
{
@@ -80,4 +81,6 @@ _subborrow_u64(unsigned char __cf, unsigned long long __x,
}
#endif
+#undef DEFAULT_FN_ATTRS
+
#endif /* __ADXINTRIN_H */
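In adxintrin.h the ADX-only intrinsics keep their own __target__("adx") attribute while the plain carry intrinsics move to DEFAULT_FN_ATTRS, so _addcarry_u32 and _subborrow_u32 remain usable without -madx. The sketch below shows the typical multi-precision use of the carry chain; add96 is hypothetical and assumes _addcarry_u32 is reachable through <immintrin.h> on the target.

/* Usage sketch: add two 96-bit numbers stored as little-endian 32-bit limbs. */
#include <immintrin.h>

void add96(const unsigned int a[3], const unsigned int b[3], unsigned int out[3]) {
  unsigned char carry = 0;
  for (int i = 0; i < 3; ++i)
    carry = _addcarry_u32(carry, a[i], b[i], &out[i]);  /* returns the carry out */
}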
diff --git a/contrib/llvm/tools/clang/lib/Headers/altivec.h b/contrib/llvm/tools/clang/lib/Headers/altivec.h
index 7427ed5..28df890 100644
--- a/contrib/llvm/tools/clang/lib/Headers/altivec.h
+++ b/contrib/llvm/tools/clang/lib/Headers/altivec.h
@@ -12012,6 +12012,29 @@ static vector unsigned long long __ATTRS_o_ai __builtin_crypto_vpmsumb(
vector unsigned long long __a, vector unsigned long long __b) {
return __builtin_altivec_crypto_vpmsumd(__a, __b);
}
+
+static vector signed char __ATTRS_o_ai vec_vgbbd (vector signed char __a)
+{
+ return __builtin_altivec_vgbbd((vector unsigned char) __a);
+}
+
+static vector unsigned char __ATTRS_o_ai vec_vgbbd (vector unsigned char __a)
+{
+ return __builtin_altivec_vgbbd(__a);
+}
+
+static vector long long __ATTRS_o_ai
+vec_vbpermq (vector signed char __a, vector signed char __b)
+{
+ return __builtin_altivec_vbpermq((vector unsigned char) __a,
+ (vector unsigned char) __b);
+}
+
+static vector long long __ATTRS_o_ai
+vec_vbpermq (vector unsigned char __a, vector unsigned char __b)
+{
+ return __builtin_altivec_vbpermq(__a, __b);
+}
#endif
#undef __ATTRS_o_ai
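The altivec.h hunk adds POWER8 overloads for vec_vgbbd (Vector Gather Bits by Bytes by Doubleword) and vec_vbpermq (Vector Bit Permute Quadword) on signed and unsigned char vectors. A minimal usage sketch follows; gather_bits and bit_permute are hypothetical and assume a POWER8 target (for example -mcpu=power8 with AltiVec enabled).

/* Usage sketch for the new POWER8 vector intrinsics. */
#include <altivec.h>

vector unsigned char gather_bits(vector unsigned char v) {
  return vec_vgbbd(v);                  /* gather bits by bytes, per doubleword */
}

vector long long bit_permute(vector unsigned char src, vector unsigned char idx) {
  return vec_vbpermq(src, idx);         /* gather bits of src selected by the indices in idx */
}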
diff --git a/contrib/llvm/tools/clang/lib/Headers/ammintrin.h b/contrib/llvm/tools/clang/lib/Headers/ammintrin.h
index 17f5ab1..3f38205 100644
--- a/contrib/llvm/tools/clang/lib/Headers/ammintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/ammintrin.h
@@ -24,12 +24,11 @@
#ifndef __AMMINTRIN_H
#define __AMMINTRIN_H
-#ifndef __SSE4A__
-#error "SSE4A instruction set not enabled"
-#else
-
#include <pmmintrin.h>
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a")))
+
/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit
/// integer vector operand at the index idx and of the length len.
///
@@ -81,7 +80,7 @@
/// non-zero, the result is undefined.
/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
/// from the source operand.
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_extract_si64(__m128i __x, __m128i __y)
{
return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
@@ -157,7 +156,7 @@ _mm_extract_si64(__m128i __x, __m128i __y)
/// lower bits of source operand __y. The upper 64 bits of the return value
/// are undefined.
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_insert_si64(__m128i __x, __m128i __y)
{
return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
@@ -178,7 +177,7 @@ _mm_insert_si64(__m128i __x, __m128i __y)
/// \param __a
/// The 64-bit double-precision floating-point register value to
/// be stored.
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_stream_sd(double *__p, __m128d __a)
{
__builtin_ia32_movntsd(__p, (__v2df)__a);
@@ -199,12 +198,12 @@ _mm_stream_sd(double *__p, __m128d __a)
/// \param __a
/// The 32-bit single-precision floating-point register value to
/// be stored.
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_stream_ss(float *__p, __m128 __a)
{
__builtin_ia32_movntss(__p, (__v4sf)__a);
}
-#endif /* __SSE4A__ */
+#undef DEFAULT_FN_ATTRS
#endif /* __AMMINTRIN_H */
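ammintrin.h likewise trades the SSE4A #error guard for a per-function __target__("sse4a") attribute; the intrinsics themselves are unchanged. A short usage sketch of the scalar streaming store described above follows; stream_one_float is hypothetical and assumes SSE4A is enabled (for example -msse4a).

/* Usage sketch: store one float to memory without polluting the cache. */
#include <ammintrin.h>

void stream_one_float(float *dst, float x) {
  _mm_stream_ss(dst, _mm_set_ss(x));    /* non-temporal store of the low element */
}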
diff --git a/contrib/llvm/tools/clang/lib/Headers/arm_acle.h b/contrib/llvm/tools/clang/lib/Headers/arm_acle.h
index 6c56f3b..73a7e76 100644
--- a/contrib/llvm/tools/clang/lib/Headers/arm_acle.h
+++ b/contrib/llvm/tools/clang/lib/Headers/arm_acle.h
@@ -289,6 +289,14 @@ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
}
#endif
+/* 10.1 Special register intrinsics */
+#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
+#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
+#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg)
+#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v)
+#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)
+#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)
+
#if defined(__cplusplus)
}
#endif
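The arm_acle.h hunk adds the ACLE 10.1 special-register macros, which map directly onto the corresponding __builtin_arm_rsr*/__builtin_arm_wsr* builtins. A usage sketch follows; register names and their accessibility are target- and privilege-dependent, and CNTVCT_EL0 is used here only as an example of an AArch64 register that is commonly readable.

/* Usage sketch: read a 64-bit system register by name (AArch64 assumed). */
#include <arm_acle.h>

unsigned long long read_virtual_counter(void) {
  return __arm_rsr64("CNTVCT_EL0");     /* virtual counter; access may be trapped */
}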
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h b/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h
index e1e639d..5f1817e 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h
@@ -28,94 +28,97 @@
#ifndef __AVX2INTRIN_H
#define __AVX2INTRIN_H
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx2")))
+
/* SSE4 Multiple Packed Sums of Absolute Difference. */
#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M))
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_abs_epi8(__m256i __a)
{
return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_abs_epi16(__m256i __a)
{
return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_abs_epi32(__m256i __a)
{
return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_packs_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_packs_epi32(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_packus_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_packus_epi32(__m256i __V1, __m256i __V2)
{
return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_add_epi8(__m256i __a, __m256i __b)
{
return (__m256i)((__v32qi)__a + (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_add_epi16(__m256i __a, __m256i __b)
{
return (__m256i)((__v16hi)__a + (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_add_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8si)__a + (__v8si)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_add_epi64(__m256i __a, __m256i __b)
{
return __a + __b;
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_adds_epi8(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_adds_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_adds_epu8(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_adds_epu16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);
@@ -126,31 +129,31 @@ _mm256_adds_epu16(__m256i __a, __m256i __b)
__m256i __b = (b); \
(__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); })
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_and_si256(__m256i __a, __m256i __b)
{
return __a & __b;
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_andnot_si256(__m256i __a, __m256i __b)
{
return ~__a & __b;
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_avg_epu8(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_avg_epu16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
{
return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,
@@ -178,307 +181,307 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
(((M) & 0x40) ? 30 : 14), \
(((M) & 0x80) ? 31 : 15)); })
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cmpeq_epi8(__m256i __a, __m256i __b)
{
return (__m256i)((__v32qi)__a == (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cmpeq_epi16(__m256i __a, __m256i __b)
{
return (__m256i)((__v16hi)__a == (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cmpeq_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8si)__a == (__v8si)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cmpeq_epi64(__m256i __a, __m256i __b)
{
return (__m256i)(__a == __b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cmpgt_epi8(__m256i __a, __m256i __b)
{
return (__m256i)((__v32qi)__a > (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cmpgt_epi16(__m256i __a, __m256i __b)
{
return (__m256i)((__v16hi)__a > (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cmpgt_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8si)__a > (__v8si)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cmpgt_epi64(__m256i __a, __m256i __b)
{
return (__m256i)(__a > __b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_hadd_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_hadd_epi32(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_hadds_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_hsub_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_hsub_epi32(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_hsubs_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maddubs_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_madd_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_max_epi8(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_max_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_max_epi32(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_max_epu8(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_max_epu16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_max_epu32(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_min_epi8(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_min_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_min_epi32(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_min_epu8(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_min_epu16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_min_epu32(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm256_movemask_epi8(__m256i __a)
{
return __builtin_ia32_pmovmskb256((__v32qi)__a);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cvtepi8_epi16(__m128i __V)
{
return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cvtepi8_epi32(__m128i __V)
{
return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cvtepi8_epi64(__m128i __V)
{
return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cvtepi16_epi32(__m128i __V)
{
return (__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cvtepi16_epi64(__m128i __V)
{
return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cvtepi32_epi64(__m128i __V)
{
return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cvtepu8_epi16(__m128i __V)
{
return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cvtepu8_epi32(__m128i __V)
{
return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cvtepu8_epi64(__m128i __V)
{
return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cvtepu16_epi32(__m128i __V)
{
return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cvtepu16_epi64(__m128i __V)
{
return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cvtepu32_epi64(__m128i __V)
{
return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mul_epi32(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mulhrs_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mulhi_epu16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mulhi_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mullo_epi16(__m256i __a, __m256i __b)
{
return (__m256i)((__v16hi)__a * (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mullo_epi32 (__m256i __a, __m256i __b)
{
return (__m256i)((__v8si)__a * (__v8si)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mul_epu32(__m256i __a, __m256i __b)
{
return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_or_si256(__m256i __a, __m256i __b)
{
return __a | __b;
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_sad_epu8(__m256i __a, __m256i __b)
{
return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_shuffle_epi8(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);
@@ -520,19 +523,19 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
8 + (((imm) & 0xc0) >> 6), \
12, 13, 14, 15); })
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_sign_epi8(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_sign_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_sign_epi32(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
@@ -544,61 +547,61 @@ _mm256_sign_epi32(__m256i __a, __m256i __b)
#define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count))
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_slli_epi16(__m256i __a, int __count)
{
return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_sll_epi16(__m256i __a, __m128i __count)
{
return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_slli_epi32(__m256i __a, int __count)
{
return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_sll_epi32(__m256i __a, __m128i __count)
{
return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_slli_epi64(__m256i __a, int __count)
{
return __builtin_ia32_psllqi256(__a, __count);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_sll_epi64(__m256i __a, __m128i __count)
{
return __builtin_ia32_psllq256(__a, __count);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_srai_epi16(__m256i __a, int __count)
{
return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_sra_epi16(__m256i __a, __m128i __count)
{
return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_srai_epi32(__m256i __a, int __count)
{
return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_sra_epi32(__m256i __a, __m128i __count)
{
return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);
@@ -610,175 +613,175 @@ _mm256_sra_epi32(__m256i __a, __m128i __count)
#define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count))
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_srli_epi16(__m256i __a, int __count)
{
return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_srl_epi16(__m256i __a, __m128i __count)
{
return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_srli_epi32(__m256i __a, int __count)
{
return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_srl_epi32(__m256i __a, __m128i __count)
{
return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_srli_epi64(__m256i __a, int __count)
{
return __builtin_ia32_psrlqi256(__a, __count);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_srl_epi64(__m256i __a, __m128i __count)
{
return __builtin_ia32_psrlq256(__a, __count);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_sub_epi8(__m256i __a, __m256i __b)
{
return (__m256i)((__v32qi)__a - (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_sub_epi16(__m256i __a, __m256i __b)
{
return (__m256i)((__v16hi)__a - (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_sub_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8si)__a - (__v8si)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_sub_epi64(__m256i __a, __m256i __b)
{
return __a - __b;
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_subs_epi8(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_subs_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_subs_epu8(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_subs_epu16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_unpackhi_epi8(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_unpackhi_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_unpackhi_epi32(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_unpackhi_epi64(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_shufflevector(__a, __b, 1, 4+1, 3, 4+3);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_unpacklo_epi8(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_unpacklo_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_unpacklo_epi32(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_unpacklo_epi64(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_shufflevector(__a, __b, 0, 4+0, 2, 4+2);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_xor_si256(__m256i __a, __m256i __b)
{
return __a ^ __b;
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_stream_load_si256(__m256i *__V)
{
return (__m256i)__builtin_ia32_movntdqa256((__v4di *)__V);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_broadcastss_ps(__m128 __X)
{
return (__m128)__builtin_ia32_vbroadcastss_ps((__v4sf)__X);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_broadcastsd_pd(__m128d __a)
{
return __builtin_shufflevector(__a, __a, 0, 0);
}
-static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_broadcastss_ps(__m128 __X)
{
return (__m256)__builtin_ia32_vbroadcastss_ps256((__v4sf)__X);
}
-static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_broadcastsd_pd(__m128d __X)
{
return (__m256d)__builtin_ia32_vbroadcastsd_pd256((__v2df)__X);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_broadcastsi128_si256(__m128i __X)
{
return (__m256i)__builtin_shufflevector(__X, __X, 0, 1, 0, 1);
@@ -806,56 +809,56 @@ _mm256_broadcastsi128_si256(__m128i __X)
(((M) & 0x40) ? 14 : 6), \
(((M) & 0x80) ? 15 : 7)); })
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_broadcastb_epi8(__m128i __X)
{
return (__m256i)__builtin_ia32_pbroadcastb256((__v16qi)__X);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_broadcastw_epi16(__m128i __X)
{
return (__m256i)__builtin_ia32_pbroadcastw256((__v8hi)__X);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_broadcastd_epi32(__m128i __X)
{
return (__m256i)__builtin_ia32_pbroadcastd256((__v4si)__X);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_broadcastq_epi64(__m128i __X)
{
return (__m256i)__builtin_ia32_pbroadcastq256(__X);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_broadcastb_epi8(__m128i __X)
{
return (__m128i)__builtin_ia32_pbroadcastb128((__v16qi)__X);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_broadcastw_epi16(__m128i __X)
{
return (__m128i)__builtin_ia32_pbroadcastw128((__v8hi)__X);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_broadcastd_epi32(__m128i __X)
{
return (__m128i)__builtin_ia32_pbroadcastd128((__v4si)__X);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_broadcastq_epi64(__m128i __X)
{
return (__m128i)__builtin_ia32_pbroadcastq128(__X);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);
@@ -867,7 +870,7 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
(M) & 0x3, ((M) & 0xc) >> 2, \
((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
-static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_permutevar8x32_ps(__m256 __a, __m256 __b)
{
return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8sf)__b);
@@ -900,109 +903,109 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256 __b)
(((M) & 1) ? 4 : 2), \
(((M) & 1) ? 5 : 3) );})
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskload_epi32(int const *__X, __m256i __M)
{
return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskload_epi64(long long const *__X, __m256i __M)
{
return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, __M);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskload_epi32(int const *__X, __m128i __M)
{
return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskload_epi64(long long const *__X, __m128i __M)
{
return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y)
{
__builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y)
{
__builtin_ia32_maskstoreq256((__v4di *)__X, __M, __Y);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y)
{
__builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)
{
__builtin_ia32_maskstoreq(( __v2di *)__X, __M, __Y);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_sllv_epi32(__m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sllv_epi32(__m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_sllv_epi64(__m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_psllv4di(__X, __Y);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sllv_epi64(__m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_psllv2di(__X, __Y);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_srav_epi32(__m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_srav_epi32(__m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_srlv_epi32(__m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_srlv_epi32(__m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_srlv_epi64(__m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_psrlv4di(__X, __Y);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_srlv_epi64(__m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_psrlv2di(__X, __Y);
@@ -1248,4 +1251,6 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
(const __v4di *)__m, (__v4di)__i, \
(__v4di)_mm256_set1_epi64x(-1), (s)); })
+#undef DEFAULT_FN_ATTRS
+
#endif /* __AVX2INTRIN_H */
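The change above follows one pattern through the whole header: every per-function attribute list is replaced by a file-local DEFAULT_FN_ATTRS macro, and the macro is #undef'd just before the closing include guard so it cannot leak into user code. A minimal sketch of that define / use / #undef pattern, using a hypothetical mini-header and a made-up _example_add wrapper rather than the real intrinsics:

/* Hypothetical mini-header illustrating the pattern applied in the hunks above. */
#ifndef __EXAMPLEINTRIN_H
#define __EXAMPLEINTRIN_H

/* Define the default attributes for the functions in this file. */
#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))

static __inline__ int DEFAULT_FN_ATTRS
_example_add(int __a, int __b)
{
  return __a + __b;
}

/* Undefine before the end of the header so the macro stays file-local. */
#undef DEFAULT_FN_ATTRS

#endif /* __EXAMPLEINTRIN_H */
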
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h
index d0591e4..4eb9747 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h
@@ -33,7 +33,10 @@ typedef unsigned long long __mmask64;
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
-static __inline __v64qi __attribute__ ((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
+
+static __inline __v64qi DEFAULT_FN_ATTRS
_mm512_setzero_qi (void) {
return (__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -45,7 +48,7 @@ _mm512_setzero_qi (void) {
0, 0, 0, 0, 0, 0, 0, 0 };
}
-static __inline __v32hi __attribute__ ((__always_inline__, __nodebug__))
+static __inline __v32hi DEFAULT_FN_ATTRS
_mm512_setzero_hi (void) {
return (__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -55,300 +58,300 @@ _mm512_setzero_hi (void) {
/* Integer compare */
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
(__mmask64)-1);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
__u);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
(__mmask64)-1);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
__u);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
(__mmask64)-1);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
__u);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
(__mmask64)-1);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
__u);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
(__mmask64)-1);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
__u);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
(__mmask64)-1);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
__u);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_cmple_epi8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
(__mmask64)-1);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
__u);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_cmple_epu8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
(__mmask64)-1);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_cmple_epi16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_cmple_epu16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
__u);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
(__mmask64)-1);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
__u);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
(__mmask64)-1);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
__u);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
(__mmask64)-1);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
__u);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
(__mmask64)-1);
}
-static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask64 DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
__u);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_add_epi8 (__m512i __A, __m512i __B) {
return (__m512i) ((__v64qi) __A + (__v64qi) __B);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
(__v64qi) __B,
@@ -356,7 +359,7 @@ _mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
(__mmask64) __U);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
(__v64qi) __B,
@@ -365,12 +368,12 @@ _mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
(__mmask64) __U);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_sub_epi8 (__m512i __A, __m512i __B) {
return (__m512i) ((__v64qi) __A - (__v64qi) __B);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
(__v64qi) __B,
@@ -378,7 +381,7 @@ _mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
(__mmask64) __U);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
(__v64qi) __B,
@@ -387,12 +390,12 @@ _mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
(__mmask64) __U);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_add_epi16 (__m512i __A, __m512i __B) {
return (__m512i) ((__v32hi) __A + (__v32hi) __B);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
(__v32hi) __B,
@@ -400,7 +403,7 @@ _mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
(__mmask32) __U);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
(__v32hi) __B,
@@ -409,12 +412,12 @@ _mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
(__mmask32) __U);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_sub_epi16 (__m512i __A, __m512i __B) {
return (__m512i) ((__v32hi) __A - (__v32hi) __B);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
(__v32hi) __B,
@@ -422,7 +425,7 @@ _mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
(__mmask32) __U);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
(__v32hi) __B,
@@ -431,12 +434,12 @@ _mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
(__mmask32) __U);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mullo_epi16 (__m512i __A, __m512i __B) {
return (__m512i) ((__v32hi) __A * (__v32hi) __B);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
(__v32hi) __B,
@@ -444,7 +447,7 @@ _mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
(__mmask32) __U);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
(__v32hi) __B,
@@ -493,4 +496,6 @@ _mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
(__v32hi)(__m512i)(b), \
(p), (__mmask32)(m)); })
+#undef DEFAULT_FN_ATTRS
+
#endif
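Unlike the AVX2 header, the DEFAULT_FN_ATTRS introduced here also carries __target__("avx512bw"), so each wrapper enables the AVX-512BW feature for its own body instead of requiring the whole translation unit to be built with that feature. A hedged sketch of the same mechanism used outside the header, assuming a clang or gcc toolchain that supports per-function target attributes; the function name add_bytes_avx512bw is invented for illustration, while _mm512_add_epi8 is one of the intrinsics re-attributed above:

#include <immintrin.h>

/* AVX-512BW is enabled only for this function's body; the rest of the
   translation unit can be compiled without -mavx512bw. */
__attribute__((__target__("avx512bw")))
__m512i add_bytes_avx512bw(__m512i __a, __m512i __b)
{
  return _mm512_add_epi8(__a, __b);
}
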
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512dqintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512dqintrin.h
index fd33be2..cfcfc62 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512dqintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512dqintrin.h
@@ -28,12 +28,15 @@
#ifndef __AVX512DQINTRIN_H
#define __AVX512DQINTRIN_H
-static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
+
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
return (__m512i) ((__v8di) __A * (__v8di) __B);
}
-static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
(__v8di) __B,
@@ -41,7 +44,7 @@ _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
(__mmask8) __U);
}
-static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
(__v8di) __B,
@@ -50,12 +53,12 @@ _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
(__mmask8) __U);
}
-static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_xor_pd (__m512d __A, __m512d __B) {
return (__m512d) ((__v8di) __A ^ (__v8di) __B);
}
-static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
(__v8df) __B,
@@ -63,7 +66,7 @@ _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
(__mmask8) __U);
}
-static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
(__v8df) __B,
@@ -72,12 +75,12 @@ _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
(__mmask8) __U);
}
-static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_xor_ps (__m512 __A, __m512 __B) {
return (__m512) ((__v16si) __A ^ (__v16si) __B);
}
-static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
(__v16sf) __B,
@@ -85,7 +88,7 @@ _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
(__mmask16) __U);
}
-static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
(__v16sf) __B,
@@ -94,12 +97,12 @@ _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
(__mmask16) __U);
}
-static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_or_pd (__m512d __A, __m512d __B) {
return (__m512d) ((__v8di) __A | (__v8di) __B);
}
-static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
(__v8df) __B,
@@ -107,7 +110,7 @@ _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
(__mmask8) __U);
}
-static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
(__v8df) __B,
@@ -116,12 +119,12 @@ _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
(__mmask8) __U);
}
-static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_or_ps (__m512 __A, __m512 __B) {
return (__m512) ((__v16si) __A | (__v16si) __B);
}
-static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
(__v16sf) __B,
@@ -129,7 +132,7 @@ _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
(__mmask16) __U);
}
-static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
(__v16sf) __B,
@@ -138,12 +141,12 @@ _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
(__mmask16) __U);
}
-static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_and_pd (__m512d __A, __m512d __B) {
return (__m512d) ((__v8di) __A & (__v8di) __B);
}
-static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
(__v8df) __B,
@@ -151,7 +154,7 @@ _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
(__mmask8) __U);
}
-static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
(__v8df) __B,
@@ -160,12 +163,12 @@ _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
(__mmask8) __U);
}
-static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_and_ps (__m512 __A, __m512 __B) {
return (__m512) ((__v16si) __A & (__v16si) __B);
}
-static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
(__v16sf) __B,
@@ -173,7 +176,7 @@ _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
(__mmask16) __U);
}
-static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
(__v16sf) __B,
@@ -182,7 +185,7 @@ _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
(__mmask16) __U);
}
-static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_andnot_pd (__m512d __A, __m512d __B) {
return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
(__v8df) __B,
@@ -191,7 +194,7 @@ _mm512_andnot_pd (__m512d __A, __m512d __B) {
(__mmask8) -1);
}
-static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
(__v8df) __B,
@@ -199,7 +202,7 @@ _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
(__mmask8) __U);
}
-static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
(__v8df) __B,
@@ -208,7 +211,7 @@ _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
(__mmask8) __U);
}
-static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_andnot_ps (__m512 __A, __m512 __B) {
return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
(__v16sf) __B,
@@ -217,7 +220,7 @@ _mm512_andnot_ps (__m512 __A, __m512 __B) {
(__mmask16) -1);
}
-static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
(__v16sf) __B,
@@ -225,7 +228,7 @@ _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
(__mmask16) __U);
}
-static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
(__v16sf) __B,
@@ -234,4 +237,6 @@ _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
(__mmask16) __U);
}
+#undef DEFAULT_FN_ATTRS
+
#endif
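The functions re-attributed in this file come in the usual AVX-512 triples: a plain form, a _mask_ form that merges results into a pass-through vector __W under a write mask, and a _maskz_ form that zeroes the lanes the mask leaves unselected. A rough scalar model of those masking semantics, not the real intrinsics; vec8_t, mask8_t, and the add8 helpers are invented for illustration:

#include <stdint.h>

typedef struct { int64_t v[8]; } vec8_t;   /* stands in for __m512i   */
typedef uint8_t mask8_t;                   /* stands in for __mmask8  */

vec8_t add8(vec8_t a, vec8_t b)                             /* plain form   */
{
  vec8_t r;
  for (int i = 0; i < 8; ++i)
    r.v[i] = a.v[i] + b.v[i];
  return r;
}

vec8_t mask_add8(vec8_t w, mask8_t u, vec8_t a, vec8_t b)   /* _mask_ form  */
{
  vec8_t r = add8(a, b);
  for (int i = 0; i < 8; ++i)
    if (!((u >> i) & 1))
      r.v[i] = w.v[i];                     /* unselected lanes keep __W    */
  return r;
}

vec8_t maskz_add8(mask8_t u, vec8_t a, vec8_t b)            /* _maskz_ form */
{
  vec8_t zero = { { 0 } };
  return mask_add8(zero, u, a, b);         /* unselected lanes become zero */
}
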
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h
index 57c61aa..56edffc 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h
@@ -27,7 +27,6 @@
#ifndef __AVX512ERINTRIN_H
#define __AVX512ERINTRIN_H
-
// exp2a23
#define _mm512_exp2a23_round_pd(A, R) __extension__ ({ \
(__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h
index d299704..98eb73b 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h
@@ -46,15 +46,18 @@ typedef unsigned short __mmask16;
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
+
/* Create vectors with repeated elements */
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_setzero_si512(void)
{
return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
@@ -63,7 +66,7 @@ _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
__M);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
{
#ifdef __x86_64__
@@ -79,45 +82,45 @@ _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
#endif
}
-static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_setzero_ps(void)
{
return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}
-static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_setzero_pd(void)
{
return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}
-static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_set1_ps(float __w)
{
return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
__w, __w, __w, __w, __w, __w, __w, __w };
}
-static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_set1_pd(double __w)
{
return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
}
-static __inline __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_set1_epi32(int __s)
{
return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
__s, __s, __s, __s, __s, __s, __s, __s };
}
-static __inline __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_set1_epi64(long long __d)
{
return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}
-static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_broadcastss_ps(__m128 __X)
{
float __f = __X[0];
@@ -127,7 +130,7 @@ _mm512_broadcastss_ps(__m128 __X)
__f, __f, __f, __f };
}
-static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_broadcastsd_pd(__m128d __X)
{
double __d = __X[0];
@@ -137,39 +140,39 @@ _mm512_broadcastsd_pd(__m128d __X)
/* Cast between vector types */
-static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_castpd256_pd512(__m256d __a)
{
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
}
-static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_castps256_ps512(__m256 __a)
{
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
-1, -1, -1, -1, -1, -1, -1, -1);
}
-static __inline __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline __m128d DEFAULT_FN_ATTRS
_mm512_castpd512_pd128(__m512d __a)
{
return __builtin_shufflevector(__a, __a, 0, 1);
}
-static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline __m128 DEFAULT_FN_ATTRS
_mm512_castps512_ps128(__m512 __a)
{
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
}
/* Bitwise operators */
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_and_epi32(__m512i __a, __m512i __b)
{
return __a & __b;
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
@@ -177,7 +180,7 @@ _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
(__v16si) __src,
(__mmask16) __k);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
@@ -187,13 +190,13 @@ _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
(__mmask16) __k);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_and_epi64(__m512i __a, __m512i __b)
{
return __a & __b;
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
@@ -201,7 +204,7 @@ _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
(__v8di) __src,
(__mmask8) __k);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
@@ -211,7 +214,7 @@ _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
(__mmask8) __k);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
@@ -221,7 +224,7 @@ _mm512_andnot_epi32 (__m512i __A, __m512i __B)
(__mmask16) -1);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
@@ -230,7 +233,7 @@ _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
(__mmask16) __U);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
@@ -240,7 +243,7 @@ _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
(__mmask16) __U);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_andnot_epi64 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
@@ -250,7 +253,7 @@ _mm512_andnot_epi64 (__m512i __A, __m512i __B)
(__mmask8) -1);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
@@ -258,7 +261,7 @@ _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
(__v8di) __W, __U);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
@@ -267,13 +270,13 @@ _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
_mm512_setzero_pd (),
__U);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_or_epi32(__m512i __a, __m512i __b)
{
return __a | __b;
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
@@ -281,7 +284,7 @@ _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
(__v16si) __src,
(__mmask16) __k);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
@@ -291,13 +294,13 @@ _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
(__mmask16) __k);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_or_epi64(__m512i __a, __m512i __b)
{
return __a | __b;
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
@@ -305,7 +308,7 @@ _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
(__v8di) __src,
(__mmask8) __k);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
@@ -315,13 +318,13 @@ _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
(__mmask8) __k);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
return __a ^ __b;
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
@@ -329,7 +332,7 @@ _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
(__v16si) __src,
(__mmask16) __k);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
@@ -339,13 +342,13 @@ _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
(__mmask16) __k);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
return __a ^ __b;
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
@@ -353,7 +356,7 @@ _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
(__v8di) __src,
(__mmask8) __k);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
@@ -363,68 +366,68 @@ _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
(__mmask8) __k);
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_and_si512(__m512i __a, __m512i __b)
{
return __a & __b;
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_or_si512(__m512i __a, __m512i __b)
{
return __a | __b;
}
-static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_xor_si512(__m512i __a, __m512i __b)
{
return __a ^ __b;
}
/* Arithmetic */
-static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_add_pd(__m512d __a, __m512d __b)
{
return __a + __b;
}
-static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_add_ps(__m512 __a, __m512 __b)
{
return __a + __b;
}
-static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_mul_pd(__m512d __a, __m512d __b)
{
return __a * __b;
}
-static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_mul_ps(__m512 __a, __m512 __b)
{
return __a * __b;
}
-static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_sub_pd(__m512d __a, __m512d __b)
{
return __a - __b;
}
-static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_sub_ps(__m512 __a, __m512 __b)
{
return __a - __b;
}
-static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v8di) __A + (__v8di) __B);
}
-static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
@@ -433,7 +436,7 @@ _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
(__mmask8) __U);
}
-static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
@@ -443,13 +446,13 @@ _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
(__mmask8) __U);
}
-static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v8di) __A - (__v8di) __B);
}
-static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
@@ -458,7 +461,7 @@ _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
(__mmask8) __U);
}
-static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
@@ -468,13 +471,13 @@ _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
(__mmask8) __U);
}
-static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v16si) __A + (__v16si) __B);
}
-static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
@@ -483,7 +486,7 @@ _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
(__mmask16) __U);
}
-static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
@@ -493,13 +496,13 @@ _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
(__mmask16) __U);
}
-static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v16si) __A - (__v16si) __B);
}
-static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
@@ -508,7 +511,7 @@ _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
(__mmask16) __U);
}
-static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m512i DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
@@ -518,7 +521,7 @@ _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
(__mmask16) __U);
}
-static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_max_pd(__m512d __A, __m512d __B)
{
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
@@ -529,7 +532,7 @@ _mm512_max_pd(__m512d __A, __m512d __B)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_max_ps(__m512 __A, __m512 __B)
{
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
@@ -541,7 +544,7 @@ _mm512_max_ps(__m512 __A, __m512 __B)
}
static __inline __m512i
-__attribute__ ((__always_inline__, __nodebug__))
+DEFAULT_FN_ATTRS
_mm512_max_epi32(__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
@@ -551,7 +554,7 @@ _mm512_max_epi32(__m512i __A, __m512i __B)
(__mmask16) -1);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_max_epu32(__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
@@ -561,7 +564,7 @@ _mm512_max_epu32(__m512i __A, __m512i __B)
(__mmask16) -1);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_max_epi64(__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
@@ -571,7 +574,7 @@ _mm512_max_epi64(__m512i __A, __m512i __B)
(__mmask8) -1);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_max_epu64(__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
@@ -581,7 +584,7 @@ _mm512_max_epu64(__m512i __A, __m512i __B)
(__mmask8) -1);
}
-static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_min_pd(__m512d __A, __m512d __B)
{
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
@@ -592,7 +595,7 @@ _mm512_min_pd(__m512d __A, __m512d __B)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_min_ps(__m512 __A, __m512 __B)
{
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
@@ -604,7 +607,7 @@ _mm512_min_ps(__m512 __A, __m512 __B)
}
static __inline __m512i
-__attribute__ ((__always_inline__, __nodebug__))
+DEFAULT_FN_ATTRS
_mm512_min_epi32(__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
@@ -614,7 +617,7 @@ _mm512_min_epi32(__m512i __A, __m512i __B)
(__mmask16) -1);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_min_epu32(__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
@@ -624,7 +627,7 @@ _mm512_min_epu32(__m512i __A, __m512i __B)
(__mmask16) -1);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_min_epi64(__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
@@ -634,7 +637,7 @@ _mm512_min_epi64(__m512i __A, __m512i __B)
(__mmask8) -1);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_min_epu64(__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
@@ -644,7 +647,7 @@ _mm512_min_epu64(__m512i __A, __m512i __B)
(__mmask8) -1);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_mul_epi32(__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
@@ -654,7 +657,7 @@ _mm512_mul_epi32(__m512i __X, __m512i __Y)
(__mmask8) -1);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
@@ -662,7 +665,7 @@ _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
(__v8di) __W, __M);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
@@ -672,7 +675,7 @@ _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
__M);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_mul_epu32(__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
@@ -682,7 +685,7 @@ _mm512_mul_epu32(__m512i __X, __m512i __Y)
(__mmask8) -1);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
@@ -690,7 +693,7 @@ _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
(__v8di) __W, __M);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
@@ -700,13 +703,13 @@ _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
__M);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v16si) __A * (__v16si) __B);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
@@ -716,7 +719,7 @@ _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
__M);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
@@ -724,7 +727,7 @@ _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
(__v16si) __W, __M);
}
-static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_sqrt_pd(__m512d a)
{
return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
@@ -733,7 +736,7 @@ _mm512_sqrt_pd(__m512d a)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_sqrt_ps(__m512 a)
{
return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
@@ -742,7 +745,7 @@ _mm512_sqrt_ps(__m512 a)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_rsqrt14_pd(__m512d __A)
{
return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
@@ -750,7 +753,7 @@ _mm512_rsqrt14_pd(__m512d __A)
_mm512_setzero_pd (),
(__mmask8) -1);
}
-static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_rsqrt14_ps(__m512 __A)
{
return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
@@ -759,7 +762,7 @@ _mm512_rsqrt14_ps(__m512 __A)
(__mmask16) -1);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_rsqrt14_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
@@ -769,7 +772,7 @@ _mm_rsqrt14_ss(__m128 __A, __m128 __B)
(__mmask8) -1);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
@@ -779,7 +782,7 @@ _mm_rsqrt14_sd(__m128d __A, __m128d __B)
(__mmask8) -1);
}
-static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_rcp14_pd(__m512d __A)
{
return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
@@ -788,7 +791,7 @@ _mm512_rcp14_pd(__m512d __A)
(__mmask8) -1);
}
-static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_rcp14_ps(__m512 __A)
{
return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
@@ -796,7 +799,7 @@ _mm512_rcp14_ps(__m512 __A)
_mm512_setzero_ps (),
(__mmask16) -1);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_rcp14_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
@@ -806,7 +809,7 @@ _mm_rcp14_ss(__m128 __A, __m128 __B)
(__mmask8) -1);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_rcp14_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
@@ -816,7 +819,7 @@ _mm_rcp14_sd(__m128d __A, __m128d __B)
(__mmask8) -1);
}
-static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_floor_ps(__m512 __A)
{
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
@@ -825,7 +828,7 @@ _mm512_floor_ps(__m512 __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_floor_pd(__m512d __A)
{
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
@@ -834,7 +837,7 @@ _mm512_floor_pd(__m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_ceil_ps(__m512 __A)
{
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
@@ -843,7 +846,7 @@ _mm512_ceil_ps(__m512 __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_ceil_pd(__m512d __A)
{
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
@@ -852,7 +855,7 @@ _mm512_ceil_pd(__m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_abs_epi64(__m512i __A)
{
return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
@@ -861,7 +864,7 @@ _mm512_abs_epi64(__m512i __A)
(__mmask8) -1);
}
-static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_abs_epi32(__m512i __A)
{
return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
@@ -878,7 +881,7 @@ _mm512_abs_epi32(__m512i __A)
(__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), (__v8df)(A), \
-1, _MM_FROUND_CUR_DIRECTION); })
-static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
return (__m512d)
@@ -889,7 +892,7 @@ _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
return (__m512d)
@@ -900,7 +903,7 @@ _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512d DEFAULT_FN_ATTRS
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
return (__m512d)
@@ -911,7 +914,7 @@ _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
return (__m512)
@@ -922,7 +925,7 @@ _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
return (__m512)
@@ -933,7 +936,7 @@ _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m512 DEFAULT_FN_ATTRS
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
return (__m512)
@@ -946,7 +949,7 @@ _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
/* Vector permutations */
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
{
return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
@@ -955,7 +958,7 @@ _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
(__v16si) __B,
(__mmask16) -1);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
{
return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
@@ -965,7 +968,7 @@ _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
(__mmask8) -1);
}
-static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
{
return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
@@ -974,7 +977,7 @@ _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
(__v8df) __B,
(__mmask8) -1);
}
-static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
{
return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
@@ -1016,7 +1019,7 @@ _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
/* Vector Blend */
-static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
{
return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
@@ -1024,7 +1027,7 @@ _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
(__mmask8) __U);
}
-static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
{
return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
@@ -1032,7 +1035,7 @@ _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
(__mmask16) __U);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
{
return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
@@ -1040,7 +1043,7 @@ _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
(__mmask8) __U);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
{
return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
@@ -1084,7 +1087,7 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
/* Conversion */
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_cvttps_epu32(__m512 __A)
{
return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
@@ -1104,7 +1107,7 @@ _mm512_cvttps_epu32(__m512 __A)
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)-1, (R)); })
-static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_cvtepi32_pd(__m256i __A)
{
return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
@@ -1113,7 +1116,7 @@ _mm512_cvtepi32_pd(__m256i __A)
(__mmask8) -1);
}
-static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_cvtepu32_pd(__m256i __A)
{
return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
@@ -1132,7 +1135,7 @@ _mm512_cvtepu32_pd(__m256i __A)
(__v16hi)_mm256_setzero_si256(), \
-1); })
-static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_cvtph_ps(__m256i __A)
{
return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
@@ -1142,7 +1145,7 @@ _mm512_cvtph_ps(__m256i __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline __m512i __attribute__((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_cvttps_epi32(__m512 a)
{
return (__m512i)
@@ -1151,7 +1154,7 @@ _mm512_cvttps_epi32(__m512 a)
(__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm512_cvttpd_epi32(__m512d a)
{
return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
@@ -1191,19 +1194,19 @@ _mm512_cvttpd_epi32(__m512d a)
(__mmask8) -1, (R)); })
/* Unpack and Interleave */
-static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_unpackhi_pd(__m512d __a, __m512d __b)
{
return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}
-static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_unpacklo_pd(__m512d __a, __m512d __b)
{
return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}
-static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_unpackhi_ps(__m512 __a, __m512 __b)
{
return __builtin_shufflevector(__a, __b,
@@ -1213,7 +1216,7 @@ _mm512_unpackhi_ps(__m512 __a, __m512 __b)
2+12, 18+12, 3+12, 19+12);
}
-static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_unpacklo_ps(__m512 __a, __m512 __b)
{
return __builtin_shufflevector(__a, __b,
@@ -1225,7 +1228,7 @@ _mm512_unpacklo_ps(__m512 __a, __m512 __b)
/* Bit Test */
-static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
+static __inline __mmask16 DEFAULT_FN_ATTRS
_mm512_test_epi32_mask(__m512i __A, __m512i __B)
{
return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
@@ -1233,7 +1236,7 @@ _mm512_test_epi32_mask(__m512i __A, __m512i __B)
(__mmask16) -1);
}
-static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
+static __inline __mmask8 DEFAULT_FN_ATTRS
_mm512_test_epi64_mask(__m512i __A, __m512i __B)
{
return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
@@ -1243,7 +1246,7 @@ _mm512_test_epi64_mask(__m512i __A, __m512i __B)
/* SIMD load ops */
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
{
return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
@@ -1252,7 +1255,7 @@ _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
(__mmask16) __U);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512i DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
{
return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
@@ -1261,7 +1264,7 @@ _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
(__mmask8) __U);
}
-static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
{
return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
@@ -1270,7 +1273,7 @@ _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
(__mmask16) __U);
}
-static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
{
return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
@@ -1279,7 +1282,7 @@ _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
(__mmask8) __U);
}
-static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
{
return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
@@ -1288,7 +1291,7 @@ _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
(__mmask16) __U);
}
-static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
{
return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
@@ -1297,7 +1300,7 @@ _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
(__mmask8) __U);
}
-static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_loadu_pd(double const *__p)
{
struct __loadu_pd {
@@ -1306,7 +1309,7 @@ _mm512_loadu_pd(double const *__p)
return ((struct __loadu_pd*)__p)->__v;
}
-static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_loadu_ps(float const *__p)
{
struct __loadu_ps {
@@ -1315,7 +1318,7 @@ _mm512_loadu_ps(float const *__p)
return ((struct __loadu_ps*)__p)->__v;
}
-static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+static __inline __m512 DEFAULT_FN_ATTRS
_mm512_load_ps(float const *__p)
{
return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
@@ -1324,7 +1327,7 @@ _mm512_load_ps(double const *__p)
(__mmask16) -1);
}
-static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+static __inline __m512d DEFAULT_FN_ATTRS
_mm512_load_pd(double const *__p)
{
return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
@@ -1335,65 +1338,65 @@ _mm512_load_pd(float const *__p)
/* SIMD store ops */
-static __inline void __attribute__ ((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
__builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
(__mmask8) __U);
}
-static __inline void __attribute__ ((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
{
__builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
(__mmask16) __U);
}
-static __inline void __attribute__ ((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
{
__builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}
-static __inline void __attribute__ ((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm512_storeu_pd(void *__P, __m512d __A)
{
__builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
}
-static __inline void __attribute__ ((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
{
__builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
(__mmask16) __U);
}
-static __inline void __attribute__ ((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm512_storeu_ps(void *__P, __m512 __A)
{
__builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
}
-static __inline void __attribute__ ((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
{
__builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}
-static __inline void __attribute__ ((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm512_store_pd(void *__P, __m512d __A)
{
*(__m512d*)__P = __A;
}
-static __inline void __attribute__ ((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
{
__builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
(__mmask16) __U);
}
-static __inline void __attribute__ ((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm512_store_ps(void *__P, __m512 __A)
{
*(__m512*)__P = __A;
@@ -1401,7 +1404,7 @@ _mm512_store_ps(void *__P, __m512 __A)
/* Mask ops */
-static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
+static __inline __mmask16 DEFAULT_FN_ATTRS
_mm512_knot(__mmask16 __M)
{
return __builtin_ia32_knothi(__M);
@@ -1409,289 +1412,289 @@ _mm512_knot(__mmask16 __M)
/* Integer compare */
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
__u);
@@ -1744,4 +1747,7 @@ _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
__m512i __b = (b); \
(__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, (p), \
(__mmask8)(m)); })
+
+#undef DEFAULT_FN_ATTRS
+
#endif // __AVX512FINTRIN_H
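
A minimal caller sketch (illustrative only, not part of the patch; the helper name add8_f64 is made up). It assumes the avx512fintrin.h DEFAULT_FN_ATTRS carries __target__("avx512f"), analogous to the avx512vl,avx512bw define added below for avx512vlbwintrin.h, and that the header is reachable through immintrin.h. Because the intrinsics are always_inline and now also carry a __target__ attribute, they can only be inlined into a caller whose enabled target features include that ISA, so a translation unit built without -mavx512f can still use them by enabling the feature on the calling function:

#include <immintrin.h>

/* Hypothetical helper (not part of the patch): the per-function target
 * attribute enables AVX-512F for this caller only, which satisfies the
 * always_inline + __target__("avx512f") requirement on the intrinsics. */
__attribute__((target("avx512f")))
static void add8_f64(double *dst, const double *x, const double *y)
{
    __m512d a = _mm512_loadu_pd(x);              /* unaligned 512-bit loads */
    __m512d b = _mm512_loadu_pd(y);
    _mm512_storeu_pd(dst, _mm512_add_pd(a, b));  /* element-wise add, store */
}

/* Without -mavx512f (or the attribute above), clang rejects the call with an
 * error along the lines of "always_inline function '_mm512_add_pd' requires
 * target feature 'avx512f'". */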
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h
index c3b087e..1fbffd4 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h
@@ -28,585 +28,588 @@
#ifndef __AVX512VLBWINTRIN_H
#define __AVX512VLBWINTRIN_H
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw")))
+
/* Integer compare */
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_cmpeq_epi8_mask(__m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_pcmpeqb128_mask((__v16qi)__a, (__v16qi)__b,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_mask_cmpeq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_pcmpeqb128_mask((__v16qi)__a, (__v16qi)__b,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_cmpeq_epu8_mask(__m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_mask_cmpeq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_pcmpeqb256_mask((__v32qi)__a, (__v32qi)__b,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_mask_cmpeq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_pcmpeqb256_mask((__v32qi)__a, (__v32qi)__b,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_cmpeq_epu8_mask(__m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_mask_cmpeq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpeq_epi16_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpeq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpeq_epu16_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpeq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_cmpeq_epi16_mask(__m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_pcmpeqw256_mask((__v16hi)__a, (__v16hi)__b,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_mask_cmpeq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_pcmpeqw256_mask((__v16hi)__a, (__v16hi)__b,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_cmpeq_epu16_mask(__m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_mask_cmpeq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_cmpge_epi8_mask(__m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_mask_cmpge_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_cmpge_epu8_mask(__m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_mask_cmpge_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_cmpge_epi8_mask(__m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_mask_cmpge_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_cmpge_epu8_mask(__m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_mask_cmpge_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpge_epi16_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpge_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpge_epu16_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpge_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_cmpge_epi16_mask(__m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_mask_cmpge_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_cmpge_epu16_mask(__m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_mask_cmpge_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_cmpgt_epi8_mask(__m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_mask_cmpgt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_cmpgt_epu8_mask(__m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_mask_cmpgt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_cmpgt_epi8_mask(__m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_mask_cmpgt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_cmpgt_epu8_mask(__m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_mask_cmpgt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpgt_epi16_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpgt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpgt_epu16_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpgt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_cmpgt_epi16_mask(__m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_mask_cmpgt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_cmpgt_epu16_mask(__m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_mask_cmpgt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_cmple_epi8_mask(__m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_mask_cmple_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_cmple_epu8_mask(__m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_mask_cmple_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_cmple_epi8_mask(__m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_mask_cmple_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_cmple_epu8_mask(__m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_mask_cmple_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmple_epi16_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmple_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmple_epu16_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmple_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_cmple_epi16_mask(__m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_mask_cmple_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_cmple_epu16_mask(__m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_mask_cmple_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_cmplt_epi8_mask(__m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_mask_cmplt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_cmplt_epu8_mask(__m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_mask_cmplt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_cmplt_epi8_mask(__m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_mask_cmplt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_cmplt_epu8_mask(__m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_mask_cmplt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmplt_epi16_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmplt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmplt_epu16_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmplt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_cmplt_epi16_mask(__m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_mask_cmplt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_cmplt_epu16_mask(__m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_mask_cmplt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_cmpneq_epi8_mask(__m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_mask_cmpneq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_cmpneq_epu8_mask(__m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm_mask_cmpneq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_cmpneq_epi8_mask(__m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_mask_cmpneq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
__u);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_cmpneq_epu8_mask(__m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
(__mmask32)-1);
}
-static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask32 DEFAULT_FN_ATTRS
_mm256_mask_cmpneq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpneq_epi16_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpneq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpneq_epu16_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpneq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_cmpneq_epi16_mask(__m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_mask_cmpneq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
__u);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_cmpneq_epu16_mask(__m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
(__mmask16)-1);
}
-static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask16 DEFAULT_FN_ATTRS
_mm256_mask_cmpneq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
__u);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){
return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
(__v32qi) __B,
@@ -614,7 +617,7 @@ _mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){
(__mmask32) __U);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
(__v32qi) __B,
@@ -623,7 +626,7 @@ _mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
(__mmask32) __U);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
(__v16hi) __B,
@@ -631,7 +634,7 @@ _mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
(__mmask16) __U);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
(__v16hi) __B,
@@ -640,7 +643,7 @@ _mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
(__mmask16) __U);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
(__v32qi) __B,
@@ -648,7 +651,7 @@ _mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
(__mmask32) __U);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
(__v32qi) __B,
@@ -657,7 +660,7 @@ _mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
(__mmask32) __U);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
(__v16hi) __B,
@@ -665,7 +668,7 @@ _mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
(__mmask16) __U);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
(__v16hi) __B,
@@ -673,7 +676,7 @@ _mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
_mm256_setzero_si256 (),
(__mmask16) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
(__v16qi) __B,
@@ -681,7 +684,7 @@ _mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
(__mmask16) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
(__v16qi) __B,
@@ -690,7 +693,7 @@ _mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
(__mmask16) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
(__v8hi) __B,
@@ -698,7 +701,7 @@ _mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
(__v8hi) __B,
@@ -707,7 +710,7 @@ _mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
(__v16qi) __B,
@@ -715,7 +718,7 @@ _mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
(__mmask16) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
(__v16qi) __B,
@@ -724,7 +727,7 @@ _mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
(__mmask16) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
(__v8hi) __B,
@@ -732,7 +735,7 @@ _mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
(__v8hi) __B,
@@ -741,7 +744,7 @@ _mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
(__v16hi) __B,
@@ -749,7 +752,7 @@ _mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
(__mmask16) __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
(__v16hi) __B,
@@ -758,7 +761,7 @@ _mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
(__mmask16) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
(__v8hi) __B,
@@ -766,7 +769,7 @@ _mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
(__v8hi) __B,
@@ -854,4 +857,6 @@ _mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
(__v16hi)(__m256i)(b), \
(p), (__mmask16)(m)); })
+#undef DEFAULT_FN_ATTRS
+
#endif /* __AVX512VLBWINTRIN_H */
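
For reference, a minimal usage sketch (not part of the patch) of one of the byte/word mask-compare intrinsics retagged above. The names and input values are illustrative, and building it assumes a compiler with AVX-512VL/BW support enabled (e.g. clang -mavx512vl -mavx512bw). The third argument passed to the __builtin_ia32_*cmp*_mask builtins in the hunks above is the comparison predicate (0 = EQ, 1 = LT, 2 = LE, 4 = NE, 5 = GE, 6 = GT), with the ucmp variants comparing lanes as unsigned.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i a = _mm256_set1_epi16(3);
    __m256i b = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7,
                                  8, 9, 10, 11, 12, 13, 14, 15);
    /* Unsigned word compare, predicate LE: bit i of the result mask is set
     * when a[i] <= b[i]. */
    __mmask16 k = _mm256_cmple_epu16_mask(a, b);
    /* Masked form: lanes whose bit is clear in the input mask yield 0 bits. */
    __mmask16 km = _mm256_mask_cmple_epu16_mask(0x00FF, a, b);
    printf("k = 0x%04x, km = 0x%04x\n", (unsigned)k, (unsigned)km);
    return 0;
}
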
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vldqintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vldqintrin.h
index 4024446..2a32edd 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512vldqintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512vldqintrin.h
@@ -28,13 +28,15 @@
#ifndef __AVX512VLDQINTRIN_H
#define __AVX512VLDQINTRIN_H
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq")))
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
return (__m256i) ((__v4di) __A * (__v4di) __B);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
(__v4di) __B,
@@ -42,7 +44,7 @@ _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
(__v4di) __B,
@@ -51,12 +53,12 @@ _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mullo_epi64 (__m128i __A, __m128i __B) {
return (__m128i) ((__v2di) __A * (__v2di) __B);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
(__v2di) __B,
@@ -64,7 +66,7 @@ _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
(__v2di) __B,
@@ -73,7 +75,7 @@ _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
(__mmask8) __U);
}
-static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
(__v4df) __B,
@@ -81,7 +83,7 @@ _mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
(__mmask8) __U);
}
-static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
(__v4df) __B,
@@ -90,7 +92,7 @@ _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
(__mmask8) __U);
}
-static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
(__v2df) __B,
@@ -98,7 +100,7 @@ _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
(__mmask8) __U);
}
-static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
(__v2df) __B,
@@ -107,7 +109,7 @@ _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
(__mmask8) __U);
}
-static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
(__v8sf) __B,
@@ -115,7 +117,7 @@ _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
(__mmask8) __U);
}
-static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
(__v8sf) __B,
@@ -124,7 +126,7 @@ _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__mmask8) __U);
}
-static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
(__v4sf) __B,
@@ -132,7 +134,7 @@ _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
(__mmask8) __U);
}
-static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
(__v4sf) __B,
@@ -141,7 +143,7 @@ _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
(__mmask8) __U);
}
-static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
(__v4df) __B,
@@ -149,7 +151,7 @@ _mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
(__mmask8) __U);
}
-static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
(__v4df) __B,
@@ -158,7 +160,7 @@ _mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
(__mmask8) __U);
}
-static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
(__v2df) __B,
@@ -166,7 +168,7 @@ _mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
(__mmask8) __U);
}
-static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
(__v2df) __B,
@@ -175,7 +177,7 @@ _mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
(__mmask8) __U);
}
-static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
(__v8sf) __B,
@@ -183,7 +185,7 @@ _mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
(__mmask8) __U);
}
-static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
(__v8sf) __B,
@@ -192,7 +194,7 @@ _mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__mmask8) __U);
}
-static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
(__v4sf) __B,
@@ -200,7 +202,7 @@ _mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
(__mmask8) __U);
}
-static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
(__v4sf) __B,
@@ -209,7 +211,7 @@ _mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
(__mmask8) __U);
}
-static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
__m256d __B) {
return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
@@ -218,7 +220,7 @@ _mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
(__mmask8) __U);
}
-static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
(__v4df) __B,
@@ -227,7 +229,7 @@ _mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
(__mmask8) __U);
}
-static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
(__v2df) __B,
@@ -235,7 +237,7 @@ _mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
(__mmask8) __U);
}
-static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
(__v2df) __B,
@@ -244,7 +246,7 @@ _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
(__mmask8) __U);
}
-static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
(__v8sf) __B,
@@ -252,7 +254,7 @@ _mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
(__mmask8) __U);
}
-static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
(__v8sf) __B,
@@ -261,7 +263,7 @@ _mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__mmask8) __U);
}
-static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
(__v4sf) __B,
@@ -269,7 +271,7 @@ _mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
(__mmask8) __U);
}
-static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
(__v4sf) __B,
@@ -278,7 +280,7 @@ _mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
(__mmask8) __U);
}
-static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
(__v4df) __B,
@@ -286,7 +288,7 @@ _mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
(__mmask8) __U);
}
-static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
(__v4df) __B,
@@ -295,7 +297,7 @@ _mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
(__mmask8) __U);
}
-static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
(__v2df) __B,
@@ -303,7 +305,7 @@ _mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
(__mmask8) __U);
}
-static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
(__v2df) __B,
@@ -312,7 +314,7 @@ _mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
(__mmask8) __U);
}
-static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
(__v8sf) __B,
@@ -320,7 +322,7 @@ _mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
(__mmask8) __U);
}
-static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
(__v8sf) __B,
@@ -329,7 +331,7 @@ _mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__mmask8) __U);
}
-static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
(__v4sf) __B,
@@ -337,7 +339,7 @@ _mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
(__mmask8) __U);
}
-static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
(__v4sf) __B,
@@ -346,4 +348,6 @@ _mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
(__mmask8) __U);
}
+#undef DEFAULT_FN_ATTRS
+
#endif
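
The #define/#undef pairs visible above follow the same pattern in each of these headers: the default attribute list, now including a __target__ feature string, is applied to every intrinsic and then undefined so the macro cannot leak into user code. The sketch below (illustrative only, with a hypothetical caller name) shows how an intrinsic retagged with DEFAULT_FN_ATTRS is intended to be reached from a function that opts into the features via its own target attribute, rather than requiring -mavx512vl/-mavx512dq for the whole translation unit.

#include <immintrin.h>

/* Hypothetical caller: its target attribute names the same features as the
 * header's DEFAULT_FN_ATTRS, so the always_inline intrinsic can be inlined
 * here without enabling AVX-512DQ globally. */
__attribute__((target("avx512vl,avx512dq")))
static __m128i mul64_lanes(__m128i a, __m128i b) {
    return _mm_mullo_epi64(a, b); /* one of the intrinsics updated above */
}
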
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h
index 9de0cf4..59ff5eb 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h
@@ -28,196 +28,199 @@
#ifndef __AVX512VLINTRIN_H
#define __AVX512VLINTRIN_H
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
+
/* Integer compare */
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpge_epi32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpge_epu32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpge_epi64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpge_epu64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
__u);
@@ -226,391 +229,391 @@ _mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmple_epi32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmple_epu32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmple_epi32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmple_epu32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmple_epi64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmple_epu64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmple_epi64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmple_epu64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmplt_epi32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmplt_epu32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmplt_epi64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmplt_epu64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
__u);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
(__mmask8)-1);
}
-static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __mmask8 DEFAULT_FN_ATTRS
_mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
__u);
}
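/* Aside (not part of the header or of this diff): the third argument to the
 * __builtin_ia32_[u]cmp*_mask builtins above selects the comparison predicate.
 * A minimal sketch of that encoding, assuming the standard _MM_CMPINT_* values;
 * only 1 (cmplt) and 4 (cmpneq) appear in the hunks shown here.
 */
enum sketch_cmpint {
  SKETCH_CMPINT_EQ  = 0,  /* equal                   */
  SKETCH_CMPINT_LT  = 1,  /* less-than            -> _mm*_cmplt_*_mask  */
  SKETCH_CMPINT_LE  = 2,  /* less-than-or-equal      */
  SKETCH_CMPINT_NE  = 4,  /* not-equal            -> _mm*_cmpneq_*_mask */
  SKETCH_CMPINT_NLT = 5,  /* not-less-than           */
  SKETCH_CMPINT_NLE = 6   /* not-less-than-or-equal  */
};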
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
__m256i __B)
{
@@ -620,7 +623,7 @@ _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
@@ -630,7 +633,7 @@ _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
__m256i __B)
{
@@ -640,7 +643,7 @@ _mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
@@ -650,7 +653,7 @@ _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
__m256i __B)
{
@@ -660,7 +663,7 @@ _mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
@@ -670,7 +673,7 @@ _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
__m256i __B)
{
@@ -680,7 +683,7 @@ _mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
@@ -690,7 +693,7 @@ _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
__m128i __B)
{
@@ -700,7 +703,7 @@ _mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
@@ -710,7 +713,7 @@ _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
__m128i __B)
{
@@ -720,7 +723,7 @@ _mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
@@ -730,7 +733,7 @@ _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
__m128i __B)
{
@@ -740,7 +743,7 @@ _mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
@@ -750,7 +753,7 @@ _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
__m128i __B)
{
@@ -760,7 +763,7 @@ _mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
@@ -770,7 +773,7 @@ _mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
__m256i __Y)
{
@@ -779,7 +782,7 @@ _mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
(__v4di) __W, __M);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
{
return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
@@ -789,7 +792,7 @@ _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
__M);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
__m128i __Y)
{
@@ -798,7 +801,7 @@ _mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
(__v2di) __W, __M);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
@@ -808,7 +811,7 @@ _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
__M);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
__m256i __Y)
{
@@ -817,7 +820,7 @@ _mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
(__v4di) __W, __M);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
{
return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
@@ -827,7 +830,7 @@ _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
__M);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
__m128i __Y)
{
@@ -836,7 +839,7 @@ _mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
(__v2di) __W, __M);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
@@ -846,7 +849,7 @@ _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
__M);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
@@ -856,7 +859,7 @@ _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
__M);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
__m256i __B)
{
@@ -865,7 +868,7 @@ _mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
(__v8si) __W, __M);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
@@ -875,7 +878,7 @@ _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
__M);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
__m128i __B)
{
@@ -884,7 +887,7 @@ _mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
(__v4si) __W, __M);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
__m256i __B)
{
@@ -894,7 +897,7 @@ _mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
@@ -904,7 +907,7 @@ _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
@@ -913,7 +916,7 @@ _mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
@@ -923,7 +926,7 @@ _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
__m256i __B)
{
@@ -933,7 +936,7 @@ _mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
@@ -943,7 +946,7 @@ _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
__m128i __B)
{
@@ -953,7 +956,7 @@ _mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
@@ -963,7 +966,7 @@ _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
__m256i __B)
{
@@ -973,7 +976,7 @@ _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
@@ -983,7 +986,7 @@ _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
@@ -992,7 +995,7 @@ _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
@@ -1002,7 +1005,7 @@ _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
__m256i __B)
{
@@ -1012,7 +1015,7 @@ _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
@@ -1022,7 +1025,7 @@ _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
__m128i __B)
{
@@ -1032,7 +1035,7 @@ _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
@@ -1042,7 +1045,7 @@ _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
__m256i __B)
{
@@ -1051,7 +1054,7 @@ _mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
(__v4di) __W, __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
@@ -1061,7 +1064,7 @@ _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
__U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
__m128i __B)
{
@@ -1070,7 +1073,7 @@ _mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
(__v2di) __W, __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
@@ -1080,7 +1083,7 @@ _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
__U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
__m256i __B)
{
@@ -1089,7 +1092,7 @@ _mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
(__v4di) __W, __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
@@ -1099,7 +1102,7 @@ _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
__U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
__m128i __B)
{
@@ -1108,7 +1111,7 @@ _mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
(__v2di) __W, __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
@@ -1118,7 +1121,7 @@ _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
__U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
__m256i __B)
{
@@ -1128,7 +1131,7 @@ _mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
@@ -1138,7 +1141,7 @@ _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
@@ -1147,7 +1150,7 @@ _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
@@ -1157,7 +1160,7 @@ _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
__m256i __B)
{
@@ -1167,7 +1170,7 @@ _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
(__mmask8) __U);
}
-static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
@@ -1177,7 +1180,7 @@ _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
__m128i __B)
{
@@ -1187,7 +1190,7 @@ _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
(__mmask8) __U);
}
-static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
@@ -1316,4 +1319,7 @@ _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
(__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128)(a), \
(__v2df)(__m128)(b), \
(p), (__mmask8)(m)); })
+
+#undef DEFAULT_FN_ATTRS
+
#endif /* __AVX512VLINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/avxintrin.h b/contrib/llvm/tools/clang/lib/Headers/avxintrin.h
index 4907965..d28a915 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avxintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avxintrin.h
@@ -39,110 +39,113 @@ typedef float __m256 __attribute__ ((__vector_size__ (32)));
typedef double __m256d __attribute__((__vector_size__(32)));
typedef long long __m256i __attribute__((__vector_size__(32)));
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx")))
+
/* Arithmetic */
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_add_pd(__m256d __a, __m256d __b)
{
return __a+__b;
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_add_ps(__m256 __a, __m256 __b)
{
return __a+__b;
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_sub_pd(__m256d __a, __m256d __b)
{
return __a-__b;
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_sub_ps(__m256 __a, __m256 __b)
{
return __a-__b;
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_addsub_pd(__m256d __a, __m256d __b)
{
return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_addsub_ps(__m256 __a, __m256 __b)
{
return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_div_pd(__m256d __a, __m256d __b)
{
return __a / __b;
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_div_ps(__m256 __a, __m256 __b)
{
return __a / __b;
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_max_pd(__m256d __a, __m256d __b)
{
return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_max_ps(__m256 __a, __m256 __b)
{
return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_min_pd(__m256d __a, __m256d __b)
{
return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_min_ps(__m256 __a, __m256 __b)
{
return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_mul_pd(__m256d __a, __m256d __b)
{
return __a * __b;
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_mul_ps(__m256 __a, __m256 __b)
{
return __a * __b;
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_sqrt_pd(__m256d __a)
{
return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_sqrt_ps(__m256 __a)
{
return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_rsqrt_ps(__m256 __a)
{
return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_rcp_ps(__m256 __a)
{
return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);
@@ -162,99 +165,99 @@ _mm256_rcp_ps(__m256 __a)
#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)
/* Logical */
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_and_pd(__m256d __a, __m256d __b)
{
return (__m256d)((__v4di)__a & (__v4di)__b);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_and_ps(__m256 __a, __m256 __b)
{
return (__m256)((__v8si)__a & (__v8si)__b);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_andnot_pd(__m256d __a, __m256d __b)
{
return (__m256d)(~(__v4di)__a & (__v4di)__b);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_andnot_ps(__m256 __a, __m256 __b)
{
return (__m256)(~(__v8si)__a & (__v8si)__b);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_or_pd(__m256d __a, __m256d __b)
{
return (__m256d)((__v4di)__a | (__v4di)__b);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_or_ps(__m256 __a, __m256 __b)
{
return (__m256)((__v8si)__a | (__v8si)__b);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_xor_pd(__m256d __a, __m256d __b)
{
return (__m256d)((__v4di)__a ^ (__v4di)__b);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_xor_ps(__m256 __a, __m256 __b)
{
return (__m256)((__v8si)__a ^ (__v8si)__b);
}
/* Horizontal arithmetic */
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_hadd_pd(__m256d __a, __m256d __b)
{
return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_hadd_ps(__m256 __a, __m256 __b)
{
return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_hsub_pd(__m256d __a, __m256d __b)
{
return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_hsub_ps(__m256 __a, __m256 __b)
{
return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
}
/* Vector permutations */
-static __inline __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline __m128d DEFAULT_FN_ATTRS
_mm_permutevar_pd(__m128d __a, __m128i __c)
{
return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_permutevar_pd(__m256d __a, __m256i __c)
{
return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
}
-static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline __m128 DEFAULT_FN_ATTRS
_mm_permutevar_ps(__m128 __a, __m128i __c)
{
return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_permutevar_ps(__m256 __a, __m256i __c)
{
return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
@@ -326,14 +329,14 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
(((M) & 0x40) ? 14 : 6), \
(((M) & 0x80) ? 15 : 7)); })
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
{
return (__m256d)__builtin_ia32_blendvpd256(
(__v4df)__a, (__v4df)__b, (__v4df)__c);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
{
return (__m256)__builtin_ia32_blendvps256(
@@ -429,21 +432,21 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
__m128 __b = (b); \
(__m128)__builtin_ia32_cmpss((__v4sf)__a, (__v4sf)__b, (c)); })
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm256_extract_epi32(__m256i __a, const int __imm)
{
__v8si __b = (__v8si)__a;
return __b[__imm & 7];
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm256_extract_epi16(__m256i __a, const int __imm)
{
__v16hi __b = (__v16hi)__a;
return __b[__imm & 15];
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm256_extract_epi8(__m256i __a, const int __imm)
{
__v32qi __b = (__v32qi)__a;
@@ -451,7 +454,7 @@ _mm256_extract_epi8(__m256i __a, const int __imm)
}
#ifdef __x86_64__
-static __inline long long __attribute__((__always_inline__, __nodebug__))
+static __inline long long DEFAULT_FN_ATTRS
_mm256_extract_epi64(__m256i __a, const int __imm)
{
__v4di __b = (__v4di)__a;
@@ -459,7 +462,7 @@ _mm256_extract_epi64(__m256i __a, const int __imm)
}
#endif
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_insert_epi32(__m256i __a, int __b, int const __imm)
{
__v8si __c = (__v8si)__a;
@@ -467,7 +470,7 @@ _mm256_insert_epi32(__m256i __a, int __b, int const __imm)
return (__m256i)__c;
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_insert_epi16(__m256i __a, int __b, int const __imm)
{
__v16hi __c = (__v16hi)__a;
@@ -475,7 +478,7 @@ _mm256_insert_epi16(__m256i __a, int __b, int const __imm)
return (__m256i)__c;
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_insert_epi8(__m256i __a, int __b, int const __imm)
{
__v32qi __c = (__v32qi)__a;
@@ -484,7 +487,7 @@ _mm256_insert_epi8(__m256i __a, int __b, int const __imm)
}
#ifdef __x86_64__
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_insert_epi64(__m256i __a, long long __b, int const __imm)
{
__v4di __c = (__v4di)__a;
@@ -494,263 +497,263 @@ _mm256_insert_epi64(__m256i __a, long long __b, int const __imm)
#endif
/* Conversion */
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_cvtepi32_pd(__m128i __a)
{
return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_cvtepi32_ps(__m256i __a)
{
return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) __a);
}
-static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline __m128 DEFAULT_FN_ATTRS
_mm256_cvtpd_ps(__m256d __a)
{
return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_cvtps_epi32(__m256 __a)
{
return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_cvtps_pd(__m128 __a)
{
return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a);
}
-static __inline __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline __m128i DEFAULT_FN_ATTRS
_mm256_cvttpd_epi32(__m256d __a)
{
return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
}
-static __inline __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline __m128i DEFAULT_FN_ATTRS
_mm256_cvtpd_epi32(__m256d __a)
{
return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_cvttps_epi32(__m256 __a)
{
return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
}
/* Vector replicate */
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_movehdup_ps(__m256 __a)
{
return __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_moveldup_ps(__m256 __a)
{
return __builtin_shufflevector(__a, __a, 0, 0, 2, 2, 4, 4, 6, 6);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_movedup_pd(__m256d __a)
{
return __builtin_shufflevector(__a, __a, 0, 0, 2, 2);
}
/* Unpack and Interleave */
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_unpackhi_pd(__m256d __a, __m256d __b)
{
return __builtin_shufflevector(__a, __b, 1, 5, 1+2, 5+2);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_unpacklo_pd(__m256d __a, __m256d __b)
{
return __builtin_shufflevector(__a, __b, 0, 4, 0+2, 4+2);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_unpackhi_ps(__m256 __a, __m256 __b)
{
return __builtin_shufflevector(__a, __b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_unpacklo_ps(__m256 __a, __m256 __b)
{
return __builtin_shufflevector(__a, __b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
}
/* Bit Test */
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm_testz_pd(__m128d __a, __m128d __b)
{
return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm_testc_pd(__m128d __a, __m128d __b)
{
return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm_testnzc_pd(__m128d __a, __m128d __b)
{
return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm_testz_ps(__m128 __a, __m128 __b)
{
return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm_testc_ps(__m128 __a, __m128 __b)
{
return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm_testnzc_ps(__m128 __a, __m128 __b)
{
return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm256_testz_pd(__m256d __a, __m256d __b)
{
return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm256_testc_pd(__m256d __a, __m256d __b)
{
return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm256_testnzc_pd(__m256d __a, __m256d __b)
{
return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm256_testz_ps(__m256 __a, __m256 __b)
{
return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm256_testc_ps(__m256 __a, __m256 __b)
{
return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm256_testnzc_ps(__m256 __a, __m256 __b)
{
return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm256_testz_si256(__m256i __a, __m256i __b)
{
return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm256_testc_si256(__m256i __a, __m256i __b)
{
return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm256_testnzc_si256(__m256i __a, __m256i __b)
{
return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
}
/* Vector extract sign mask */
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm256_movemask_pd(__m256d __a)
{
return __builtin_ia32_movmskpd256((__v4df)__a);
}
-static __inline int __attribute__((__always_inline__, __nodebug__))
+static __inline int DEFAULT_FN_ATTRS
_mm256_movemask_ps(__m256 __a)
{
return __builtin_ia32_movmskps256((__v8sf)__a);
}
/* Vector __zero */
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_zeroall(void)
{
__builtin_ia32_vzeroall();
}
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_zeroupper(void)
{
__builtin_ia32_vzeroupper();
}
/* Vector load with broadcast */
-static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline __m128 DEFAULT_FN_ATTRS
_mm_broadcast_ss(float const *__a)
{
float __f = *__a;
return (__m128)(__v4sf){ __f, __f, __f, __f };
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_broadcast_sd(double const *__a)
{
double __d = *__a;
return (__m256d)(__v4df){ __d, __d, __d, __d };
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_broadcast_ss(float const *__a)
{
float __f = *__a;
return (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_broadcast_pd(__m128d const *__a)
{
return (__m256d)__builtin_ia32_vbroadcastf128_pd256(__a);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_broadcast_ps(__m128 const *__a)
{
return (__m256)__builtin_ia32_vbroadcastf128_ps256(__a);
}
/* SIMD load ops */
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_load_pd(double const *__p)
{
return *(__m256d *)__p;
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_load_ps(float const *__p)
{
return *(__m256 *)__p;
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_loadu_pd(double const *__p)
{
struct __loadu_pd {
@@ -759,7 +762,7 @@ _mm256_loadu_pd(double const *__p)
return ((struct __loadu_pd*)__p)->__v;
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_loadu_ps(float const *__p)
{
struct __loadu_ps {
@@ -768,13 +771,13 @@ _mm256_loadu_ps(float const *__p)
return ((struct __loadu_ps*)__p)->__v;
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_load_si256(__m256i const *__p)
{
return *__p;
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_loadu_si256(__m256i const *__p)
{
struct __loadu_si256 {
@@ -783,141 +786,141 @@ _mm256_loadu_si256(__m256i const *__p)
return ((struct __loadu_si256*)__p)->__v;
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_lddqu_si256(__m256i const *__p)
{
return (__m256i)__builtin_ia32_lddqu256((char const *)__p);
}
/* SIMD store ops */
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_store_pd(double *__p, __m256d __a)
{
*(__m256d *)__p = __a;
}
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_store_ps(float *__p, __m256 __a)
{
*(__m256 *)__p = __a;
}
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_storeu_pd(double *__p, __m256d __a)
{
__builtin_ia32_storeupd256(__p, (__v4df)__a);
}
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_storeu_ps(float *__p, __m256 __a)
{
__builtin_ia32_storeups256(__p, (__v8sf)__a);
}
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_store_si256(__m256i *__p, __m256i __a)
{
*__p = __a;
}
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_storeu_si256(__m256i *__p, __m256i __a)
{
__builtin_ia32_storedqu256((char *)__p, (__v32qi)__a);
}
/* Conditional load ops */
-static __inline __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline __m128d DEFAULT_FN_ATTRS
_mm_maskload_pd(double const *__p, __m128d __m)
{
return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2df)__m);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_maskload_pd(double const *__p, __m256d __m)
{
return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,
(__v4df)__m);
}
-static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline __m128 DEFAULT_FN_ATTRS
_mm_maskload_ps(float const *__p, __m128 __m)
{
return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4sf)__m);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_maskload_ps(float const *__p, __m256 __m)
{
return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8sf)__m);
}
/* Conditional store ops */
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_maskstore_ps(float *__p, __m256 __m, __m256 __a)
{
__builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8sf)__m, (__v8sf)__a);
}
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm_maskstore_pd(double *__p, __m128d __m, __m128d __a)
{
__builtin_ia32_maskstorepd((__v2df *)__p, (__v2df)__m, (__v2df)__a);
}
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_maskstore_pd(double *__p, __m256d __m, __m256d __a)
{
__builtin_ia32_maskstorepd256((__v4df *)__p, (__v4df)__m, (__v4df)__a);
}
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm_maskstore_ps(float *__p, __m128 __m, __m128 __a)
{
__builtin_ia32_maskstoreps((__v4sf *)__p, (__v4sf)__m, (__v4sf)__a);
}
/* Cacheability support ops */
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_stream_si256(__m256i *__a, __m256i __b)
{
__builtin_ia32_movntdq256((__v4di *)__a, (__v4di)__b);
}
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_stream_pd(double *__a, __m256d __b)
{
__builtin_ia32_movntpd256(__a, (__v4df)__b);
}
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_stream_ps(float *__p, __m256 __a)
{
__builtin_ia32_movntps256(__p, (__v8sf)__a);
}
/* Create vectors */
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_set_pd(double __a, double __b, double __c, double __d)
{
return (__m256d){ __d, __c, __b, __a };
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_set_ps(float __a, float __b, float __c, float __d,
float __e, float __f, float __g, float __h)
{
return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
int __i4, int __i5, int __i6, int __i7)
{
return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
short __w11, short __w10, short __w09, short __w08,
short __w07, short __w06, short __w05, short __w04,
@@ -927,7 +930,7 @@ _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
__w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
char __b27, char __b26, char __b25, char __b24,
char __b23, char __b22, char __b21, char __b20,
@@ -945,34 +948,34 @@ _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
};
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)
{
return (__m256i)(__v4di){ __d, __c, __b, __a };
}
/* Create vectors with elements in reverse order */
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_setr_pd(double __a, double __b, double __c, double __d)
{
return (__m256d){ __a, __b, __c, __d };
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_setr_ps(float __a, float __b, float __c, float __d,
float __e, float __f, float __g, float __h)
{
return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h };
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
int __i4, int __i5, int __i6, int __i7)
{
return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 };
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
short __w11, short __w10, short __w09, short __w08,
short __w07, short __w06, short __w05, short __w04,
@@ -982,7 +985,7 @@ _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
__w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 };
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
char __b27, char __b26, char __b25, char __b24,
char __b23, char __b22, char __b21, char __b20,
@@ -999,39 +1002,39 @@ _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
__b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 };
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)
{
return (__m256i)(__v4di){ __a, __b, __c, __d };
}
/* Create vectors with repeated elements */
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_set1_pd(double __w)
{
return (__m256d){ __w, __w, __w, __w };
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_set1_ps(float __w)
{
return (__m256){ __w, __w, __w, __w, __w, __w, __w, __w };
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_set1_epi32(int __i)
{
return (__m256i)(__v8si){ __i, __i, __i, __i, __i, __i, __i, __i };
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_set1_epi16(short __w)
{
return (__m256i)(__v16hi){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w,
__w, __w, __w, __w, __w, __w };
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_set1_epi8(char __b)
{
return (__m256i)(__v32qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
@@ -1039,99 +1042,99 @@ _mm256_set1_epi8(char __b)
__b, __b, __b, __b, __b, __b, __b };
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_set1_epi64x(long long __q)
{
return (__m256i)(__v4di){ __q, __q, __q, __q };
}
/* Create __zeroed vectors */
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_setzero_pd(void)
{
return (__m256d){ 0, 0, 0, 0 };
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_setzero_ps(void)
{
return (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_setzero_si256(void)
{
return (__m256i){ 0LL, 0LL, 0LL, 0LL };
}
/* Cast between vector types */
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_castpd_ps(__m256d __a)
{
return (__m256)__a;
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_castpd_si256(__m256d __a)
{
return (__m256i)__a;
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_castps_pd(__m256 __a)
{
return (__m256d)__a;
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_castps_si256(__m256 __a)
{
return (__m256i)__a;
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_castsi256_ps(__m256i __a)
{
return (__m256)__a;
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_castsi256_pd(__m256i __a)
{
return (__m256d)__a;
}
-static __inline __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline __m128d DEFAULT_FN_ATTRS
_mm256_castpd256_pd128(__m256d __a)
{
return __builtin_shufflevector(__a, __a, 0, 1);
}
-static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline __m128 DEFAULT_FN_ATTRS
_mm256_castps256_ps128(__m256 __a)
{
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
}
-static __inline __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline __m128i DEFAULT_FN_ATTRS
_mm256_castsi256_si128(__m256i __a)
{
return __builtin_shufflevector(__a, __a, 0, 1);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_castpd128_pd256(__m128d __a)
{
return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_castps128_ps256(__m128 __a)
{
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_castsi128_si256(__m128i __a)
{
return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
@@ -1202,7 +1205,7 @@ _mm256_castsi128_si256(__m128i __a)
(((M) & 1) ? 3 : 1) );})
/* SIMD load ops (unaligned) */
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)
{
struct __loadu_ps {
@@ -1213,7 +1216,7 @@ _mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)
return _mm256_insertf128_ps(__v256, ((struct __loadu_ps*)__addr_hi)->__v, 1);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
{
struct __loadu_pd {
@@ -1224,7 +1227,7 @@ _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
return _mm256_insertf128_pd(__v256, ((struct __loadu_pd*)__addr_hi)->__v, 1);
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)
{
struct __loadu_si128 {
@@ -1237,7 +1240,7 @@ _mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)
}
/* SIMD store ops (unaligned) */
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
{
__m128 __v128;
@@ -1248,7 +1251,7 @@ _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
__builtin_ia32_storeups(__addr_hi, __v128);
}
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
{
__m128d __v128;
@@ -1259,7 +1262,7 @@ _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
__builtin_ia32_storeupd(__addr_hi, __v128);
}
-static __inline void __attribute__((__always_inline__, __nodebug__))
+static __inline void DEFAULT_FN_ATTRS
_mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
{
__m128i __v128;
@@ -1270,34 +1273,36 @@ _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
__builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_set_m128 (__m128 __hi, __m128 __lo) {
return (__m256) __builtin_shufflevector(__lo, __hi, 0, 1, 2, 3, 4, 5, 6, 7);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_set_m128d (__m128d __hi, __m128d __lo) {
return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_set_m128i (__m128i __hi, __m128i __lo) {
return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_setr_m128 (__m128 __lo, __m128 __hi) {
return _mm256_set_m128(__hi, __lo);
}
-static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline __m256d DEFAULT_FN_ATTRS
_mm256_setr_m128d (__m128d __lo, __m128d __hi) {
return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
}
-static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline __m256i DEFAULT_FN_ATTRS
_mm256_setr_m128i (__m128i __lo, __m128i __hi) {
return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
}
+#undef DEFAULT_FN_ATTRS
+
#endif /* __AVXINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/bmi2intrin.h b/contrib/llvm/tools/clang/lib/Headers/bmi2intrin.h
index a05cfad..7818934 100644
--- a/contrib/llvm/tools/clang/lib/Headers/bmi2intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/bmi2intrin.h
@@ -25,26 +25,25 @@
#error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
#endif
-#ifndef __BMI2__
-# error "BMI2 instruction set not enabled"
-#endif /* __BMI2__ */
-
#ifndef __BMI2INTRIN_H
#define __BMI2INTRIN_H
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
+
+static __inline__ unsigned int DEFAULT_FN_ATTRS
_bzhi_u32(unsigned int __X, unsigned int __Y)
{
return __builtin_ia32_bzhi_si(__X, __Y);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
_pdep_u32(unsigned int __X, unsigned int __Y)
{
return __builtin_ia32_pdep_si(__X, __Y);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
_pext_u32(unsigned int __X, unsigned int __Y)
{
return __builtin_ia32_pext_si(__X, __Y);
@@ -52,25 +51,25 @@ _pext_u32(unsigned int __X, unsigned int __Y)
#ifdef __x86_64__
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
_bzhi_u64(unsigned long long __X, unsigned long long __Y)
{
return __builtin_ia32_bzhi_di(__X, __Y);
}
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
_pdep_u64(unsigned long long __X, unsigned long long __Y)
{
return __builtin_ia32_pdep_di(__X, __Y);
}
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
_pext_u64(unsigned long long __X, unsigned long long __Y)
{
return __builtin_ia32_pext_di(__X, __Y);
}
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
_mulx_u64 (unsigned long long __X, unsigned long long __Y,
unsigned long long *__P)
{
@@ -81,7 +80,7 @@ _mulx_u64 (unsigned long long __X, unsigned long long __Y,
#else /* !__x86_64__ */
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
{
unsigned long long __res = (unsigned long long) __X * __Y;
@@ -91,4 +90,6 @@ _mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
#endif /* !__x86_64__ */
+#undef DEFAULT_FN_ATTRS
+
#endif /* __BMI2INTRIN_H */
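The practical effect of the __target__("bmi2") attribute added above is that bmi2intrin.h no longer has to #error out when a translation unit is built without -mbmi2; the intrinsics can be enabled per function instead. A minimal caller sketch follows — the function name is invented, and it assumes the umbrella <x86intrin.h> header now pulls bmi2intrin.h in unconditionally, as this patch series arranges:

#include <x86intrin.h>

/* Compiled without -mbmi2: the intrinsic is still usable here because the
 * caller carries the matching target attribute, which is now all the
 * always_inline callee requires. */
__attribute__((__target__("bmi2")))
unsigned int low_byte_bits(unsigned int value)
{
  return _pext_u32(value, 0xFFu);  /* gather the bits selected by the mask */
}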
diff --git a/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h b/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h
index 0e5fd55..0aad8f2 100644
--- a/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h
@@ -25,10 +25,6 @@
#error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
#endif
-#ifndef __BMI__
-# error "BMI instruction set not enabled"
-#endif /* __BMI__ */
-
#ifndef __BMIINTRIN_H
#define __BMIINTRIN_H
@@ -40,51 +36,54 @@
#define _blsr_u32(a) (__blsr_u32((a)))
#define _tzcnt_u32(a) (__tzcnt_u32((a)))
-static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
+
+static __inline__ unsigned short DEFAULT_FN_ATTRS
__tzcnt_u16(unsigned short __X)
{
return __X ? __builtin_ctzs(__X) : 16;
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__andn_u32(unsigned int __X, unsigned int __Y)
{
return ~__X & __Y;
}
/* AMD-specified, double-leading-underscore version of BEXTR */
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__bextr_u32(unsigned int __X, unsigned int __Y)
{
return __builtin_ia32_bextr_u32(__X, __Y);
}
/* Intel-specified, single-leading-underscore version of BEXTR */
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
{
return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__blsi_u32(unsigned int __X)
{
return __X & -__X;
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__blsmsk_u32(unsigned int __X)
{
return __X ^ (__X - 1);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__blsr_u32(unsigned int __X)
{
return __X & (__X - 1);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__tzcnt_u32(unsigned int __X)
{
return __X ? __builtin_ctz(__X) : 32;
@@ -99,45 +98,45 @@ __tzcnt_u32(unsigned int __X)
#define _blsr_u64(a) (__blsr_u64((a)))
#define _tzcnt_u64(a) (__tzcnt_u64((a)))
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__andn_u64 (unsigned long long __X, unsigned long long __Y)
{
return ~__X & __Y;
}
/* AMD-specified, double-leading-underscore version of BEXTR */
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__bextr_u64(unsigned long long __X, unsigned long long __Y)
{
return __builtin_ia32_bextr_u64(__X, __Y);
}
/* Intel-specified, single-leading-underscore version of BEXTR */
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
{
return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
}
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__blsi_u64(unsigned long long __X)
{
return __X & -__X;
}
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__blsmsk_u64(unsigned long long __X)
{
return __X ^ (__X - 1);
}
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__blsr_u64(unsigned long long __X)
{
return __X & (__X - 1);
}
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__tzcnt_u64(unsigned long long __X)
{
return __X ? __builtin_ctzll(__X) : 64;
@@ -145,4 +144,6 @@ __tzcnt_u64(unsigned long long __X)
#endif /* __x86_64__ */
+#undef DEFAULT_FN_ATTRS
+
#endif /* __BMIINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h
index c764d68..eee2428 100644
--- a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h
@@ -24,10 +24,6 @@
#ifndef __EMMINTRIN_H
#define __EMMINTRIN_H
-#ifndef __SSE2__
-#error "SSE2 instruction set not enabled"
-#else
-
#include <xmmintrin.h>
typedef double __m128d __attribute__((__vector_size__(16)));
@@ -39,433 +35,436 @@ typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
+
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_add_sd(__m128d __a, __m128d __b)
{
__a[0] += __b[0];
return __a;
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_add_pd(__m128d __a, __m128d __b)
{
return __a + __b;
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_sub_sd(__m128d __a, __m128d __b)
{
__a[0] -= __b[0];
return __a;
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_sub_pd(__m128d __a, __m128d __b)
{
return __a - __b;
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_mul_sd(__m128d __a, __m128d __b)
{
__a[0] *= __b[0];
return __a;
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_mul_pd(__m128d __a, __m128d __b)
{
return __a * __b;
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_div_sd(__m128d __a, __m128d __b)
{
__a[0] /= __b[0];
return __a;
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_div_pd(__m128d __a, __m128d __b)
{
return __a / __b;
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_sqrt_sd(__m128d __a, __m128d __b)
{
__m128d __c = __builtin_ia32_sqrtsd(__b);
return (__m128d) { __c[0], __a[1] };
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_sqrt_pd(__m128d __a)
{
return __builtin_ia32_sqrtpd(__a);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_min_sd(__m128d __a, __m128d __b)
{
return __builtin_ia32_minsd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_min_pd(__m128d __a, __m128d __b)
{
return __builtin_ia32_minpd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_max_sd(__m128d __a, __m128d __b)
{
return __builtin_ia32_maxsd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_max_pd(__m128d __a, __m128d __b)
{
return __builtin_ia32_maxpd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_and_pd(__m128d __a, __m128d __b)
{
return (__m128d)((__v4si)__a & (__v4si)__b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_andnot_pd(__m128d __a, __m128d __b)
{
return (__m128d)(~(__v4si)__a & (__v4si)__b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_or_pd(__m128d __a, __m128d __b)
{
return (__m128d)((__v4si)__a | (__v4si)__b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_xor_pd(__m128d __a, __m128d __b)
{
return (__m128d)((__v4si)__a ^ (__v4si)__b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpeq_pd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpeqpd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmplt_pd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpltpd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmple_pd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmplepd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpgt_pd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpltpd(__b, __a);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpge_pd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmplepd(__b, __a);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpord_pd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpordpd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpunord_pd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpunordpd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpneq_pd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpneqpd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpnlt_pd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpnltpd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpnle_pd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpnlepd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpngt_pd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpnltpd(__b, __a);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpnge_pd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpnlepd(__b, __a);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpeq_sd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpeqsd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmplt_sd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpltsd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmple_sd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmplesd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpgt_sd(__m128d __a, __m128d __b)
{
__m128d __c = __builtin_ia32_cmpltsd(__b, __a);
return (__m128d) { __c[0], __a[1] };
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpge_sd(__m128d __a, __m128d __b)
{
__m128d __c = __builtin_ia32_cmplesd(__b, __a);
return (__m128d) { __c[0], __a[1] };
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpord_sd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpordsd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpunord_sd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpunordsd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpneq_sd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpneqsd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpnlt_sd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpnltsd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpnle_sd(__m128d __a, __m128d __b)
{
return (__m128d)__builtin_ia32_cmpnlesd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpngt_sd(__m128d __a, __m128d __b)
{
__m128d __c = __builtin_ia32_cmpnltsd(__b, __a);
return (__m128d) { __c[0], __a[1] };
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cmpnge_sd(__m128d __a, __m128d __b)
{
__m128d __c = __builtin_ia32_cmpnlesd(__b, __a);
return (__m128d) { __c[0], __a[1] };
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_comieq_sd(__m128d __a, __m128d __b)
{
return __builtin_ia32_comisdeq(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_comilt_sd(__m128d __a, __m128d __b)
{
return __builtin_ia32_comisdlt(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_comile_sd(__m128d __a, __m128d __b)
{
return __builtin_ia32_comisdle(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_comigt_sd(__m128d __a, __m128d __b)
{
return __builtin_ia32_comisdgt(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_comige_sd(__m128d __a, __m128d __b)
{
return __builtin_ia32_comisdge(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_comineq_sd(__m128d __a, __m128d __b)
{
return __builtin_ia32_comisdneq(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_ucomieq_sd(__m128d __a, __m128d __b)
{
return __builtin_ia32_ucomisdeq(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_ucomilt_sd(__m128d __a, __m128d __b)
{
return __builtin_ia32_ucomisdlt(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_ucomile_sd(__m128d __a, __m128d __b)
{
return __builtin_ia32_ucomisdle(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_ucomigt_sd(__m128d __a, __m128d __b)
{
return __builtin_ia32_ucomisdgt(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_ucomige_sd(__m128d __a, __m128d __b)
{
return __builtin_ia32_ucomisdge(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_ucomineq_sd(__m128d __a, __m128d __b)
{
return __builtin_ia32_ucomisdneq(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cvtpd_ps(__m128d __a)
{
return __builtin_ia32_cvtpd2ps(__a);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cvtps_pd(__m128 __a)
{
return __builtin_ia32_cvtps2pd(__a);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cvtepi32_pd(__m128i __a)
{
return __builtin_ia32_cvtdq2pd((__v4si)__a);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtpd_epi32(__m128d __a)
{
return __builtin_ia32_cvtpd2dq(__a);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_cvtsd_si32(__m128d __a)
{
return __builtin_ia32_cvtsd2si(__a);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cvtsd_ss(__m128 __a, __m128d __b)
{
__a[0] = __b[0];
return __a;
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cvtsi32_sd(__m128d __a, int __b)
{
__a[0] = __b;
return __a;
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cvtss_sd(__m128d __a, __m128 __b)
{
__a[0] = __b[0];
return __a;
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvttpd_epi32(__m128d __a)
{
return (__m128i)__builtin_ia32_cvttpd2dq(__a);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_cvttsd_si32(__m128d __a)
{
return __a[0];
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cvtpd_pi32(__m128d __a)
{
return (__m64)__builtin_ia32_cvtpd2pi(__a);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cvttpd_pi32(__m128d __a)
{
return (__m64)__builtin_ia32_cvttpd2pi(__a);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cvtpi32_pd(__m64 __a)
{
return __builtin_ia32_cvtpi2pd((__v2si)__a);
}
-static __inline__ double __attribute__((__always_inline__, __nodebug__))
+static __inline__ double DEFAULT_FN_ATTRS
_mm_cvtsd_f64(__m128d __a)
{
return __a[0];
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_load_pd(double const *__dp)
{
return *(__m128d*)__dp;
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_load1_pd(double const *__dp)
{
struct __mm_load1_pd_struct {
@@ -477,14 +476,14 @@ _mm_load1_pd(double const *__dp)
#define _mm_load_pd1(dp) _mm_load1_pd(dp)
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_loadr_pd(double const *__dp)
{
__m128d __u = *(__m128d*)__dp;
return __builtin_shufflevector(__u, __u, 1, 0);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_loadu_pd(double const *__dp)
{
struct __loadu_pd {
@@ -493,7 +492,7 @@ _mm_loadu_pd(double const *__dp)
return ((struct __loadu_pd*)__dp)->__v;
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_load_sd(double const *__dp)
{
struct __mm_load_sd_struct {
@@ -503,7 +502,7 @@ _mm_load_sd(double const *__dp)
return (__m128d){ __u, 0 };
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_loadh_pd(__m128d __a, double const *__dp)
{
struct __mm_loadh_pd_struct {
@@ -513,7 +512,7 @@ _mm_loadh_pd(__m128d __a, double const *__dp)
return (__m128d){ __a[0], __u };
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_loadl_pd(__m128d __a, double const *__dp)
{
struct __mm_loadl_pd_struct {
@@ -523,43 +522,43 @@ _mm_loadl_pd(__m128d __a, double const *__dp)
return (__m128d){ __u, __a[1] };
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_set_sd(double __w)
{
return (__m128d){ __w, 0 };
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_set1_pd(double __w)
{
return (__m128d){ __w, __w };
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_set_pd(double __w, double __x)
{
return (__m128d){ __x, __w };
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_setr_pd(double __w, double __x)
{
return (__m128d){ __w, __x };
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_setzero_pd(void)
{
return (__m128d){ 0, 0 };
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_move_sd(__m128d __a, __m128d __b)
{
return (__m128d){ __b[0], __a[1] };
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_store_sd(double *__dp, __m128d __a)
{
struct __mm_store_sd_struct {
@@ -568,7 +567,7 @@ _mm_store_sd(double *__dp, __m128d __a)
((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_store1_pd(double *__dp, __m128d __a)
{
struct __mm_store1_pd_struct {
@@ -578,26 +577,26 @@ _mm_store1_pd(double *__dp, __m128d __a)
((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_store_pd(double *__dp, __m128d __a)
{
*(__m128d *)__dp = __a;
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_storeu_pd(double *__dp, __m128d __a)
{
__builtin_ia32_storeupd(__dp, __a);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_storer_pd(double *__dp, __m128d __a)
{
__a = __builtin_shufflevector(__a, __a, 1, 0);
*(__m128d *)__dp = __a;
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_storeh_pd(double *__dp, __m128d __a)
{
struct __mm_storeh_pd_struct {
@@ -606,7 +605,7 @@ _mm_storeh_pd(double *__dp, __m128d __a)
((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_storel_pd(double *__dp, __m128d __a)
{
struct __mm_storeh_pd_struct {
@@ -615,211 +614,211 @@ _mm_storel_pd(double *__dp, __m128d __a)
((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_add_epi8(__m128i __a, __m128i __b)
{
return (__m128i)((__v16qi)__a + (__v16qi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_add_epi16(__m128i __a, __m128i __b)
{
return (__m128i)((__v8hi)__a + (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_add_epi32(__m128i __a, __m128i __b)
{
return (__m128i)((__v4si)__a + (__v4si)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_add_si64(__m64 __a, __m64 __b)
{
return __a + __b;
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_add_epi64(__m128i __a, __m128i __b)
{
return __a + __b;
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_adds_epi8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_adds_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_adds_epu8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_adds_epu16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_avg_epu8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_avg_epu16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_madd_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_max_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_max_epu8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_min_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_min_epu8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mulhi_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mulhi_epu16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mullo_epi16(__m128i __a, __m128i __b)
{
return (__m128i)((__v8hi)__a * (__v8hi)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_mul_su32(__m64 __a, __m64 __b)
{
return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mul_epu32(__m128i __a, __m128i __b)
{
return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sad_epu8(__m128i __a, __m128i __b)
{
return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sub_epi8(__m128i __a, __m128i __b)
{
return (__m128i)((__v16qi)__a - (__v16qi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sub_epi16(__m128i __a, __m128i __b)
{
return (__m128i)((__v8hi)__a - (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sub_epi32(__m128i __a, __m128i __b)
{
return (__m128i)((__v4si)__a - (__v4si)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_sub_si64(__m64 __a, __m64 __b)
{
return __a - __b;
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sub_epi64(__m128i __a, __m128i __b)
{
return __a - __b;
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_subs_epi8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_subs_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_subs_epu8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_subs_epu16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_and_si128(__m128i __a, __m128i __b)
{
return __a & __b;
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_andnot_si128(__m128i __a, __m128i __b)
{
return ~__a & __b;
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_or_si128(__m128i __a, __m128i __b)
{
return __a | __b;
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_xor_si128(__m128i __a, __m128i __b)
{
return __a ^ __b;
@@ -848,61 +847,61 @@ _mm_xor_si128(__m128i __a, __m128i __b)
#define _mm_bslli_si128(a, imm) \
_mm_slli_si128((a), (imm))
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_slli_epi16(__m128i __a, int __count)
{
return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sll_epi16(__m128i __a, __m128i __count)
{
return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_slli_epi32(__m128i __a, int __count)
{
return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sll_epi32(__m128i __a, __m128i __count)
{
return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_slli_epi64(__m128i __a, int __count)
{
return __builtin_ia32_psllqi128(__a, __count);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sll_epi64(__m128i __a, __m128i __count)
{
return __builtin_ia32_psllq128(__a, __count);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_srai_epi16(__m128i __a, int __count)
{
return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sra_epi16(__m128i __a, __m128i __count)
{
return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_srai_epi32(__m128i __a, int __count)
{
return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sra_epi32(__m128i __a, __m128i __count)
{
return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
@@ -931,61 +930,61 @@ _mm_sra_epi32(__m128i __a, __m128i __count)
#define _mm_bsrli_si128(a, imm) \
_mm_srli_si128((a), (imm))
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_srli_epi16(__m128i __a, int __count)
{
return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_srl_epi16(__m128i __a, __m128i __count)
{
return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_srli_epi32(__m128i __a, int __count)
{
return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_srl_epi32(__m128i __a, __m128i __count)
{
return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_srli_epi64(__m128i __a, int __count)
{
return __builtin_ia32_psrlqi128(__a, __count);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_srl_epi64(__m128i __a, __m128i __count)
{
return __builtin_ia32_psrlq128(__a, __count);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cmpeq_epi8(__m128i __a, __m128i __b)
{
return (__m128i)((__v16qi)__a == (__v16qi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cmpeq_epi16(__m128i __a, __m128i __b)
{
return (__m128i)((__v8hi)__a == (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cmpeq_epi32(__m128i __a, __m128i __b)
{
return (__m128i)((__v4si)__a == (__v4si)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cmpgt_epi8(__m128i __a, __m128i __b)
{
/* This function always performs a signed comparison, but __v16qi is a char
@@ -994,90 +993,90 @@ _mm_cmpgt_epi8(__m128i __a, __m128i __b)
return (__m128i)((__v16qs)__a > (__v16qs)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cmpgt_epi16(__m128i __a, __m128i __b)
{
return (__m128i)((__v8hi)__a > (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cmpgt_epi32(__m128i __a, __m128i __b)
{
return (__m128i)((__v4si)__a > (__v4si)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cmplt_epi8(__m128i __a, __m128i __b)
{
return _mm_cmpgt_epi8(__b, __a);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cmplt_epi16(__m128i __a, __m128i __b)
{
return _mm_cmpgt_epi16(__b, __a);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cmplt_epi32(__m128i __a, __m128i __b)
{
return _mm_cmpgt_epi32(__b, __a);
}
#ifdef __x86_64__
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_cvtsi64_sd(__m128d __a, long long __b)
{
__a[0] = __b;
return __a;
}
-static __inline__ long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ long long DEFAULT_FN_ATTRS
_mm_cvtsd_si64(__m128d __a)
{
return __builtin_ia32_cvtsd2si64(__a);
}
-static __inline__ long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ long long DEFAULT_FN_ATTRS
_mm_cvttsd_si64(__m128d __a)
{
return __a[0];
}
#endif
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cvtepi32_ps(__m128i __a)
{
return __builtin_ia32_cvtdq2ps((__v4si)__a);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtps_epi32(__m128 __a)
{
return (__m128i)__builtin_ia32_cvtps2dq(__a);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvttps_epi32(__m128 __a)
{
return (__m128i)__builtin_ia32_cvttps2dq(__a);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtsi32_si128(int __a)
{
return (__m128i)(__v4si){ __a, 0, 0, 0 };
}
#ifdef __x86_64__
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtsi64_si128(long long __a)
{
return (__m128i){ __a, 0 };
}
#endif
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_cvtsi128_si32(__m128i __a)
{
__v4si __b = (__v4si)__a;
@@ -1085,20 +1084,20 @@ _mm_cvtsi128_si32(__m128i __a)
}
#ifdef __x86_64__
-static __inline__ long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ long long DEFAULT_FN_ATTRS
_mm_cvtsi128_si64(__m128i __a)
{
return __a[0];
}
#endif
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_load_si128(__m128i const *__p)
{
return *__p;
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_loadu_si128(__m128i const *__p)
{
struct __loadu_si128 {
@@ -1107,7 +1106,7 @@ _mm_loadu_si128(__m128i const *__p)
return ((struct __loadu_si128*)__p)->__v;
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_loadl_epi64(__m128i const *__p)
{
struct __mm_loadl_epi64_struct {
@@ -1116,115 +1115,115 @@ _mm_loadl_epi64(__m128i const *__p)
return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_set_epi64x(long long q1, long long q0)
{
return (__m128i){ q0, q1 };
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_set_epi64(__m64 q1, __m64 q0)
{
return (__m128i){ (long long)q0, (long long)q1 };
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_set_epi32(int i3, int i2, int i1, int i0)
{
return (__m128i)(__v4si){ i0, i1, i2, i3};
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
{
return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
{
return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_set1_epi64x(long long __q)
{
return (__m128i){ __q, __q };
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_set1_epi64(__m64 __q)
{
return (__m128i){ (long long)__q, (long long)__q };
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_set1_epi32(int __i)
{
return (__m128i)(__v4si){ __i, __i, __i, __i };
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_set1_epi16(short __w)
{
return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_set1_epi8(char __b)
{
return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b };
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_setr_epi64(__m64 q0, __m64 q1)
{
return (__m128i){ (long long)q0, (long long)q1 };
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_setr_epi32(int i0, int i1, int i2, int i3)
{
return (__m128i)(__v4si){ i0, i1, i2, i3};
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
{
return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
{
return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_setzero_si128(void)
{
return (__m128i){ 0LL, 0LL };
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_store_si128(__m128i *__p, __m128i __b)
{
*__p = __b;
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_storeu_si128(__m128i *__p, __m128i __b)
{
__builtin_ia32_storedqu((char *)__p, (__v16qi)__b);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
{
__builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_storel_epi64(__m128i *__p, __m128i __a)
{
struct __mm_storel_epi64_struct {
@@ -1233,76 +1232,76 @@ _mm_storel_epi64(__m128i *__p, __m128i __a)
((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_stream_pd(double *__p, __m128d __a)
{
__builtin_ia32_movntpd(__p, __a);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_stream_si128(__m128i *__p, __m128i __a)
{
__builtin_ia32_movntdq(__p, __a);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_stream_si32(int *__p, int __a)
{
__builtin_ia32_movnti(__p, __a);
}
#ifdef __x86_64__
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_stream_si64(long long *__p, long long __a)
{
__builtin_ia32_movnti64(__p, __a);
}
#endif
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_clflush(void const *__p)
{
__builtin_ia32_clflush(__p);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_lfence(void)
{
__builtin_ia32_lfence();
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_mfence(void)
{
__builtin_ia32_mfence();
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_packs_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_packs_epi32(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_packus_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_extract_epi16(__m128i __a, int __imm)
{
__v8hi __b = (__v8hi)__a;
return (unsigned short)__b[__imm & 7];
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_insert_epi16(__m128i __a, int __b, int __imm)
{
__v8hi __c = (__v8hi)__a;
@@ -1310,7 +1309,7 @@ _mm_insert_epi16(__m128i __a, int __b, int __imm)
return (__m128i)__c;
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_movemask_epi8(__m128i __a)
{
return __builtin_ia32_pmovmskb128((__v16qi)__a);
@@ -1338,85 +1337,85 @@ _mm_movemask_epi8(__m128i __a)
4 + (((imm) & 0x30) >> 4), \
4 + (((imm) & 0xc0) >> 6)); })
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_unpackhi_epi8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_unpackhi_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_unpackhi_epi32(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_unpackhi_epi64(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_unpacklo_epi8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_unpacklo_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_unpacklo_epi32(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_unpacklo_epi64(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_movepi64_pi64(__m128i __a)
{
return (__m64)__a[0];
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_movpi64_epi64(__m64 __a)
{
return (__m128i){ (long long)__a, 0 };
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_move_epi64(__m128i __a)
{
return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_unpackhi_pd(__m128d __a, __m128d __b)
{
return __builtin_shufflevector(__a, __b, 1, 2+1);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_unpacklo_pd(__m128d __a, __m128d __b)
{
return __builtin_shufflevector(__a, __b, 0, 2+0);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_movemask_pd(__m128d __a)
{
return __builtin_ia32_movmskpd(__a);
@@ -1426,50 +1425,50 @@ _mm_movemask_pd(__m128d __a)
__builtin_shufflevector((__m128d)(a), (__m128d)(b), \
(i) & 1, (((i) & 2) >> 1) + 2); })
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_castpd_ps(__m128d __a)
{
return (__m128)__a;
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_castpd_si128(__m128d __a)
{
return (__m128i)__a;
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_castps_pd(__m128 __a)
{
return (__m128d)__a;
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_castps_si128(__m128 __a)
{
return (__m128i)__a;
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_castsi128_ps(__m128i __a)
{
return (__m128)__a;
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_castsi128_pd(__m128i __a)
{
return (__m128d)__a;
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_pause(void)
{
__asm__ volatile ("pause");
}
-#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
+#undef DEFAULT_FN_ATTRS
-#endif /* __SSE2__ */
+#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
#endif /* __EMMINTRIN_H */
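The define/undef bracketing visible in this hunk is the same in every converted header: DEFAULT_FN_ATTRS is defined right after the include guard, applied to each inline function, and removed again before the closing #endif so the macro never leaks into code that includes the header. A stripped-down sketch of the pattern, using an invented header and function name:

#ifndef __EXAMPLEINTRIN_H
#define __EXAMPLEINTRIN_H

/* Define the default attributes for the functions in this file. */
#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2")))

static __inline__ int DEFAULT_FN_ATTRS
_example_zero(void)
{
  return 0;
}

#undef DEFAULT_FN_ATTRS  /* keep the macro local to this header */

#endif /* __EXAMPLEINTRIN_H */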
diff --git a/contrib/llvm/tools/clang/lib/Headers/f16cintrin.h b/contrib/llvm/tools/clang/lib/Headers/f16cintrin.h
index f3614c0..c56960c 100644
--- a/contrib/llvm/tools/clang/lib/Headers/f16cintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/f16cintrin.h
@@ -25,16 +25,15 @@
#error "Never use <f16cintrin.h> directly; include <x86intrin.h> instead."
#endif
-#ifndef __F16C__
-# error "F16C instruction is not enabled"
-#endif /* __F16C__ */
-
#ifndef __F16CINTRIN_H
#define __F16CINTRIN_H
typedef float __v8sf __attribute__ ((__vector_size__ (32)));
typedef float __m256 __attribute__ ((__vector_size__ (32)));
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
+
#define _mm_cvtps_ph(a, imm) __extension__ ({ \
__m128 __a = (a); \
(__m128i)__builtin_ia32_vcvtps2ph((__v4sf)__a, (imm)); })
@@ -43,16 +42,18 @@ typedef float __m256 __attribute__ ((__vector_size__ (32)));
__m256 __a = (a); \
(__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); })
-static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline __m128 DEFAULT_FN_ATTRS
_mm_cvtph_ps(__m128i __a)
{
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
}
-static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline __m256 DEFAULT_FN_ATTRS
_mm256_cvtph_ps(__m128i __a)
{
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
}
+#undef DEFAULT_FN_ATTRS
+
#endif /* __F16CINTRIN_H */
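With the F16C #error guard gone, the half-precision conversions can be guarded at run time rather than at compile time. A hedged sketch of such a dispatch — the function names are invented, __builtin_cpu_supports is assumed to be available in the compiler in use, and <x86intrin.h> is assumed to expose f16cintrin.h unconditionally:

#include <x86intrin.h>

__attribute__((__target__("f16c")))
static float first_half_as_float_f16c(const __m128i *packed)
{
  /* _mm_cvtph_ps expands the four halves in the low 64 bits to floats. */
  __m128 floats = _mm_cvtph_ps(*packed);
  return floats[0];
}

float first_half_as_float(const __m128i *packed)
{
  if (__builtin_cpu_supports("f16c"))
    return first_half_as_float_f16c(packed);
  return 0.0f;  /* software fallback elided */
}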
diff --git a/contrib/llvm/tools/clang/lib/Headers/fma4intrin.h b/contrib/llvm/tools/clang/lib/Headers/fma4intrin.h
index c30920d..5268805 100644
--- a/contrib/llvm/tools/clang/lib/Headers/fma4intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/fma4intrin.h
@@ -28,204 +28,203 @@
#ifndef __FMA4INTRIN_H
#define __FMA4INTRIN_H
-#ifndef __FMA4__
-# error "FMA4 instruction set is not enabled"
-#else
-
#include <pmmintrin.h>
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma4")))
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C);
}
-static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C);
}
-static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C);
}
-static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C);
}
-static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C);
}
-static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C);
}
-static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C);
}
-static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C);
}
-static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C);
}
-static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C);
}
-static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C);
}
-static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C);
}
-static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C);
}
-#endif /* __FMA4__ */
+#undef DEFAULT_FN_ATTRS
#endif /* __FMA4INTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/fmaintrin.h b/contrib/llvm/tools/clang/lib/Headers/fmaintrin.h
index 6bfd5a8..6f5d1b9 100644
--- a/contrib/llvm/tools/clang/lib/Headers/fmaintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/fmaintrin.h
@@ -28,202 +28,201 @@
#ifndef __FMAINTRIN_H
#define __FMAINTRIN_H
-#ifndef __FMA__
-# error "FMA instruction set is not enabled"
-#else
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma")))
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C);
}
-static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C);
}
-static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C);
}
-static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C);
}
-static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C);
}
-static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C);
}
-static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C);
}
-static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C);
}
-static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C);
}
-static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C);
}
-static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C);
}
-static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C);
}
-static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C);
}
-#endif /* __FMA__ */
+#undef DEFAULT_FN_ATTRS
#endif /* __FMAINTRIN_H */
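
The FMA wrappers above differ only in the sign pattern encoded in their names: fmadd is a*b + c, fmsub is a*b - c, fnmadd is -(a*b) + c, fnmsub is -(a*b) - c, and the fmaddsub/fmsubadd forms alternate the two per lane, all fused into a single rounding. A short sketch of how they read in practice (hypothetical helper, assuming the per-function target support introduced by this change):

    #include <immintrin.h>

    __attribute__((target("fma")))
    static __m128 fma_demo(__m128 a, __m128 b, __m128 c)
    {
        /* One fused operation per lane: a*b + c with a single rounding. */
        __m128 r = _mm_fmadd_ps(a, b, c);
        /* _mm_fmsub_ps(a, b, c)   -> a*b - c
         * _mm_fnmadd_ps(a, b, c)  -> -(a*b) + c
         * _mm_fnmsub_ps(a, b, c)  -> -(a*b) - c   */
        return r;
    }
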
diff --git a/contrib/llvm/tools/clang/lib/Headers/immintrin.h b/contrib/llvm/tools/clang/lib/Headers/immintrin.h
index ac7d54a..4af407d 100644
--- a/contrib/llvm/tools/clang/lib/Headers/immintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/immintrin.h
@@ -24,179 +24,124 @@
#ifndef __IMMINTRIN_H
#define __IMMINTRIN_H
-#ifdef __MMX__
#include <mmintrin.h>
-#endif
-#ifdef __SSE__
#include <xmmintrin.h>
-#endif
-#ifdef __SSE2__
#include <emmintrin.h>
-#endif
-#ifdef __SSE3__
#include <pmmintrin.h>
-#endif
-#ifdef __SSSE3__
#include <tmmintrin.h>
-#endif
-#if defined (__SSE4_2__) || defined (__SSE4_1__)
#include <smmintrin.h>
-#endif
-#if defined (__AES__) || defined (__PCLMUL__)
#include <wmmintrin.h>
-#endif
-#ifdef __AVX__
#include <avxintrin.h>
-#endif
-#ifdef __AVX2__
#include <avx2intrin.h>
-#endif
-#ifdef __BMI__
#include <bmiintrin.h>
-#endif
-#ifdef __BMI2__
#include <bmi2intrin.h>
-#endif
-#ifdef __LZCNT__
#include <lzcntintrin.h>
-#endif
-#ifdef __FMA__
#include <fmaintrin.h>
-#endif
-#ifdef __AVX512F__
#include <avx512fintrin.h>
-#endif
-#ifdef __AVX512VL__
#include <avx512vlintrin.h>
-#endif
-#ifdef __AVX512BW__
#include <avx512bwintrin.h>
-#endif
-#ifdef __AVX512DQ__
#include <avx512dqintrin.h>
-#endif
-#if defined (__AVX512VL__) && defined (__AVX512BW__)
#include <avx512vlbwintrin.h>
-#endif
-#if defined (__AVX512VL__) && defined (__AVX512DQ__)
#include <avx512vldqintrin.h>
-#endif
-#ifdef __AVX512ER__
#include <avx512erintrin.h>
-#endif
-#ifdef __RDRND__
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
return __builtin_ia32_rdrand16_step(__p);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
return __builtin_ia32_rdrand32_step(__p);
}
#ifdef __x86_64__
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
return __builtin_ia32_rdrand64_step(__p);
}
#endif
-#endif /* __RDRND__ */
-#ifdef __FSGSBASE__
#ifdef __x86_64__
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
return __builtin_ia32_rdfsbase32();
}
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
return __builtin_ia32_rdfsbase64();
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
return __builtin_ia32_rdgsbase32();
}
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
return __builtin_ia32_rdgsbase64();
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
return __builtin_ia32_wrfsbase32(__V);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
return __builtin_ia32_wrfsbase64(__V);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
return __builtin_ia32_wrgsbase32(__V);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
return __builtin_ia32_wrgsbase64(__V);
}
#endif
-#endif /* __FSGSBASE__ */
-#ifdef __RTM__
#include <rtmintrin.h>
-#endif
-/* FIXME: check __HLE__ as well when HLE is supported. */
-#if defined (__RTM__)
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
-_xtest(void)
-{
- return __builtin_ia32_xtest();
-}
-#endif
+#include <xtestintrin.h>
-#ifdef __SHA__
#include <shaintrin.h>
-#endif
-/* Some intrinsics inside adxintrin.h are available only if __ADX__ defined,
- * whereas others are also available if __ADX__ undefined */
+/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
+ * whereas others are also available at all times. */
#include <adxintrin.h>
#endif /* __IMMINTRIN_H */
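
With the umbrella header now including every sub-header unconditionally and the RDRAND/FSGSBASE wrappers carrying their own __target__("rdrnd") / __target__("fsgsbase") attributes, code built without -mrdrnd can still reach _rdrand32_step by opting in at function granularity. A hedged sketch (the retry count and helper name are made up; the caller is assumed to have verified RDRAND support, e.g. via CPUID, before using it):

    #include <immintrin.h>

    __attribute__((target("rdrnd")))
    static int get_random_u32(unsigned int *out)
    {
        /* _rdrand32_step returns 1 on success and 0 when the hardware RNG
         * was temporarily unable to deliver a value, so retry a few times. */
        for (int i = 0; i < 10; ++i)
            if (_rdrand32_step(out))
                return 1;
        return 0;
    }
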
diff --git a/contrib/llvm/tools/clang/lib/Headers/lzcntintrin.h b/contrib/llvm/tools/clang/lib/Headers/lzcntintrin.h
index 35d6659..41e61e9 100644
--- a/contrib/llvm/tools/clang/lib/Headers/lzcntintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/lzcntintrin.h
@@ -25,43 +25,44 @@
#error "Never use <lzcntintrin.h> directly; include <x86intrin.h> instead."
#endif
-#ifndef __LZCNT__
-# error "LZCNT instruction is not enabled"
-#endif /* __LZCNT__ */
-
#ifndef __LZCNTINTRIN_H
#define __LZCNTINTRIN_H
-static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
+
+static __inline__ unsigned short DEFAULT_FN_ATTRS
__lzcnt16(unsigned short __X)
{
return __X ? __builtin_clzs(__X) : 16;
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__lzcnt32(unsigned int __X)
{
return __X ? __builtin_clz(__X) : 32;
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
_lzcnt_u32(unsigned int __X)
{
return __X ? __builtin_clz(__X) : 32;
}
#ifdef __x86_64__
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__lzcnt64(unsigned long long __X)
{
return __X ? __builtin_clzll(__X) : 64;
}
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
_lzcnt_u64(unsigned long long __X)
{
return __X ? __builtin_clzll(__X) : 64;
}
#endif
+#undef DEFAULT_FN_ATTRS
+
#endif /* __LZCNTINTRIN_H */
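
Note the "__X ? __builtin_clz(__X) : 32" shape of the wrappers above: plain __builtin_clz has undefined behavior for a zero argument, while the LZCNT instruction defines the result as the operand width, so the zero case is handled explicitly to preserve the hardware semantics. For example (assuming x86intrin.h pulls in this header unconditionally, as this change does for the other umbrella headers):

    #include <x86intrin.h>

    __attribute__((target("lzcnt")))
    static void lzcnt_demo(void)
    {
        unsigned a = _lzcnt_u32(0x00ffffffu);   /* 8: eight leading zero bits */
        unsigned b = _lzcnt_u32(0);             /* 32: well defined, unlike
                                                   __builtin_clz(0) */
        (void)a; (void)b;
    }
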
diff --git a/contrib/llvm/tools/clang/lib/Headers/mm3dnow.h b/contrib/llvm/tools/clang/lib/Headers/mm3dnow.h
index 5242d99..70734e4 100644
--- a/contrib/llvm/tools/clang/lib/Headers/mm3dnow.h
+++ b/contrib/llvm/tools/clang/lib/Headers/mm3dnow.h
@@ -29,134 +29,139 @@
typedef float __v2sf __attribute__((__vector_size__(8)));
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
+
+static __inline__ void DEFAULT_FN_ATTRS
_m_femms() {
__builtin_ia32_femms();
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pavgusb(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pf2id(__m64 __m) {
return (__m64)__builtin_ia32_pf2id((__v2sf)__m);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfacc(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfadd(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfcmpeq(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfcmpge(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfcmpgt(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfmax(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfmin(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfmul(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfrcp(__m64 __m) {
return (__m64)__builtin_ia32_pfrcp((__v2sf)__m);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfrcpit1(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfrcpit2(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfrsqrt(__m64 __m) {
return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfrsqrtit1(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfsub(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfsubr(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pi2fd(__m64 __m) {
return (__m64)__builtin_ia32_pi2fd((__v2si)__m);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pmulhrw(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pf2iw(__m64 __m) {
return (__m64)__builtin_ia32_pf2iw((__v2sf)__m);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfnacc(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pfpnacc(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pi2fw(__m64 __m) {
return (__m64)__builtin_ia32_pi2fw((__v2si)__m);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pswapdsf(__m64 __m) {
return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_m_pswapdsi(__m64 __m) {
return (__m64)__builtin_ia32_pswapdsi((__v2si)__m);
}
+#undef DEFAULT_FN_ATTRS
+
#endif
diff --git a/contrib/llvm/tools/clang/lib/Headers/mmintrin.h b/contrib/llvm/tools/clang/lib/Headers/mmintrin.h
index 986870a..d1d729f 100644
--- a/contrib/llvm/tools/clang/lib/Headers/mmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/mmintrin.h
@@ -24,377 +24,376 @@
#ifndef __MMINTRIN_H
#define __MMINTRIN_H
-#ifndef __MMX__
-#error "MMX instruction set not enabled"
-#else
-
typedef long long __m64 __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
+
+static __inline__ void DEFAULT_FN_ATTRS
_mm_empty(void)
{
__builtin_ia32_emms();
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cvtsi32_si64(int __i)
{
return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_cvtsi64_si32(__m64 __m)
{
return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cvtsi64_m64(long long __i)
{
return (__m64)__i;
}
-static __inline__ long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ long long DEFAULT_FN_ATTRS
_mm_cvtm64_si64(__m64 __m)
{
return (long long)__m;
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_packs_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_packs_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_packs_pu16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_add_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_add_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_add_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_adds_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_adds_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_adds_pu8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_adds_pu16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_sub_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_sub_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_sub_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_subs_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_subs_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_subs_pu8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_subs_pu16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_madd_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_mullo_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_sll_pi16(__m64 __m, __m64 __count)
{
return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_slli_pi16(__m64 __m, int __count)
{
return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_sll_pi32(__m64 __m, __m64 __count)
{
return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_slli_pi32(__m64 __m, int __count)
{
return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_sll_si64(__m64 __m, __m64 __count)
{
return (__m64)__builtin_ia32_psllq(__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_slli_si64(__m64 __m, int __count)
{
return (__m64)__builtin_ia32_psllqi(__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_sra_pi16(__m64 __m, __m64 __count)
{
return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_srai_pi16(__m64 __m, int __count)
{
return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_sra_pi32(__m64 __m, __m64 __count)
{
return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_srai_pi32(__m64 __m, int __count)
{
return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_srl_pi16(__m64 __m, __m64 __count)
{
return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_srli_pi16(__m64 __m, int __count)
{
return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_srl_pi32(__m64 __m, __m64 __count)
{
return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_srli_pi32(__m64 __m, int __count)
{
return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_srl_si64(__m64 __m, __m64 __count)
{
return (__m64)__builtin_ia32_psrlq(__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_srli_si64(__m64 __m, int __count)
{
return (__m64)__builtin_ia32_psrlqi(__m, __count);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_and_si64(__m64 __m1, __m64 __m2)
{
return __builtin_ia32_pand(__m1, __m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_andnot_si64(__m64 __m1, __m64 __m2)
{
return __builtin_ia32_pandn(__m1, __m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_or_si64(__m64 __m1, __m64 __m2)
{
return __builtin_ia32_por(__m1, __m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_xor_si64(__m64 __m1, __m64 __m2)
{
return __builtin_ia32_pxor(__m1, __m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_setzero_si64(void)
{
return (__m64){ 0LL };
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_set_pi32(int __i1, int __i0)
{
return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
{
return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
char __b1, char __b0)
{
@@ -402,43 +401,44 @@ _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
__b4, __b5, __b6, __b7);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_set1_pi32(int __i)
{
return _mm_set_pi32(__i, __i);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_set1_pi16(short __w)
{
return _mm_set_pi16(__w, __w, __w, __w);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_set1_pi8(char __b)
{
return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_setr_pi32(int __i0, int __i1)
{
return _mm_set_pi32(__i1, __i0);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
{
return _mm_set_pi16(__w3, __w2, __w1, __w0);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
char __b6, char __b7)
{
return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
}
+#undef DEFAULT_FN_ATTRS
/* Aliases for compatibility. */
#define _m_empty _mm_empty
@@ -497,7 +497,5 @@ _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
#define _m_pcmpgtw _mm_cmpgt_pi16
#define _m_pcmpgtd _mm_cmpgt_pi32
-#endif /* __MMX__ */
-
#endif /* __MMINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/nmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/nmmintrin.h
index f12622d..57fec15 100644
--- a/contrib/llvm/tools/clang/lib/Headers/nmmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/nmmintrin.h
@@ -24,12 +24,7 @@
#ifndef _NMMINTRIN_H
#define _NMMINTRIN_H
-#ifndef __SSE4_2__
-#error "SSE4.2 instruction set not enabled"
-#else
-
/* To match expectations of gcc we put the sse4.2 definitions into smmintrin.h,
just include it now then. */
#include <smmintrin.h>
-#endif /* __SSE4_2__ */
#endif /* _NMMINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/pmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/pmmintrin.h
index 6f1fc32..6e61539 100644
--- a/contrib/llvm/tools/clang/lib/Headers/pmmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/pmmintrin.h
@@ -24,61 +24,60 @@
#ifndef __PMMINTRIN_H
#define __PMMINTRIN_H
-#ifndef __SSE3__
-#error "SSE3 instruction set not enabled"
-#else
-
#include <emmintrin.h>
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse3")))
+
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_lddqu_si128(__m128i const *__p)
{
return (__m128i)__builtin_ia32_lddqu((char const *)__p);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_addsub_ps(__m128 __a, __m128 __b)
{
return __builtin_ia32_addsubps(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_hadd_ps(__m128 __a, __m128 __b)
{
return __builtin_ia32_haddps(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_hsub_ps(__m128 __a, __m128 __b)
{
return __builtin_ia32_hsubps(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_movehdup_ps(__m128 __a)
{
return __builtin_shufflevector(__a, __a, 1, 1, 3, 3);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_moveldup_ps(__m128 __a)
{
return __builtin_shufflevector(__a, __a, 0, 0, 2, 2);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_addsub_pd(__m128d __a, __m128d __b)
{
return __builtin_ia32_addsubpd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_hadd_pd(__m128d __a, __m128d __b)
{
return __builtin_ia32_haddpd(__a, __b);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_hsub_pd(__m128d __a, __m128d __b)
{
return __builtin_ia32_hsubpd(__a, __b);
@@ -86,7 +85,7 @@ _mm_hsub_pd(__m128d __a, __m128d __b)
#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_movedup_pd(__m128d __a)
{
return __builtin_shufflevector(__a, __a, 0, 0);
@@ -100,18 +99,18 @@ _mm_movedup_pd(__m128d __a)
#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
{
__builtin_ia32_monitor((void *)__p, __extensions, __hints);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_mwait(unsigned __extensions, unsigned __hints)
{
__builtin_ia32_mwait(__extensions, __hints);
}
-#endif /* __SSE3__ */
+#undef DEFAULT_FN_ATTRS
#endif /* __PMMINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/popcntintrin.h b/contrib/llvm/tools/clang/lib/Headers/popcntintrin.h
index d439daa..fede8da 100644
--- a/contrib/llvm/tools/clang/lib/Headers/popcntintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/popcntintrin.h
@@ -21,25 +21,26 @@
*===-----------------------------------------------------------------------===
*/
-#ifndef __POPCNT__
-#error "POPCNT instruction set not enabled"
-#endif
-
#ifndef _POPCNTINTRIN_H
#define _POPCNTINTRIN_H
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("popcnt")))
+
+static __inline__ int DEFAULT_FN_ATTRS
_mm_popcnt_u32(unsigned int __A)
{
return __builtin_popcount(__A);
}
#ifdef __x86_64__
-static __inline__ long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ long long DEFAULT_FN_ATTRS
_mm_popcnt_u64(unsigned long long __A)
{
return __builtin_popcountll(__A);
}
#endif /* __x86_64__ */
+#undef DEFAULT_FN_ATTRS
+
#endif /* _POPCNTINTRIN_H */
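
Because the wrappers above now carry __target__("popcnt") instead of demanding -mpopcnt for the whole translation unit, a natural companion pattern is a run-time CPUID check with a portable fallback. A minimal sketch (helper names are invented):

    #include <popcntintrin.h>

    __attribute__((target("popcnt")))
    static int popcount_hw(unsigned int x)
    {
        return _mm_popcnt_u32(x);               /* POPCNT instruction */
    }

    static int popcount_any(unsigned int x)
    {
        if (__builtin_cpu_supports("popcnt"))   /* CPUID probe at run time */
            return popcount_hw(x);
        return __builtin_popcount(x);           /* portable fallback */
    }
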
diff --git a/contrib/llvm/tools/clang/lib/Headers/rdseedintrin.h b/contrib/llvm/tools/clang/lib/Headers/rdseedintrin.h
index 0fef1fa..ac9ec4f 100644
--- a/contrib/llvm/tools/clang/lib/Headers/rdseedintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/rdseedintrin.h
@@ -28,25 +28,29 @@
#ifndef __RDSEEDINTRIN_H
#define __RDSEEDINTRIN_H
-#ifdef __RDSEED__
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rdseed")))
+
+static __inline__ int DEFAULT_FN_ATTRS
_rdseed16_step(unsigned short *__p)
{
return __builtin_ia32_rdseed16_step(__p);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_rdseed32_step(unsigned int *__p)
{
return __builtin_ia32_rdseed32_step(__p);
}
#ifdef __x86_64__
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_rdseed64_step(unsigned long long *__p)
{
return __builtin_ia32_rdseed64_step(__p);
}
#endif
-#endif /* __RDSEED__ */
+
+#undef DEFAULT_FN_ATTRS
+
#endif /* __RDSEEDINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/rtmintrin.h b/contrib/llvm/tools/clang/lib/Headers/rtmintrin.h
index 26149ca..8709a12 100644
--- a/contrib/llvm/tools/clang/lib/Headers/rtmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/rtmintrin.h
@@ -37,13 +37,16 @@
#define _XABORT_NESTED (1 << 5)
#define _XABORT_CODE(x) (((x) >> 24) & 0xFF)
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rtm")))
+
+static __inline__ unsigned int DEFAULT_FN_ATTRS
_xbegin(void)
{
return __builtin_ia32_xbegin();
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_xend(void)
{
__builtin_ia32_xend();
@@ -51,4 +54,6 @@ _xend(void)
#define _xabort(imm) __builtin_ia32_xabort((imm))
+#undef DEFAULT_FN_ATTRS
+
#endif /* __RTMINTRIN_H */
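
The _xbegin/_xend pair above is used as a transaction bracket: _xbegin returns _XBEGIN_STARTED when the transaction starts and an abort status otherwise, so the usual shape is a transactional fast path with a non-transactional fallback. A minimal sketch, with the shared counter and the lock-based fallback left hypothetical:

    #include <immintrin.h>

    extern long counter;
    void locked_increment(void);                 /* hypothetical fallback path */

    __attribute__((target("rtm")))
    static void increment(void)
    {
        if (_xbegin() == _XBEGIN_STARTED) {
            ++counter;                           /* runs inside the transaction */
            _xend();
        } else {
            locked_increment();                  /* aborted or RTM unavailable */
        }
    }
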
diff --git a/contrib/llvm/tools/clang/lib/Headers/shaintrin.h b/contrib/llvm/tools/clang/lib/Headers/shaintrin.h
index 391a4bb..4b74291 100644
--- a/contrib/llvm/tools/clang/lib/Headers/shaintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/shaintrin.h
@@ -28,47 +28,48 @@
#ifndef __SHAINTRIN_H
#define __SHAINTRIN_H
-#if !defined (__SHA__)
-# error "SHA instructions not enabled"
-#endif
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha")))
#define _mm_sha1rnds4_epu32(V1, V2, M) __extension__ ({ \
__builtin_ia32_sha1rnds4((V1), (V2), (M)); })
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sha1nexte_epu32(__m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_sha1nexte((__v4si)__X, (__v4si)__Y);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sha1msg1_epu32(__m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_sha1msg1((__v4si)__X, (__v4si)__Y);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sha1msg2_epu32(__m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_sha1msg2((__v4si)__X, (__v4si)__Y);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z)
{
return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__X, (__v4si)__Y, (__v4si)__Z);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sha256msg1_epu32(__m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_sha256msg1((__v4si)__X, (__v4si)__Y);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sha256msg2_epu32(__m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_sha256msg2((__v4si)__X, (__v4si)__Y);
}
+#undef DEFAULT_FN_ATTRS
+
#endif /* __SHAINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/smmintrin.h b/contrib/llvm/tools/clang/lib/Headers/smmintrin.h
index 6e35734..f2cc909 100644
--- a/contrib/llvm/tools/clang/lib/Headers/smmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/smmintrin.h
@@ -24,12 +24,11 @@
#ifndef _SMMINTRIN_H
#define _SMMINTRIN_H
-#ifndef __SSE4_1__
-#error "SSE4.1 instruction set not enabled"
-#else
-
#include <tmmintrin.h>
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1")))
+
/* SSE4 Rounding macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
@@ -92,21 +91,21 @@
(((M) & 0x04) ? 6 : 2), \
(((M) & 0x08) ? 7 : 3)); })
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M)
{
return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2,
(__v2df)__M);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)
{
return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2,
(__v4sf)__M);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
{
return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2,
@@ -127,13 +126,13 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
(((M) & 0x80) ? 15 : 7)); })
/* SSE4 Dword Multiply Instructions. */
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mullo_epi32 (__m128i __V1, __m128i __V2)
{
return (__m128i) ((__v4si)__V1 * (__v4si)__V2);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mul_epi32 (__m128i __V1, __m128i __V2)
{
return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2);
@@ -151,56 +150,56 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
(__m128d) __builtin_ia32_dppd((__v2df)__X, (__v2df)__Y, (M)); })
/* SSE4 Streaming Load Hint Instruction. */
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_stream_load_si128 (__m128i *__V)
{
return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __V);
}
/* SSE4 Packed Integer Min/Max Instructions. */
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_min_epi8 (__m128i __V1, __m128i __V2)
{
return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_max_epi8 (__m128i __V1, __m128i __V2)
{
return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_min_epu16 (__m128i __V1, __m128i __V2)
{
return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_max_epu16 (__m128i __V1, __m128i __V2)
{
return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_min_epi32 (__m128i __V1, __m128i __V2)
{
return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_max_epi32 (__m128i __V1, __m128i __V2)
{
return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_min_epu32 (__m128i __V1, __m128i __V2)
{
return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_max_epu32 (__m128i __V1, __m128i __V2)
{
return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2);
@@ -254,19 +253,19 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
#endif /* __x86_64 */
/* SSE4 128-bit Packed Integer Comparisons. */
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_testz_si128(__m128i __M, __m128i __V)
{
return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_testc_si128(__m128i __M, __m128i __V)
{
return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_testnzc_si128(__m128i __M, __m128i __V)
{
return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
@@ -277,88 +276,88 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
/* SSE4 64-bit Packed Integer Comparisons. */
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cmpeq_epi64(__m128i __V1, __m128i __V2)
{
return (__m128i)((__v2di)__V1 == (__v2di)__V2);
}
/* SSE4 Packed Integer Sign-Extension. */
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtepi8_epi16(__m128i __V)
{
return (__m128i) __builtin_ia32_pmovsxbw128((__v16qi) __V);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtepi8_epi32(__m128i __V)
{
return (__m128i) __builtin_ia32_pmovsxbd128((__v16qi) __V);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtepi8_epi64(__m128i __V)
{
return (__m128i) __builtin_ia32_pmovsxbq128((__v16qi) __V);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtepi16_epi32(__m128i __V)
{
return (__m128i) __builtin_ia32_pmovsxwd128((__v8hi) __V);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtepi16_epi64(__m128i __V)
{
return (__m128i) __builtin_ia32_pmovsxwq128((__v8hi)__V);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtepi32_epi64(__m128i __V)
{
return (__m128i) __builtin_ia32_pmovsxdq128((__v4si)__V);
}
/* SSE4 Packed Integer Zero-Extension. */
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtepu8_epi16(__m128i __V)
{
return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtepu8_epi32(__m128i __V)
{
return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtepu8_epi64(__m128i __V)
{
return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtepu16_epi32(__m128i __V)
{
return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtepu16_epi64(__m128i __V)
{
return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cvtepu32_epi64(__m128i __V)
{
return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V);
}
/* SSE4 Pack with Unsigned Saturation. */
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_packus_epi32(__m128i __V1, __m128i __V2)
{
return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2);
@@ -370,15 +369,19 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2)
__m128i __Y = (Y); \
(__m128i) __builtin_ia32_mpsadbw128((__v16qi)__X, (__v16qi)__Y, (M)); })
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_minpos_epu16(__m128i __V)
{
return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V);
}
+/* Handle the sse4.2 definitions here. */
+
/* These definitions are normally in nmmintrin.h, but gcc puts them in here
so we'll do the same. */
-#ifdef __SSE4_2__
+
+#undef DEFAULT_FN_ATTRS
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
/* These specify the type of data that we're comparing. */
#define _SIDD_UBYTE_OPS 0x00
@@ -439,44 +442,43 @@ _mm_minpos_epu16(__m128i __V)
__builtin_ia32_pcmpestriz128((A), (LA), (B), (LB), (M))
/* SSE4.2 Compare Packed Data -- Greater Than. */
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cmpgt_epi64(__m128i __V1, __m128i __V2)
{
return (__m128i)((__v2di)__V1 > (__v2di)__V2);
}
/* SSE4.2 Accumulate CRC32. */
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
_mm_crc32_u8(unsigned int __C, unsigned char __D)
{
return __builtin_ia32_crc32qi(__C, __D);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
_mm_crc32_u16(unsigned int __C, unsigned short __D)
{
return __builtin_ia32_crc32hi(__C, __D);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
_mm_crc32_u32(unsigned int __C, unsigned int __D)
{
return __builtin_ia32_crc32si(__C, __D);
}
#ifdef __x86_64__
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
_mm_crc32_u64(unsigned long long __C, unsigned long long __D)
{
return __builtin_ia32_crc32di(__C, __D);
}
#endif /* __x86_64__ */
+#undef DEFAULT_FN_ATTRS
+
#ifdef __POPCNT__
#include <popcntintrin.h>
#endif
-#endif /* __SSE4_2__ */
-#endif /* __SSE4_1__ */
-
#endif /* _SMMINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/tbmintrin.h b/contrib/llvm/tools/clang/lib/Headers/tbmintrin.h
index f95e34f..1926df9 100644
--- a/contrib/llvm/tools/clang/lib/Headers/tbmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/tbmintrin.h
@@ -21,10 +21,6 @@
*===-----------------------------------------------------------------------===
*/
-#ifndef __TBM__
-#error "TBM instruction set is not enabled"
-#endif
-
#ifndef __X86INTRIN_H
#error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead."
#endif
@@ -32,57 +28,60 @@
#ifndef __TBMINTRIN_H
#define __TBMINTRIN_H
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("tbm")))
+
#define __bextri_u32(a, b) (__builtin_ia32_bextri_u32((a), (b)))
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__blcfill_u32(unsigned int a)
{
return a & (a + 1);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__blci_u32(unsigned int a)
{
return a | ~(a + 1);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__blcic_u32(unsigned int a)
{
return ~a & (a + 1);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__blcmsk_u32(unsigned int a)
{
return a ^ (a + 1);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__blcs_u32(unsigned int a)
{
return a | (a + 1);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__blsfill_u32(unsigned int a)
{
return a | (a - 1);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__blsic_u32(unsigned int a)
{
return ~a | (a - 1);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__t1mskc_u32(unsigned int a)
{
return ~a | (a + 1);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
__tzmsk_u32(unsigned int a)
{
return ~a & (a - 1);
@@ -91,68 +90,61 @@ __tzmsk_u32(unsigned int a)
#ifdef __x86_64__
#define __bextri_u64(a, b) (__builtin_ia32_bextri_u64((a), (int)(b)))
-static __inline__ unsigned long long __attribute__((__always_inline__,
- __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__blcfill_u64(unsigned long long a)
{
return a & (a + 1);
}
-static __inline__ unsigned long long __attribute__((__always_inline__,
- __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__blci_u64(unsigned long long a)
{
return a | ~(a + 1);
}
-static __inline__ unsigned long long __attribute__((__always_inline__,
- __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__blcic_u64(unsigned long long a)
{
return ~a & (a + 1);
}
-static __inline__ unsigned long long __attribute__((__always_inline__,
- __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__blcmsk_u64(unsigned long long a)
{
return a ^ (a + 1);
}
-static __inline__ unsigned long long __attribute__((__always_inline__,
- __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__blcs_u64(unsigned long long a)
{
return a | (a + 1);
}
-static __inline__ unsigned long long __attribute__((__always_inline__,
- __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__blsfill_u64(unsigned long long a)
{
return a | (a - 1);
}
-static __inline__ unsigned long long __attribute__((__always_inline__,
- __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__blsic_u64(unsigned long long a)
{
return ~a | (a - 1);
}
-static __inline__ unsigned long long __attribute__((__always_inline__,
- __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__t1mskc_u64(unsigned long long a)
{
return ~a | (a + 1);
}
-static __inline__ unsigned long long __attribute__((__always_inline__,
- __nodebug__))
+static __inline__ unsigned long long DEFAULT_FN_ATTRS
__tzmsk_u64(unsigned long long a)
{
return ~a & (a - 1);
}
#endif
+#undef DEFAULT_FN_ATTRS
+
#endif /* __TBMINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/tmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/tmmintrin.h
index 4238f5b..2474c94 100644
--- a/contrib/llvm/tools/clang/lib/Headers/tmmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/tmmintrin.h
@@ -24,43 +24,42 @@
#ifndef __TMMINTRIN_H
#define __TMMINTRIN_H
-#ifndef __SSSE3__
-#error "SSSE3 instruction set not enabled"
-#else
-
#include <pmmintrin.h>
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3")))
+
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_abs_pi8(__m64 __a)
{
return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_abs_epi8(__m128i __a)
{
return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_abs_pi16(__m64 __a)
{
return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_abs_epi16(__m128i __a)
{
return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_abs_pi32(__m64 __a)
{
return (__m64)__builtin_ia32_pabsd((__v2si)__a);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_abs_epi32(__m128i __a)
{
return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
@@ -76,150 +75,150 @@ _mm_abs_epi32(__m128i __a)
__m64 __b = (b); \
(__m64)__builtin_ia32_palignr((__v8qi)__a, (__v8qi)__b, (n)); })
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_hadd_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_hadd_epi32(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_hadd_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_hadd_pi32(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_hadds_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_hadds_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_hsub_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_hsub_epi32(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_hsub_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_hsub_pi32(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_hsubs_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_hsubs_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maddubs_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_maddubs_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_mulhrs_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_mulhrs_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_shuffle_epi8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_shuffle_pi8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sign_epi8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sign_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sign_epi32(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_sign_pi8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_sign_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_sign_pi32(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
}
-#endif /* __SSSE3__ */
+#undef DEFAULT_FN_ATTRS
#endif /* __TMMINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/wmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/wmmintrin.h
index 369e3c2..a2d9310 100644
--- a/contrib/llvm/tools/clang/lib/Headers/wmmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/wmmintrin.h
@@ -26,17 +26,8 @@
#include <emmintrin.h>
-#if !defined (__AES__) && !defined (__PCLMUL__)
-# error "AES/PCLMUL instructions not enabled"
-#else
-
-#ifdef __AES__
#include <__wmmintrin_aes.h>
-#endif /* __AES__ */
-#ifdef __PCLMUL__
#include <__wmmintrin_pclmul.h>
-#endif /* __PCLMUL__ */
-#endif /* __AES__ || __PCLMUL__ */
#endif /* _WMMINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/x86intrin.h b/contrib/llvm/tools/clang/lib/Headers/x86intrin.h
index 21a43da..4d8077e 100644
--- a/contrib/llvm/tools/clang/lib/Headers/x86intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/x86intrin.h
@@ -28,53 +28,29 @@
#include <immintrin.h>
-#ifdef __3dNOW__
#include <mm3dnow.h>
-#endif
-#ifdef __BMI__
#include <bmiintrin.h>
-#endif
-#ifdef __BMI2__
#include <bmi2intrin.h>
-#endif
-#ifdef __LZCNT__
#include <lzcntintrin.h>
-#endif
-#ifdef __POPCNT__
#include <popcntintrin.h>
-#endif
-#ifdef __RDSEED__
#include <rdseedintrin.h>
-#endif
-#ifdef __PRFCHW__
#include <prfchwintrin.h>
-#endif
-#ifdef __SSE4A__
#include <ammintrin.h>
-#endif
-#ifdef __FMA4__
#include <fma4intrin.h>
-#endif
-#ifdef __XOP__
#include <xopintrin.h>
-#endif
-#ifdef __TBM__
#include <tbmintrin.h>
-#endif
-#ifdef __F16C__
#include <f16cintrin.h>
-#endif
/* FIXME: LWP */
diff --git a/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h
index 3a6b95e..0085eb4 100644
--- a/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h
@@ -24,10 +24,6 @@
#ifndef __XMMINTRIN_H
#define __XMMINTRIN_H
-#ifndef __SSE__
-#error "SSE instruction set not enabled"
-#else
-
#include <mmintrin.h>
typedef int __v4si __attribute__((__vector_size__(16)));
@@ -40,182 +36,185 @@ typedef float __m128 __attribute__((__vector_size__(16)));
#include <mm_malloc.h>
#endif
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse")))
+
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_add_ss(__m128 __a, __m128 __b)
{
__a[0] += __b[0];
return __a;
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_add_ps(__m128 __a, __m128 __b)
{
return __a + __b;
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_sub_ss(__m128 __a, __m128 __b)
{
__a[0] -= __b[0];
return __a;
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_sub_ps(__m128 __a, __m128 __b)
{
return __a - __b;
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_mul_ss(__m128 __a, __m128 __b)
{
__a[0] *= __b[0];
return __a;
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_mul_ps(__m128 __a, __m128 __b)
{
return __a * __b;
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_div_ss(__m128 __a, __m128 __b)
{
__a[0] /= __b[0];
return __a;
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_div_ps(__m128 __a, __m128 __b)
{
return __a / __b;
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_sqrt_ss(__m128 __a)
{
__m128 __c = __builtin_ia32_sqrtss(__a);
return (__m128) { __c[0], __a[1], __a[2], __a[3] };
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_sqrt_ps(__m128 __a)
{
return __builtin_ia32_sqrtps(__a);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_rcp_ss(__m128 __a)
{
__m128 __c = __builtin_ia32_rcpss(__a);
return (__m128) { __c[0], __a[1], __a[2], __a[3] };
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_rcp_ps(__m128 __a)
{
return __builtin_ia32_rcpps(__a);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_rsqrt_ss(__m128 __a)
{
__m128 __c = __builtin_ia32_rsqrtss(__a);
return (__m128) { __c[0], __a[1], __a[2], __a[3] };
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_rsqrt_ps(__m128 __a)
{
return __builtin_ia32_rsqrtps(__a);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_min_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_minss(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_min_ps(__m128 __a, __m128 __b)
{
return __builtin_ia32_minps(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_max_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_maxss(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_max_ps(__m128 __a, __m128 __b)
{
return __builtin_ia32_maxps(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_and_ps(__m128 __a, __m128 __b)
{
return (__m128)((__v4si)__a & (__v4si)__b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_andnot_ps(__m128 __a, __m128 __b)
{
return (__m128)(~(__v4si)__a & (__v4si)__b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_or_ps(__m128 __a, __m128 __b)
{
return (__m128)((__v4si)__a | (__v4si)__b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_xor_ps(__m128 __a, __m128 __b)
{
return (__m128)((__v4si)__a ^ (__v4si)__b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpeq_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpeqss(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpeq_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpeqps(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmplt_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpltss(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmplt_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpltps(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmple_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpless(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmple_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpleps(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpgt_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector(__a,
@@ -223,13 +222,13 @@ _mm_cmpgt_ss(__m128 __a, __m128 __b)
4, 1, 2, 3);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpgt_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpltps(__b, __a);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpge_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector(__a,
@@ -237,49 +236,49 @@ _mm_cmpge_ss(__m128 __a, __m128 __b)
4, 1, 2, 3);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpge_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpleps(__b, __a);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpneq_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpneqss(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpneq_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpneqps(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpnlt_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpnltss(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpnlt_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpnltps(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpnle_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpnless(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpnle_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpnleps(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpngt_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector(__a,
@@ -287,13 +286,13 @@ _mm_cmpngt_ss(__m128 __a, __m128 __b)
4, 1, 2, 3);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpngt_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpnltps(__b, __a);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpnge_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector(__a,
@@ -301,115 +300,115 @@ _mm_cmpnge_ss(__m128 __a, __m128 __b)
4, 1, 2, 3);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpnge_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpnleps(__b, __a);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpord_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpordss(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpord_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpordps(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpunord_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpunordss(__a, __b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cmpunord_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpunordps(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_comieq_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_comieq(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_comilt_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_comilt(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_comile_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_comile(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_comigt_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_comigt(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_comige_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_comige(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_comineq_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_comineq(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_ucomieq_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_ucomieq(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_ucomilt_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_ucomilt(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_ucomile_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_ucomile(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_ucomigt_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_ucomigt(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_ucomige_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_ucomige(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_ucomineq_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_ucomineq(__a, __b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_cvtss_si32(__m128 __a)
{
return __builtin_ia32_cvtss2si(__a);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_cvt_ss2si(__m128 __a)
{
return _mm_cvtss_si32(__a);
@@ -417,7 +416,7 @@ _mm_cvt_ss2si(__m128 __a)
#ifdef __x86_64__
-static __inline__ long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ long long DEFAULT_FN_ATTRS
_mm_cvtss_si64(__m128 __a)
{
return __builtin_ia32_cvtss2si64(__a);
@@ -425,56 +424,56 @@ _mm_cvtss_si64(__m128 __a)
#endif
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cvtps_pi32(__m128 __a)
{
return (__m64)__builtin_ia32_cvtps2pi(__a);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cvt_ps2pi(__m128 __a)
{
return _mm_cvtps_pi32(__a);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_cvttss_si32(__m128 __a)
{
return __a[0];
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_cvtt_ss2si(__m128 __a)
{
return _mm_cvttss_si32(__a);
}
-static __inline__ long long __attribute__((__always_inline__, __nodebug__))
+static __inline__ long long DEFAULT_FN_ATTRS
_mm_cvttss_si64(__m128 __a)
{
return __a[0];
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cvttps_pi32(__m128 __a)
{
return (__m64)__builtin_ia32_cvttps2pi(__a);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cvtt_ps2pi(__m128 __a)
{
return _mm_cvttps_pi32(__a);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cvtsi32_ss(__m128 __a, int __b)
{
__a[0] = __b;
return __a;
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cvt_si2ss(__m128 __a, int __b)
{
return _mm_cvtsi32_ss(__a, __b);
@@ -482,7 +481,7 @@ _mm_cvt_si2ss(__m128 __a, int __b)
#ifdef __x86_64__
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cvtsi64_ss(__m128 __a, long long __b)
{
__a[0] = __b;
@@ -491,25 +490,25 @@ _mm_cvtsi64_ss(__m128 __a, long long __b)
#endif
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cvtpi32_ps(__m128 __a, __m64 __b)
{
return __builtin_ia32_cvtpi2ps(__a, (__v2si)__b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cvt_pi2ps(__m128 __a, __m64 __b)
{
return _mm_cvtpi32_ps(__a, __b);
}
-static __inline__ float __attribute__((__always_inline__, __nodebug__))
+static __inline__ float DEFAULT_FN_ATTRS
_mm_cvtss_f32(__m128 __a)
{
return __a[0];
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_loadh_pi(__m128 __a, const __m64 *__p)
{
typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8)));
@@ -521,7 +520,7 @@ _mm_loadh_pi(__m128 __a, const __m64 *__p)
return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_loadl_pi(__m128 __a, const __m64 *__p)
{
typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8)));
@@ -533,7 +532,7 @@ _mm_loadl_pi(__m128 __a, const __m64 *__p)
return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_load_ss(const float *__p)
{
struct __mm_load_ss_struct {
@@ -543,7 +542,7 @@ _mm_load_ss(const float *__p)
return (__m128){ __u, 0, 0, 0 };
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_load1_ps(const float *__p)
{
struct __mm_load1_ps_struct {
@@ -555,13 +554,13 @@ _mm_load1_ps(const float *__p)
#define _mm_load_ps1(p) _mm_load1_ps(p)
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_load_ps(const float *__p)
{
return *(__m128*)__p;
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_loadu_ps(const float *__p)
{
struct __loadu_ps {
@@ -570,63 +569,63 @@ _mm_loadu_ps(const float *__p)
return ((struct __loadu_ps*)__p)->__v;
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_loadr_ps(const float *__p)
{
__m128 __a = _mm_load_ps(__p);
return __builtin_shufflevector(__a, __a, 3, 2, 1, 0);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_set_ss(float __w)
{
return (__m128){ __w, 0, 0, 0 };
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_set1_ps(float __w)
{
return (__m128){ __w, __w, __w, __w };
}
/* Microsoft specific. */
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_set_ps1(float __w)
{
return _mm_set1_ps(__w);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_set_ps(float __z, float __y, float __x, float __w)
{
return (__m128){ __w, __x, __y, __z };
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_setr_ps(float __z, float __y, float __x, float __w)
{
return (__m128){ __z, __y, __x, __w };
}
-static __inline__ __m128 __attribute__((__always_inline__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_setzero_ps(void)
{
return (__m128){ 0, 0, 0, 0 };
}
-static __inline__ void __attribute__((__always_inline__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_storeh_pi(__m64 *__p, __m128 __a)
{
__builtin_ia32_storehps((__v2si *)__p, __a);
}
-static __inline__ void __attribute__((__always_inline__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_storel_pi(__m64 *__p, __m128 __a)
{
__builtin_ia32_storelps((__v2si *)__p, __a);
}
-static __inline__ void __attribute__((__always_inline__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_store_ss(float *__p, __m128 __a)
{
struct __mm_store_ss_struct {
@@ -635,32 +634,32 @@ _mm_store_ss(float *__p, __m128 __a)
((struct __mm_store_ss_struct*)__p)->__u = __a[0];
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_storeu_ps(float *__p, __m128 __a)
{
__builtin_ia32_storeups(__p, __a);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_store1_ps(float *__p, __m128 __a)
{
__a = __builtin_shufflevector(__a, __a, 0, 0, 0, 0);
_mm_storeu_ps(__p, __a);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_store_ps1(float *__p, __m128 __a)
{
return _mm_store1_ps(__p, __a);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_store_ps(float *__p, __m128 __a)
{
*(__m128 *)__p = __a;
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_storer_ps(float *__p, __m128 __a)
{
__a = __builtin_shufflevector(__a, __a, 3, 2, 1, 0);
@@ -679,32 +678,32 @@ _mm_storer_ps(float *__p, __m128 __a)
#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel)))
#endif
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_stream_pi(__m64 *__p, __m64 __a)
{
__builtin_ia32_movntq(__p, __a);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_stream_ps(float *__p, __m128 __a)
{
__builtin_ia32_movntps(__p, __a);
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_sfence(void)
{
__builtin_ia32_sfence();
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_extract_pi16(__m64 __a, int __n)
{
__v4hi __b = (__v4hi)__a;
return (unsigned short)__b[__n & 3];
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_insert_pi16(__m64 __a, int __d, int __n)
{
__v4hi __b = (__v4hi)__a;
@@ -712,37 +711,37 @@ _mm_insert_pi16(__m64 __a, int __d, int __n)
return (__m64)__b;
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_max_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_max_pu8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_min_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_min_pu8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_movemask_pi8(__m64 __a)
{
return __builtin_ia32_pmovmskb((__v8qi)__a);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_mulhi_pu16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);
@@ -752,37 +751,37 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b)
__m64 __a = (a); \
(__m64)__builtin_ia32_pshufw((__v4hi)__a, (n)); })
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
{
__builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_avg_pu8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_avg_pu16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_sad_pu8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);
}
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+static __inline__ unsigned int DEFAULT_FN_ATTRS
_mm_getcsr(void)
{
return __builtin_ia32_stmxcsr();
}
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void DEFAULT_FN_ATTRS
_mm_setcsr(unsigned int __i)
{
__builtin_ia32_ldmxcsr(__i);
@@ -796,37 +795,37 @@ _mm_setcsr(unsigned int __i)
(((mask) & 0x30) >> 4) + 4, \
(((mask) & 0xc0) >> 6) + 4); })
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_unpackhi_ps(__m128 __a, __m128 __b)
{
return __builtin_shufflevector(__a, __b, 2, 6, 3, 7);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_unpacklo_ps(__m128 __a, __m128 __b)
{
return __builtin_shufflevector(__a, __b, 0, 4, 1, 5);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_move_ss(__m128 __a, __m128 __b)
{
return __builtin_shufflevector(__a, __b, 4, 1, 2, 3);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_movehl_ps(__m128 __a, __m128 __b)
{
return __builtin_shufflevector(__a, __b, 6, 7, 2, 3);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_movelh_ps(__m128 __a, __m128 __b)
{
return __builtin_shufflevector(__a, __b, 0, 1, 4, 5);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cvtpi16_ps(__m64 __a)
{
__m64 __b, __c;
@@ -844,7 +843,7 @@ _mm_cvtpi16_ps(__m64 __a)
return __r;
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cvtpu16_ps(__m64 __a)
{
__m64 __b, __c;
@@ -861,7 +860,7 @@ _mm_cvtpu16_ps(__m64 __a)
return __r;
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cvtpi8_ps(__m64 __a)
{
__m64 __b;
@@ -873,7 +872,7 @@ _mm_cvtpi8_ps(__m64 __a)
return _mm_cvtpi16_ps(__b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cvtpu8_ps(__m64 __a)
{
__m64 __b;
@@ -884,7 +883,7 @@ _mm_cvtpu8_ps(__m64 __a)
return _mm_cvtpi16_ps(__b);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
{
__m128 __c;
@@ -896,7 +895,7 @@ _mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
return _mm_cvtpi32_ps(__c, __a);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cvtps_pi16(__m128 __a)
{
__m64 __b, __c;
@@ -908,7 +907,7 @@ _mm_cvtps_pi16(__m128 __a)
return _mm_packs_pi32(__b, __c);
}
-static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m64 DEFAULT_FN_ATTRS
_mm_cvtps_pi8(__m128 __a)
{
__m64 __b, __c;
@@ -919,7 +918,7 @@ _mm_cvtps_pi8(__m128 __a)
return _mm_packs_pi16(__b, __c);
}
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int DEFAULT_FN_ATTRS
_mm_movemask_ps(__m128 __a)
{
return __builtin_ia32_movmskps(__a);
@@ -993,11 +992,11 @@ do { \
#define _m_ _mm_
+#undef DEFAULT_FN_ATTRS
+
/* Ugly hack for backwards-compatibility (compatible with gcc) */
#if defined(__SSE2__) && !__has_feature(modules)
#include <emmintrin.h>
#endif
-#endif /* __SSE__ */
-
#endif /* __XMMINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/xopintrin.h b/contrib/llvm/tools/clang/lib/Headers/xopintrin.h
index cc94ca0..8417d78 100644
--- a/contrib/llvm/tools/clang/lib/Headers/xopintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/xopintrin.h
@@ -28,211 +28,210 @@
#ifndef __XOPINTRIN_H
#define __XOPINTRIN_H
-#ifndef __XOP__
-# error "XOP instruction set is not enabled"
-#else
-
#include <fma4intrin.h>
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+/* Define the default attributes for the functions in this file. */
+#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop")))
+
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_haddw_epi8(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_haddd_epi8(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_haddq_epi8(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_haddd_epi16(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_haddq_epi16(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_haddq_epi32(__m128i __A)
{
return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_haddw_epu8(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_haddd_epu8(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_haddq_epu8(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_haddd_epu16(__m128i __A)
{
return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_haddq_epu16(__m128i __A)
{
return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_haddq_epu32(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_hsubw_epi8(__m128i __A)
{
return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_hsubd_epi16(__m128i __A)
{
return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_hsubq_epi32(__m128i __A)
{
return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C);
}
-static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256i DEFAULT_FN_ATTRS
_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
{
return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_rot_epi8(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_rot_epi16(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_rot_epi32(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_rot_epi64(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
@@ -254,49 +253,49 @@ _mm_rot_epi64(__m128i __A, __m128i __B)
__m128i __A = (A); \
(__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); })
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_shl_epi8(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_shl_epi16(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_shl_epi32(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_shl_epi64(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sha_epi8(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sha_epi16(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sha_epi32(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_sha_epi64(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
@@ -351,385 +350,385 @@ _mm_sha_epi64(__m128i __A, __m128i __B)
#define _MM_PCOMCTRL_FALSE 6
#define _MM_PCOMCTRL_TRUE 7
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comlt_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comle_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comgt_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comge_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comeq_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comneq_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comfalse_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comtrue_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comlt_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comle_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comgt_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comge_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comeq_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comneq_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comfalse_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comtrue_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comlt_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comle_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comgt_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comge_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comeq_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comneq_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comfalse_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comtrue_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comlt_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comle_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comgt_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comge_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comeq_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comneq_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comfalse_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comtrue_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comlt_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comle_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comgt_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comge_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comeq_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comneq_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comfalse_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comtrue_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comlt_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comle_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comgt_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comge_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comeq_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comneq_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comfalse_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comtrue_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comlt_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comle_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comgt_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comge_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comeq_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comneq_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comfalse_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comtrue_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comlt_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comle_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comgt_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comge_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comeq_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comneq_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comfalse_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
}
-static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_comtrue_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
@@ -763,42 +762,42 @@ _mm_comtrue_epi64(__m128i __A, __m128i __B)
(__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \
(__v8si)__C, (I)); })
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_frcz_ss(__m128 __A)
{
return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_frcz_sd(__m128d __A)
{
return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
}
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128 DEFAULT_FN_ATTRS
_mm_frcz_ps(__m128 __A)
{
return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m128d DEFAULT_FN_ATTRS
_mm_frcz_pd(__m128d __A)
{
return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
}
-static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256 DEFAULT_FN_ATTRS
_mm256_frcz_ps(__m256 __A)
{
return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
}
-static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+static __inline__ __m256d DEFAULT_FN_ATTRS
_mm256_frcz_pd(__m256d __A)
{
return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
}
-#endif /* __XOP__ */
+#undef DEFAULT_FN_ATTRS
#endif /* __XOPINTRIN_H */
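The hunks above replace every repeated __attribute__((__always_inline__, __nodebug__)) spelling with a DEFAULT_FN_ATTRS macro, and the old trailing #endif /* __XOP__ */ becomes an #undef so the macro does not leak out of the header. A minimal sketch of the resulting pattern, assuming the macro simply expands to the old attribute list (the exact definition near the top of xopintrin.h may differ; _mm_example is a placeholder name):

/* Near the top of the header (sketch): */
#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))

/* Each intrinsic then follows this shape: */
static __inline__ __m128i DEFAULT_FN_ATTRS
_mm_example(__m128i __A)
{
  return __A;
}

/* At the end of the header, matching the +#undef added above: */
#undef DEFAULT_FN_ATTRS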
diff --git a/contrib/llvm/tools/clang/lib/Headers/xtestintrin.h b/contrib/llvm/tools/clang/lib/Headers/xtestintrin.h
new file mode 100644
index 0000000..9d3378f
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/xtestintrin.h
@@ -0,0 +1,41 @@
+/*===---- xtestintrin.h - XTEST intrinsic ---------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <xtestintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __XTESTINTRIN_H
+#define __XTESTINTRIN_H
+
+/* xtest returns non-zero if the instruction is executed within an RTM or active
+ * HLE region. */
+/* FIXME: This can be an either/or for RTM/HLE. Deal with this when HLE is
+ * supported. */
+static __inline__ int
+ __attribute__((__always_inline__, __nodebug__, __target__("rtm")))
+ _xtest(void) {
+ return __builtin_ia32_xtest();
+}
+
+#endif
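The new _xtest() intrinsic only reports whether execution is currently inside a transactional region, so it is normally paired with the RTM intrinsics exposed through the same <immintrin.h> umbrella. A usage sketch (not part of the patch), assuming an RTM-capable CPU and a build with the rtm feature enabled (e.g. -mrtm):

#include <immintrin.h>

static int transactional_increment(int *counter) {
  if (_xbegin() == _XBEGIN_STARTED) {
    /* _xtest() returns non-zero here: we are inside the RTM region. */
    (*counter)++;
    _xend();
    return 1;
  }
  /* The transaction aborted or never started; _xtest() returns zero here. */
  return 0;
}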
diff --git a/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp b/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp
index ad7d344..7a98f54 100644
--- a/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp
@@ -158,7 +158,7 @@ std::string HeaderSearch::getModuleFileName(StringRef ModuleName,
Module *HeaderSearch::lookupModule(StringRef ModuleName, bool AllowSearch) {
// Look in the module map to determine if there is a module by this name.
Module *Module = ModMap.findModule(ModuleName);
- if (Module || !AllowSearch || !LangOpts.ModulesImplicitMaps)
+ if (Module || !AllowSearch || !HSOpts->ImplicitModuleMaps)
return Module;
// Look through the various header search paths to load any available module
@@ -1076,7 +1076,7 @@ StringRef HeaderSearch::getUniqueFrameworkName(StringRef Framework) {
bool HeaderSearch::hasModuleMap(StringRef FileName,
const DirectoryEntry *Root,
bool IsSystem) {
- if (!HSOpts->ModuleMaps || !LangOpts.ModulesImplicitMaps)
+ if (!HSOpts->ImplicitModuleMaps)
return false;
SmallVector<const DirectoryEntry *, 2> FixUpDirectories;
@@ -1203,7 +1203,7 @@ HeaderSearch::loadModuleMapFileImpl(const FileEntry *File, bool IsSystem,
const FileEntry *
HeaderSearch::lookupModuleMapFile(const DirectoryEntry *Dir, bool IsFramework) {
- if (!LangOpts.ModulesImplicitMaps)
+ if (!HSOpts->ImplicitModuleMaps)
return nullptr;
// For frameworks, the preferred spelling is Modules/module.modulemap, but
// module.map at the framework root is also accepted.
@@ -1241,7 +1241,7 @@ Module *HeaderSearch::loadFrameworkModule(StringRef Name,
// Try to infer a module map from the framework directory.
- if (LangOpts.ModulesImplicitMaps)
+ if (HSOpts->ImplicitModuleMaps)
return ModMap.inferFrameworkModule(Name, Dir, IsSystem, /*Parent=*/nullptr);
return nullptr;
@@ -1282,7 +1282,7 @@ HeaderSearch::loadModuleMapFile(const DirectoryEntry *Dir, bool IsSystem,
void HeaderSearch::collectAllModules(SmallVectorImpl<Module *> &Modules) {
Modules.clear();
- if (LangOpts.ModulesImplicitMaps) {
+ if (HSOpts->ImplicitModuleMaps) {
// Load module maps for each of the header search directories.
for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) {
bool IsSystem = SearchDirs[Idx].isSystemHeaderDirectory();
@@ -1333,7 +1333,7 @@ void HeaderSearch::collectAllModules(SmallVectorImpl<Module *> &Modules) {
}
void HeaderSearch::loadTopLevelSystemModules() {
- if (!LangOpts.ModulesImplicitMaps)
+ if (!HSOpts->ImplicitModuleMaps)
return;
// Load module maps for each of the header search directories.
@@ -1351,7 +1351,7 @@ void HeaderSearch::loadTopLevelSystemModules() {
}
void HeaderSearch::loadSubdirectoryModuleMaps(DirectoryLookup &SearchDir) {
- assert(LangOpts.ModulesImplicitMaps &&
+ assert(HSOpts->ImplicitModuleMaps &&
"Should not be loading subdirectory module maps");
if (SearchDir.haveSearchedAllModuleMaps())
diff --git a/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp b/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp
index addad59..6c98d01 100644
--- a/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp
@@ -165,7 +165,8 @@ static bool isBuiltinHeader(StringRef FileName) {
ModuleMap::HeadersMap::iterator
ModuleMap::findKnownHeader(const FileEntry *File) {
HeadersMap::iterator Known = Headers.find(File);
- if (Known == Headers.end() && File->getDir() == BuiltinIncludeDir &&
+ if (HeaderInfo.getHeaderSearchOpts().ImplicitModuleMaps &&
+ Known == Headers.end() && File->getDir() == BuiltinIncludeDir &&
isBuiltinHeader(llvm::sys::path::filename(File->getName()))) {
HeaderInfo.loadTopLevelSystemModules();
return Headers.find(File);
@@ -176,6 +177,9 @@ ModuleMap::findKnownHeader(const FileEntry *File) {
ModuleMap::KnownHeader
ModuleMap::findHeaderInUmbrellaDirs(const FileEntry *File,
SmallVectorImpl<const DirectoryEntry *> &IntermediateDirs) {
+ if (UmbrellaDirs.empty())
+ return KnownHeader();
+
const DirectoryEntry *Dir = File->getDir();
assert(Dir && "file in no directory");
@@ -330,41 +334,23 @@ static bool isBetterKnownHeader(const ModuleMap::KnownHeader &New,
return false;
}
-ModuleMap::KnownHeader
-ModuleMap::findModuleForHeader(const FileEntry *File,
- Module *RequestingModule) {
- HeadersMap::iterator Known = findKnownHeader(File);
-
+ModuleMap::KnownHeader ModuleMap::findModuleForHeader(const FileEntry *File) {
auto MakeResult = [&](ModuleMap::KnownHeader R) -> ModuleMap::KnownHeader {
if (R.getRole() & ModuleMap::TextualHeader)
return ModuleMap::KnownHeader();
return R;
};
+ HeadersMap::iterator Known = findKnownHeader(File);
if (Known != Headers.end()) {
ModuleMap::KnownHeader Result;
-
// Iterate over all modules that 'File' is part of to find the best fit.
- for (SmallVectorImpl<KnownHeader>::iterator I = Known->second.begin(),
- E = Known->second.end();
- I != E; ++I) {
+ for (KnownHeader &H : Known->second) {
// Cannot use a module if it is unavailable.
- if (!I->getModule()->isAvailable())
+ if (!H.getModule()->isAvailable())
continue;
-
- // If 'File' is part of 'RequestingModule', 'RequestingModule' is the
- // module we are looking for.
- if (I->getModule() == RequestingModule)
- return MakeResult(*I);
-
- // If uses need to be specified explicitly, we are only allowed to return
- // modules that are explicitly used by the requesting module.
- if (RequestingModule && LangOpts.ModulesDeclUse &&
- !RequestingModule->directlyUses(I->getModule()))
- continue;
-
- if (!Result || isBetterKnownHeader(*I, Result))
- Result = *I;
+ if (!Result || isBetterKnownHeader(H, Result))
+ Result = H;
}
return MakeResult(Result);
}
diff --git a/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp b/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp
index ec06e79..33ce799 100644
--- a/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp
@@ -1575,6 +1575,15 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
PragmaARCCFCodeAuditedLoc = SourceLocation();
}
+ // Complain about attempts to #include files in an assume-nonnull pragma.
+ if (PragmaAssumeNonNullLoc.isValid()) {
+ Diag(HashLoc, diag::err_pp_include_in_assume_nonnull);
+ Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here);
+
+ // Immediately leave the pragma.
+ PragmaAssumeNonNullLoc = SourceLocation();
+ }
+
if (HeaderInfo.HasIncludeAliasMap()) {
// Map the filename with the brackets still attached. If the name doesn't
// map to anything, fall back on the filename we've already gotten the
@@ -1603,7 +1612,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
FilenameLoc, LangOpts.MSVCCompat ? NormalizedPath.c_str() : Filename,
isAngled, LookupFrom, LookupFromFile, CurDir,
Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
- HeaderInfo.getHeaderSearchOpts().ModuleMaps ? &SuggestedModule : nullptr);
+ &SuggestedModule);
if (!File) {
if (Callbacks) {
@@ -1620,9 +1629,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
FilenameLoc,
LangOpts.MSVCCompat ? NormalizedPath.c_str() : Filename, isAngled,
LookupFrom, LookupFromFile, CurDir, nullptr, nullptr,
- HeaderInfo.getHeaderSearchOpts().ModuleMaps ? &SuggestedModule
- : nullptr,
- /*SkipCache*/ true);
+ &SuggestedModule, /*SkipCache*/ true);
}
}
}
@@ -1638,8 +1645,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
LookupFrom, LookupFromFile, CurDir,
Callbacks ? &SearchPath : nullptr,
Callbacks ? &RelativePath : nullptr,
- HeaderInfo.getHeaderSearchOpts().ModuleMaps ? &SuggestedModule
- : nullptr);
+ &SuggestedModule);
if (File) {
SourceRange Range(FilenameTok.getLocation(), CharEnd);
Diag(FilenameTok, diag::err_pp_file_not_found_not_fatal) <<
@@ -2012,8 +2018,8 @@ static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
}
// #define inline
- if ((MacroName.is(tok::kw_extern) || MacroName.is(tok::kw_inline) ||
- MacroName.is(tok::kw_static) || MacroName.is(tok::kw_const)) &&
+ if (MacroName.isOneOf(tok::kw_extern, tok::kw_inline, tok::kw_static,
+ tok::kw_const) &&
MI->getNumTokens() == 0) {
return true;
}
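The hunk above, and many of the hunks that follow, collapse chains of Tok.is(X) || Tok.is(Y) || ... into a single Tok.isOneOf(X, Y, ...). The helper is a small variadic member of Token; a sketch of its usual shape (the authoritative declaration lives in clang/Lex/Token.h):

bool is(tok::TokenKind K) const { return Kind == (unsigned)K; }
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
  return is(K1) || is(K2);
}
template <typename... Ts>
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts... Ks) const {
  return is(K1) || isOneOf(K2, Ks...);
}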
diff --git a/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp b/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp
index e68fb7d..1a35d32 100644
--- a/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp
@@ -355,6 +355,17 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
PragmaARCCFCodeAuditedLoc = SourceLocation();
}
+ // Complain about reaching a true EOF within assume_nonnull.
+ // We don't want to complain about reaching the end of a macro
+ // instantiation or a _Pragma.
+ if (PragmaAssumeNonNullLoc.isValid() &&
+ !isEndOfMacro && !(CurLexer && CurLexer->Is_PragmaLexer)) {
+ Diag(PragmaAssumeNonNullLoc, diag::err_pp_eof_in_assume_nonnull);
+
+ // Recover by leaving immediately.
+ PragmaAssumeNonNullLoc = SourceLocation();
+ }
+
// If this is a #include'd file, pop it off the include stack and continue
// lexing the #includer file.
if (!IncludeMacroStack.empty()) {
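The check above fires when a file ends while a #pragma clang assume_nonnull region is still open; macro expansions and _Pragma lexers are exempt so the diagnostic points at real files only. A header that would now be diagnosed looks roughly like this (sketch):

#pragma clang assume_nonnull begin
void set_name(char *name);
/* end of file: the matching "#pragma clang assume_nonnull end" is missing,
   so err_pp_eof_in_assume_nonnull is reported at the begin location. */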
diff --git a/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp b/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp
index 03784e2..5e0b283 100644
--- a/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp
@@ -726,10 +726,10 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
unsigned NumActuals = 0;
while (Tok.isNot(tok::r_paren)) {
- if (ContainsCodeCompletionTok && (Tok.is(tok::eof) || Tok.is(tok::eod)))
+ if (ContainsCodeCompletionTok && Tok.isOneOf(tok::eof, tok::eod))
break;
- assert((Tok.is(tok::l_paren) || Tok.is(tok::comma)) &&
+ assert(Tok.isOneOf(tok::l_paren, tok::comma) &&
"only expect argument separators here");
unsigned ArgTokenStart = ArgTokens.size();
@@ -744,7 +744,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
// an argument value in a macro could expand to ',' or '(' or ')'.
LexUnexpandedToken(Tok);
- if (Tok.is(tok::eof) || Tok.is(tok::eod)) { // "#if f(<eof>" & "#if f(\n"
+ if (Tok.isOneOf(tok::eof, tok::eod)) { // "#if f(<eof>" & "#if f(\n"
if (!ContainsCodeCompletionTok) {
Diag(MacroName, diag::err_unterm_macro_invoc);
Diag(MI->getDefinitionLoc(), diag::note_macro_here)
@@ -1049,13 +1049,17 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
Feature = Feature.substr(2, Feature.size() - 4);
return llvm::StringSwitch<bool>(Feature)
- .Case("address_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Address))
+ .Case("address_sanitizer",
+ LangOpts.Sanitize.hasOneOf(SanitizerKind::Address |
+ SanitizerKind::KernelAddress))
+ .Case("assume_nonnull", LangOpts.ObjC1 || LangOpts.GNUMode)
.Case("attribute_analyzer_noreturn", true)
.Case("attribute_availability", true)
.Case("attribute_availability_with_message", true)
.Case("attribute_availability_app_extension", true)
.Case("attribute_cf_returns_not_retained", true)
.Case("attribute_cf_returns_retained", true)
+ .Case("attribute_cf_returns_on_parameters", true)
.Case("attribute_deprecated_with_message", true)
.Case("attribute_ext_vector_type", true)
.Case("attribute_ns_returns_not_retained", true)
@@ -1073,6 +1077,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
.Case("cxx_exceptions", LangOpts.CXXExceptions)
.Case("cxx_rtti", LangOpts.RTTI)
.Case("enumerator_attributes", true)
+ .Case("nullability", LangOpts.ObjC1 || LangOpts.GNUMode)
.Case("memory_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Memory))
.Case("thread_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Thread))
.Case("dataflow_sanitizer", LangOpts.Sanitize.has(SanitizerKind::DataFlow))
@@ -1190,6 +1195,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
.Case("is_trivially_copyable", LangOpts.CPlusPlus)
.Case("is_union", LangOpts.CPlusPlus)
.Case("modules", LangOpts.Modules)
+ .Case("safe_stack", LangOpts.Sanitize.has(SanitizerKind::SafeStack))
.Case("tls", PP.getTargetInfo().isTLSSupported())
.Case("underlying_type", LangOpts.CPlusPlus)
.Default(false);
@@ -1219,6 +1225,7 @@ static bool HasExtension(const Preprocessor &PP, const IdentifierInfo *II) {
// Because we inherit the feature list from HasFeature, this string switch
// must be less restrictive than HasFeature's.
return llvm::StringSwitch<bool>(Extension)
+ .Case("nullability", true)
// C11 features supported by other languages as extensions.
.Case("c_alignas", true)
.Case("c_alignof", true)
@@ -1746,7 +1753,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
Diag(Tok.getLocation(), diag::err_pp_identifier_arg_not_identifier)
<< Tok.getKind();
// Don't walk past anything that's not a real token.
- if (Tok.is(tok::eof) || Tok.is(tok::eod) || Tok.isAnnotation())
+ if (Tok.isOneOf(tok::eof, tok::eod) || Tok.isAnnotation())
return;
}
diff --git a/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp b/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp
index 26ed674..5eb6655 100644
--- a/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp
@@ -1342,6 +1342,60 @@ struct PragmaARCCFCodeAuditedHandler : public PragmaHandler {
}
};
+/// PragmaAssumeNonNullHandler -
+/// \#pragma clang assume_nonnull begin/end
+struct PragmaAssumeNonNullHandler : public PragmaHandler {
+ PragmaAssumeNonNullHandler() : PragmaHandler("assume_nonnull") {}
+ void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
+ Token &NameTok) override {
+ SourceLocation Loc = NameTok.getLocation();
+ bool IsBegin;
+
+ Token Tok;
+
+ // Lex the 'begin' or 'end'.
+ PP.LexUnexpandedToken(Tok);
+ const IdentifierInfo *BeginEnd = Tok.getIdentifierInfo();
+ if (BeginEnd && BeginEnd->isStr("begin")) {
+ IsBegin = true;
+ } else if (BeginEnd && BeginEnd->isStr("end")) {
+ IsBegin = false;
+ } else {
+ PP.Diag(Tok.getLocation(), diag::err_pp_assume_nonnull_syntax);
+ return;
+ }
+
+ // Verify that this is followed by EOD.
+ PP.LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::eod))
+ PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";
+
+ // The start location of the active audit.
+ SourceLocation BeginLoc = PP.getPragmaAssumeNonNullLoc();
+
+ // The start location we want after processing this.
+ SourceLocation NewLoc;
+
+ if (IsBegin) {
+ // Complain about attempts to re-enter an audit.
+ if (BeginLoc.isValid()) {
+ PP.Diag(Loc, diag::err_pp_double_begin_of_assume_nonnull);
+ PP.Diag(BeginLoc, diag::note_pragma_entered_here);
+ }
+ NewLoc = Loc;
+ } else {
+ // Complain about attempts to leave an audit that doesn't exist.
+ if (!BeginLoc.isValid()) {
+ PP.Diag(Loc, diag::err_pp_unmatched_end_of_assume_nonnull);
+ return;
+ }
+ NewLoc = SourceLocation();
+ }
+
+ PP.setPragmaAssumeNonNullLoc(NewLoc);
+ }
+};
+
/// \brief Handle "\#pragma region [...]"
///
/// The syntax is
@@ -1393,6 +1447,7 @@ void Preprocessor::RegisterBuiltinPragmas() {
AddPragmaHandler("clang", new PragmaDependencyHandler());
AddPragmaHandler("clang", new PragmaDiagnosticHandler("clang"));
AddPragmaHandler("clang", new PragmaARCCFCodeAuditedHandler());
+ AddPragmaHandler("clang", new PragmaAssumeNonNullHandler());
AddPragmaHandler("STDC", new PragmaSTDC_FENV_ACCESSHandler());
AddPragmaHandler("STDC", new PragmaSTDC_CX_LIMITED_RANGEHandler());
diff --git a/contrib/llvm/tools/clang/lib/Lex/TokenConcatenation.cpp b/contrib/llvm/tools/clang/lib/Lex/TokenConcatenation.cpp
index 0832749..27b4eab 100644
--- a/contrib/llvm/tools/clang/lib/Lex/TokenConcatenation.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/TokenConcatenation.cpp
@@ -178,20 +178,20 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
if (ConcatInfo & aci_avoid_equal) {
// If the next token is '=' or '==', avoid concatenation.
- if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
+ if (Tok.isOneOf(tok::equal, tok::equalequal))
return true;
ConcatInfo &= ~aci_avoid_equal;
}
if (Tok.isAnnotation()) {
// Modules annotation can show up when generated automatically for includes.
- assert((Tok.is(tok::annot_module_include) ||
- Tok.is(tok::annot_module_begin) ||
- Tok.is(tok::annot_module_end)) &&
+ assert(Tok.isOneOf(tok::annot_module_include, tok::annot_module_begin,
+ tok::annot_module_end) &&
"unexpected annotation in AvoidConcat");
ConcatInfo = 0;
}
- if (ConcatInfo == 0) return false;
+ if (ConcatInfo == 0)
+ return false;
// Basic algorithm: we look at the first character of the second token, and
// determine whether it, if appended to the first token, would form (or
@@ -238,11 +238,11 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
if (Tok.is(tok::numeric_constant))
return GetFirstChar(PP, Tok) != '.';
- if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) ||
- Tok.is(tok::utf8_string_literal) || Tok.is(tok::utf16_string_literal) ||
- Tok.is(tok::utf32_string_literal) || Tok.is(tok::wide_char_constant) ||
- Tok.is(tok::utf8_char_constant) || Tok.is(tok::utf16_char_constant) ||
- Tok.is(tok::utf32_char_constant))
+ if (Tok.getIdentifierInfo() ||
+ Tok.isOneOf(tok::wide_string_literal, tok::utf8_string_literal,
+ tok::utf16_string_literal, tok::utf32_string_literal,
+ tok::wide_char_constant, tok::utf8_char_constant,
+ tok::utf16_char_constant, tok::utf32_char_constant))
return true;
// If this isn't identifier + string, we're done.
diff --git a/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp b/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp
index 83efbab..e7512fa 100644
--- a/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp
@@ -185,7 +185,7 @@ void TokenLexer::ExpandFunctionArguments() {
if (i != 0 && !Tokens[i-1].is(tok::hashhash) && CurTok.hasLeadingSpace())
NextTokGetsSpace = true;
- if (CurTok.is(tok::hash) || CurTok.is(tok::hashat)) {
+ if (CurTok.isOneOf(tok::hash, tok::hashat)) {
int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo());
assert(ArgNo != -1 && "Token following # is not an argument?");
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseCXXInlineMethods.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseCXXInlineMethods.cpp
index 5da70d0..ea67a52 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseCXXInlineMethods.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseCXXInlineMethods.cpp
@@ -29,8 +29,7 @@ NamedDecl *Parser::ParseCXXInlineMethodDef(AccessSpecifier AS,
const VirtSpecifiers& VS,
ExprResult& Init) {
assert(D.isFunctionDeclarator() && "This isn't a function declarator!");
- assert((Tok.is(tok::l_brace) || Tok.is(tok::colon) || Tok.is(tok::kw_try) ||
- Tok.is(tok::equal)) &&
+ assert(Tok.isOneOf(tok::l_brace, tok::colon, tok::kw_try, tok::equal) &&
"Current token not a '{', ':', '=', or 'try'!");
MultiTemplateParamsArg TemplateParams(
@@ -191,7 +190,7 @@ NamedDecl *Parser::ParseCXXInlineMethodDef(AccessSpecifier AS,
/// declaration. Now lex its initializer and store its tokens for parsing
/// after the class is complete.
void Parser::ParseCXXNonStaticMemberInitializer(Decl *VarD) {
- assert((Tok.is(tok::l_brace) || Tok.is(tok::equal)) &&
+ assert(Tok.isOneOf(tok::l_brace, tok::equal) &&
"Current token not a '{' or '='!");
LateParsedMemberInitializer *MI =
@@ -511,7 +510,7 @@ void Parser::ParseLexedMethodDef(LexedMethod &LM) {
// Consume the previously pushed token.
ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true);
- assert((Tok.is(tok::l_brace) || Tok.is(tok::colon) || Tok.is(tok::kw_try))
+ assert(Tok.isOneOf(tok::l_brace, tok::colon, tok::kw_try)
&& "Inline method not starting with '{', ':' or 'try'");
// Parse the method body. Function body parsing code is similar enough
@@ -826,7 +825,7 @@ bool Parser::ConsumeAndStoreFunctionPrologue(CachedTokens &Toks) {
}
}
- if (Tok.is(tok::identifier) || Tok.is(tok::kw_template)) {
+ if (Tok.isOneOf(tok::identifier, tok::kw_template)) {
Toks.push_back(Tok);
ConsumeToken();
} else if (Tok.is(tok::code_completion)) {
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp
index bd114d7..1c52552 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp
@@ -285,7 +285,7 @@ unsigned Parser::ParseAttributeArgsCommon(
if (AttrKind == AttributeList::UnknownAttribute ||
AttrKind == AttributeList::IgnoredAttribute) {
const Token &Next = NextToken();
- IsIdentifierArg = Next.is(tok::r_paren) || Next.is(tok::comma);
+ IsIdentifierArg = Next.isOneOf(tok::r_paren, tok::comma);
}
if (IsIdentifierArg)
@@ -687,6 +687,28 @@ void Parser::ParseOpenCLQualifiers(ParsedAttributes &Attrs) {
AttributeList::AS_Keyword);
}
+void Parser::ParseNullabilityTypeSpecifiers(ParsedAttributes &attrs) {
+ // Treat these like attributes, even though they're type specifiers.
+ while (true) {
+ switch (Tok.getKind()) {
+ case tok::kw___nonnull:
+ case tok::kw___nullable:
+ case tok::kw___null_unspecified: {
+ IdentifierInfo *AttrName = Tok.getIdentifierInfo();
+ SourceLocation AttrNameLoc = ConsumeToken();
+ if (!getLangOpts().ObjC1)
+ Diag(AttrNameLoc, diag::ext_nullability)
+ << AttrName;
+ attrs.addNew(AttrName, AttrNameLoc, nullptr, AttrNameLoc, nullptr, 0,
+ AttributeList::AS_Keyword);
+ break;
+ }
+ default:
+ return;
+ }
+ }
+}
+
static bool VersionNumberSeparator(const char Separator) {
return (Separator == '.' || Separator == '_');
}
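ParseNullabilityTypeSpecifiers, added above, accepts the three nullability keywords wherever declaration specifiers and type qualifiers are parsed; outside Objective-C they are flagged as an extension (diag::ext_nullability). Illustrative declarations using the spellings this patch wires up (a sketch, not taken from the patch):

int * __nullable find_item(int * __nonnull table, int key);
void log_pointer(void * __null_unspecified p);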
@@ -1599,7 +1621,7 @@ void Parser::SkipMalformedDecl() {
// a malformed class or function definition or similar.
ConsumeBrace();
SkipUntil(tok::r_brace);
- if (Tok.is(tok::comma) || Tok.is(tok::l_brace) || Tok.is(tok::kw_try)) {
+ if (Tok.isOneOf(tok::comma, tok::l_brace, tok::kw_try)) {
// This declaration isn't over yet. Keep skipping.
continue;
}
@@ -1705,7 +1727,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS,
// and we don't have any other declarators in this declaration.
bool Fixit = !DS.setFunctionSpecNoreturn(Loc, PrevSpec, DiagID);
MaybeParseGNUAttributes(D, &LateParsedAttrs);
- Fixit &= Tok.is(tok::semi) || Tok.is(tok::l_brace) || Tok.is(tok::kw_try);
+ Fixit &= Tok.isOneOf(tok::semi, tok::l_brace, tok::kw_try);
Diag(Loc, diag::err_c11_noreturn_misplaced)
<< (Fixit ? FixItHint::CreateRemoval(Loc) : FixItHint())
@@ -2176,9 +2198,9 @@ void Parser::ParseSpecifierQualifierList(DeclSpec &DS, AccessSpecifier AS,
/// int (x)
///
static bool isValidAfterIdentifierInDeclarator(const Token &T) {
- return T.is(tok::l_square) || T.is(tok::l_paren) || T.is(tok::r_paren) ||
- T.is(tok::semi) || T.is(tok::comma) || T.is(tok::equal) ||
- T.is(tok::kw_asm) || T.is(tok::l_brace) || T.is(tok::colon);
+ return T.isOneOf(tok::l_square, tok::l_paren, tok::r_paren, tok::semi,
+ tok::comma, tok::equal, tok::kw_asm, tok::l_brace,
+ tok::colon);
}
@@ -2438,7 +2460,7 @@ ExprResult Parser::ParseAlignArgument(SourceLocation Start,
/// [C++11] 'alignas' '(' assignment-expression ...[opt] ')'
void Parser::ParseAlignmentSpecifier(ParsedAttributes &Attrs,
SourceLocation *EndLoc) {
- assert((Tok.is(tok::kw_alignas) || Tok.is(tok::kw__Alignas)) &&
+ assert(Tok.isOneOf(tok::kw_alignas, tok::kw__Alignas) &&
"Not an alignment-specifier!");
IdentifierInfo *KWName = Tok.getIdentifierInfo();
@@ -2481,8 +2503,8 @@ Parser::DiagnoseMissingSemiAfterTagDefinition(DeclSpec &DS, AccessSpecifier AS,
bool EnteringContext = (DSContext == DSC_class || DSContext == DSC_top_level);
if (getLangOpts().CPlusPlus &&
- (Tok.is(tok::identifier) || Tok.is(tok::coloncolon) ||
- Tok.is(tok::kw_decltype) || Tok.is(tok::annot_template_id)) &&
+ Tok.isOneOf(tok::identifier, tok::coloncolon, tok::kw_decltype,
+ tok::annot_template_id) &&
TryAnnotateCXXScopeToken(EnteringContext)) {
SkipMalformedDecl();
return true;
@@ -2495,7 +2517,7 @@ Parser::DiagnoseMissingSemiAfterTagDefinition(DeclSpec &DS, AccessSpecifier AS,
// Determine whether the following tokens could possibly be a
// declarator.
bool MightBeDeclarator = true;
- if (Tok.is(tok::kw_typename) || Tok.is(tok::annot_typename)) {
+ if (Tok.isOneOf(tok::kw_typename, tok::annot_typename)) {
// A declarator-id can't start with 'typename'.
MightBeDeclarator = false;
} else if (AfterScope.is(tok::annot_template_id)) {
@@ -2510,9 +2532,8 @@ Parser::DiagnoseMissingSemiAfterTagDefinition(DeclSpec &DS, AccessSpecifier AS,
// These tokens cannot come after the declarator-id in a
// simple-declaration, and are likely to come after a type-specifier.
- if (Next.is(tok::star) || Next.is(tok::amp) || Next.is(tok::ampamp) ||
- Next.is(tok::identifier) || Next.is(tok::annot_cxxscope) ||
- Next.is(tok::coloncolon)) {
+ if (Next.isOneOf(tok::star, tok::amp, tok::ampamp, tok::identifier,
+ tok::annot_cxxscope, tok::coloncolon)) {
// Missing a semicolon.
MightBeDeclarator = false;
} else if (HasScope) {
@@ -2920,6 +2941,19 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
if (DS.isTypeAltiVecVector())
goto DoneWithDeclSpec;
+ if (DSContext == DSC_objc_method_result && isObjCInstancetype()) {
+ ParsedType TypeRep = Actions.ActOnObjCInstanceType(Loc);
+ assert(TypeRep);
+ isInvalid = DS.SetTypeSpecType(DeclSpec::TST_typename, Loc, PrevSpec,
+ DiagID, TypeRep, Policy);
+ if (isInvalid)
+ break;
+
+ DS.SetRangeEnd(Loc);
+ ConsumeToken();
+ continue;
+ }
+
ParsedType TypeRep =
Actions.getTypeName(*Tok.getIdentifierInfo(),
Tok.getLocation(), getCurScope());
@@ -3041,6 +3075,13 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
ParseOpenCLAttributes(DS.getAttributes());
continue;
+ // Nullability type specifiers.
+ case tok::kw___nonnull:
+ case tok::kw___nullable:
+ case tok::kw___null_unspecified:
+ ParseNullabilityTypeSpecifiers(DS.getAttributes());
+ continue;
+
// storage-class-specifier
case tok::kw_typedef:
isInvalid = DS.SetStorageClassSpec(Actions, DeclSpec::SCS_typedef, Loc,
@@ -3654,7 +3695,7 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS,
bool IsScopedUsingClassTag = false;
// In C++11, recognize 'enum class' and 'enum struct'.
- if (Tok.is(tok::kw_class) || Tok.is(tok::kw_struct)) {
+ if (Tok.isOneOf(tok::kw_class, tok::kw_struct)) {
Diag(Tok, getLangOpts().CPlusPlus11 ? diag::warn_cxx98_compat_scoped_enum
: diag::ext_scoped_enum);
IsScopedUsingClassTag = Tok.is(tok::kw_class);
@@ -4285,6 +4326,10 @@ bool Parser::isTypeSpecifierQualifier() {
case tok::kw___pascal:
case tok::kw___unaligned:
+ case tok::kw___nonnull:
+ case tok::kw___nullable:
+ case tok::kw___null_unspecified:
+
case tok::kw___private:
case tok::kw___local:
case tok::kw___global:
@@ -4458,6 +4503,10 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
case tok::kw___pascal:
case tok::kw___unaligned:
+ case tok::kw___nonnull:
+ case tok::kw___nullable:
+ case tok::kw___null_unspecified:
+
case tok::kw___private:
case tok::kw___local:
case tok::kw___global:
@@ -4483,7 +4532,7 @@ bool Parser::isConstructorDeclarator(bool IsUnqualified) {
}
// Parse the constructor name.
- if (Tok.is(tok::identifier) || Tok.is(tok::annot_template_id)) {
+ if (Tok.isOneOf(tok::identifier, tok::annot_template_id)) {
// We already know that we have a constructor name; just consume
// the token.
ConsumeToken();
@@ -4687,6 +4736,14 @@ void Parser::ParseTypeQualifierListOpt(DeclSpec &DS, unsigned AttrReqs,
continue;
}
goto DoneWithTypeQuals;
+
+ // Nullability type specifiers.
+ case tok::kw___nonnull:
+ case tok::kw___nullable:
+ case tok::kw___null_unspecified:
+ ParseNullabilityTypeSpecifiers(DS.getAttributes());
+ continue;
+
case tok::kw___attribute:
if (AttrReqs & AR_GNUAttributesParsedAndRejected)
// When GNU attributes are expressly forbidden, diagnose their usage.
@@ -5041,8 +5098,8 @@ void Parser::ParseDirectDeclarator(Declarator &D) {
// the l_paren token.
}
- if (Tok.is(tok::identifier) || Tok.is(tok::kw_operator) ||
- Tok.is(tok::annot_template_id) || Tok.is(tok::tilde)) {
+ if (Tok.isOneOf(tok::identifier, tok::kw_operator, tok::annot_template_id,
+ tok::tilde)) {
// We found something that indicates the start of an unqualified-id.
// Parse that unqualified-id.
bool AllowConstructorName;
@@ -5145,7 +5202,7 @@ void Parser::ParseDirectDeclarator(Declarator &D) {
<< (D.getDeclSpec().isEmpty() ? SourceRange()
: D.getDeclSpec().getSourceRange());
} else if (getLangOpts().CPlusPlus) {
- if (Tok.is(tok::period) || Tok.is(tok::arrow))
+ if (Tok.isOneOf(tok::period, tok::arrow))
Diag(Tok, diag::err_invalid_operator_on_type) << Tok.is(tok::arrow);
else {
SourceLocation Loc = D.getCXXScopeSpec().getEndLoc();
@@ -5521,7 +5578,7 @@ void Parser::ParseFunctionDeclarator(Declarator &D,
/// true if a ref-qualifier is found.
bool Parser::ParseRefQualifier(bool &RefQualifierIsLValueRef,
SourceLocation &RefQualifierLoc) {
- if (Tok.is(tok::amp) || Tok.is(tok::ampamp)) {
+ if (Tok.isOneOf(tok::amp, tok::ampamp)) {
Diag(Tok, getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_ref_qualifier :
diag::ext_ref_qualifier);
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp
index 53e4a41..47778b8 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp
@@ -685,7 +685,7 @@ Decl *Parser::ParseUsingDeclaration(unsigned Context,
/// _Static_assert ( constant-expression , string-literal ) ;
///
Decl *Parser::ParseStaticAssertDeclaration(SourceLocation &DeclEnd){
- assert((Tok.is(tok::kw_static_assert) || Tok.is(tok::kw__Static_assert)) &&
+ assert(Tok.isOneOf(tok::kw_static_assert, tok::kw__Static_assert) &&
"Not a static_assert declaration");
if (Tok.is(tok::kw__Static_assert) && !getLangOpts().C11)
@@ -753,7 +753,7 @@ Decl *Parser::ParseStaticAssertDeclaration(SourceLocation &DeclEnd){
/// 'decltype' ( 'auto' ) [C++1y]
///
SourceLocation Parser::ParseDecltypeSpecifier(DeclSpec &DS) {
- assert((Tok.is(tok::kw_decltype) || Tok.is(tok::annot_decltype))
+ assert(Tok.isOneOf(tok::kw_decltype, tok::annot_decltype)
&& "Not a decltype specifier");
ExprResult Result;
@@ -943,7 +943,7 @@ TypeResult Parser::ParseBaseTypeSpecifier(SourceLocation &BaseLoc,
// Parse decltype-specifier
// tok == kw_decltype is just error recovery, it can only happen when SS
// isn't empty
- if (Tok.is(tok::kw_decltype) || Tok.is(tok::annot_decltype)) {
+ if (Tok.isOneOf(tok::kw_decltype, tok::annot_decltype)) {
if (SS.isNotEmpty())
Diag(SS.getBeginLoc(), diag::err_unexpected_scope_on_base_decltype)
<< FixItHint::CreateRemoval(SS.getRange());
@@ -1058,9 +1058,9 @@ TypeResult Parser::ParseBaseTypeSpecifier(SourceLocation &BaseLoc,
}
void Parser::ParseMicrosoftInheritanceClassAttributes(ParsedAttributes &attrs) {
- while (Tok.is(tok::kw___single_inheritance) ||
- Tok.is(tok::kw___multiple_inheritance) ||
- Tok.is(tok::kw___virtual_inheritance)) {
+ while (Tok.isOneOf(tok::kw___single_inheritance,
+ tok::kw___multiple_inheritance,
+ tok::kw___virtual_inheritance)) {
IdentifierInfo *AttrName = Tok.getIdentifierInfo();
SourceLocation AttrNameLoc = ConsumeToken();
attrs.addNew(AttrName, AttrNameLoc, nullptr, AttrNameLoc, nullptr, 0,
@@ -1232,9 +1232,9 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
MaybeParseMicrosoftDeclSpecs(attrs);
// Parse inheritance specifiers.
- if (Tok.is(tok::kw___single_inheritance) ||
- Tok.is(tok::kw___multiple_inheritance) ||
- Tok.is(tok::kw___virtual_inheritance))
+ if (Tok.isOneOf(tok::kw___single_inheritance,
+ tok::kw___multiple_inheritance,
+ tok::kw___virtual_inheritance))
ParseMicrosoftInheritanceClassAttributes(attrs);
// If C++0x attributes exist here, parse them.
@@ -1250,55 +1250,55 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
Tok.isNot(tok::identifier) &&
!Tok.isAnnotation() &&
Tok.getIdentifierInfo() &&
- (Tok.is(tok::kw___is_abstract) ||
- Tok.is(tok::kw___is_arithmetic) ||
- Tok.is(tok::kw___is_array) ||
- Tok.is(tok::kw___is_base_of) ||
- Tok.is(tok::kw___is_class) ||
- Tok.is(tok::kw___is_complete_type) ||
- Tok.is(tok::kw___is_compound) ||
- Tok.is(tok::kw___is_const) ||
- Tok.is(tok::kw___is_constructible) ||
- Tok.is(tok::kw___is_convertible) ||
- Tok.is(tok::kw___is_convertible_to) ||
- Tok.is(tok::kw___is_destructible) ||
- Tok.is(tok::kw___is_empty) ||
- Tok.is(tok::kw___is_enum) ||
- Tok.is(tok::kw___is_floating_point) ||
- Tok.is(tok::kw___is_final) ||
- Tok.is(tok::kw___is_function) ||
- Tok.is(tok::kw___is_fundamental) ||
- Tok.is(tok::kw___is_integral) ||
- Tok.is(tok::kw___is_interface_class) ||
- Tok.is(tok::kw___is_literal) ||
- Tok.is(tok::kw___is_lvalue_expr) ||
- Tok.is(tok::kw___is_lvalue_reference) ||
- Tok.is(tok::kw___is_member_function_pointer) ||
- Tok.is(tok::kw___is_member_object_pointer) ||
- Tok.is(tok::kw___is_member_pointer) ||
- Tok.is(tok::kw___is_nothrow_assignable) ||
- Tok.is(tok::kw___is_nothrow_constructible) ||
- Tok.is(tok::kw___is_nothrow_destructible) ||
- Tok.is(tok::kw___is_object) ||
- Tok.is(tok::kw___is_pod) ||
- Tok.is(tok::kw___is_pointer) ||
- Tok.is(tok::kw___is_polymorphic) ||
- Tok.is(tok::kw___is_reference) ||
- Tok.is(tok::kw___is_rvalue_expr) ||
- Tok.is(tok::kw___is_rvalue_reference) ||
- Tok.is(tok::kw___is_same) ||
- Tok.is(tok::kw___is_scalar) ||
- Tok.is(tok::kw___is_sealed) ||
- Tok.is(tok::kw___is_signed) ||
- Tok.is(tok::kw___is_standard_layout) ||
- Tok.is(tok::kw___is_trivial) ||
- Tok.is(tok::kw___is_trivially_assignable) ||
- Tok.is(tok::kw___is_trivially_constructible) ||
- Tok.is(tok::kw___is_trivially_copyable) ||
- Tok.is(tok::kw___is_union) ||
- Tok.is(tok::kw___is_unsigned) ||
- Tok.is(tok::kw___is_void) ||
- Tok.is(tok::kw___is_volatile)))
+ Tok.isOneOf(tok::kw___is_abstract,
+ tok::kw___is_arithmetic,
+ tok::kw___is_array,
+ tok::kw___is_base_of,
+ tok::kw___is_class,
+ tok::kw___is_complete_type,
+ tok::kw___is_compound,
+ tok::kw___is_const,
+ tok::kw___is_constructible,
+ tok::kw___is_convertible,
+ tok::kw___is_convertible_to,
+ tok::kw___is_destructible,
+ tok::kw___is_empty,
+ tok::kw___is_enum,
+ tok::kw___is_floating_point,
+ tok::kw___is_final,
+ tok::kw___is_function,
+ tok::kw___is_fundamental,
+ tok::kw___is_integral,
+ tok::kw___is_interface_class,
+ tok::kw___is_literal,
+ tok::kw___is_lvalue_expr,
+ tok::kw___is_lvalue_reference,
+ tok::kw___is_member_function_pointer,
+ tok::kw___is_member_object_pointer,
+ tok::kw___is_member_pointer,
+ tok::kw___is_nothrow_assignable,
+ tok::kw___is_nothrow_constructible,
+ tok::kw___is_nothrow_destructible,
+ tok::kw___is_object,
+ tok::kw___is_pod,
+ tok::kw___is_pointer,
+ tok::kw___is_polymorphic,
+ tok::kw___is_reference,
+ tok::kw___is_rvalue_expr,
+ tok::kw___is_rvalue_reference,
+ tok::kw___is_same,
+ tok::kw___is_scalar,
+ tok::kw___is_sealed,
+ tok::kw___is_signed,
+ tok::kw___is_standard_layout,
+ tok::kw___is_trivial,
+ tok::kw___is_trivially_assignable,
+ tok::kw___is_trivially_constructible,
+ tok::kw___is_trivially_copyable,
+ tok::kw___is_union,
+ tok::kw___is_unsigned,
+ tok::kw___is_void,
+ tok::kw___is_volatile))
// GNU libstdc++ 4.2 and libc++ use certain intrinsic names as the
// name of struct templates, but some are keywords in GCC >= 4.3
// and Clang. Therefore, when we see the token sequence "struct
@@ -1476,7 +1476,7 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
}
}
- if (Tok.is(tok::l_brace) || Tok.is(tok::colon))
+ if (Tok.isOneOf(tok::l_brace, tok::colon))
TUK = Sema::TUK_Definition;
else
TUK = Sema::TUK_Reference;
@@ -2220,8 +2220,7 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
// Access declarations.
bool MalformedTypeSpec = false;
if (!TemplateInfo.Kind &&
- (Tok.is(tok::identifier) || Tok.is(tok::coloncolon) ||
- Tok.is(tok::kw___super))) {
+ Tok.isOneOf(tok::identifier, tok::coloncolon, tok::kw___super)) {
if (TryAnnotateCXXScopeToken())
MalformedTypeSpec = true;
@@ -2274,7 +2273,7 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
// static_assert-declaration. A templated static_assert declaration is
// diagnosed in Parser::ParseSingleDeclarationAfterTemplate.
if (!TemplateInfo.Kind &&
- (Tok.is(tok::kw_static_assert) || Tok.is(tok::kw__Static_assert))) {
+ Tok.isOneOf(tok::kw_static_assert, tok::kw__Static_assert)) {
SourceLocation DeclEnd;
ParseStaticAssertDeclaration(DeclEnd);
return;
@@ -2411,7 +2410,7 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
if (Tok.is(tok::l_brace) && !getLangOpts().CPlusPlus11) {
DefinitionKind = FDK_Definition;
} else if (DeclaratorInfo.isFunctionDeclarator()) {
- if (Tok.is(tok::l_brace) || Tok.is(tok::colon) || Tok.is(tok::kw_try)) {
+ if (Tok.isOneOf(tok::l_brace, tok::colon, tok::kw_try)) {
DefinitionKind = FDK_Definition;
} else if (Tok.is(tok::equal)) {
const Token &KW = NextToken();
@@ -2480,7 +2479,7 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
while (1) {
InClassInitStyle HasInClassInit = ICIS_NoInit;
- if ((Tok.is(tok::equal) || Tok.is(tok::l_brace)) && !HasInitializer) {
+ if (Tok.isOneOf(tok::equal, tok::l_brace) && !HasInitializer) {
if (BitfieldSize.get()) {
Diag(Tok, diag::err_bitfield_member_init);
SkipUntil(tok::comma, StopAtSemi | StopBeforeMatch);
@@ -2657,7 +2656,7 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
/// be a constant-expression.
ExprResult Parser::ParseCXXMemberInitializer(Decl *D, bool IsFunction,
SourceLocation &EqualLoc) {
- assert((Tok.is(tok::equal) || Tok.is(tok::l_brace))
+ assert(Tok.isOneOf(tok::equal, tok::l_brace)
&& "Data member initializer not starting with '=' or '{'");
EnterExpressionEvaluationContext Context(Actions,
@@ -2671,8 +2670,7 @@ ExprResult Parser::ParseCXXMemberInitializer(Decl *D, bool IsFunction,
// An initializer of '= delete p, foo' will never be parsed, because
// a top-level comma always ends the initializer expression.
const Token &Next = NextToken();
- if (IsFunction || Next.is(tok::semi) || Next.is(tok::comma) ||
- Next.is(tok::eof)) {
+ if (IsFunction || Next.isOneOf(tok::semi, tok::comma, tok::eof)) {
if (IsFunction)
Diag(ConsumeToken(), diag::err_default_delete_in_multiple_declaration)
<< 1 /* delete */;
@@ -2724,12 +2722,13 @@ void Parser::SkipCXXMemberSpecification(SourceLocation RecordLoc,
ParseScope ClassScope(this, Scope::ClassScope|Scope::DeclScope);
ParsingClassDefinition ParsingDef(*this, TagDecl, /*NonNestedClass*/ true,
TagType == DeclSpec::TST_interface);
- Actions.ActOnTagStartSkippedDefinition(getCurScope(), TagDecl);
+ auto OldContext =
+ Actions.ActOnTagStartSkippedDefinition(getCurScope(), TagDecl);
// Parse the bases but don't attach them to the class.
ParseBaseClause(nullptr);
- Actions.ActOnTagFinishSkippedDefinition();
+ Actions.ActOnTagFinishSkippedDefinition(OldContext);
if (!Tok.is(tok::l_brace)) {
Diag(PP.getLocForEndOfToken(PrevTokLocation),
@@ -2915,8 +2914,8 @@ void Parser::ParseCXXMemberSpecification(SourceLocation RecordLoc,
while (Tok.isNot(tok::r_brace) && !isEofOrEom()) {
// Each iteration of this loop reads one member-declaration.
- if (getLangOpts().MicrosoftExt && (Tok.is(tok::kw___if_exists) ||
- Tok.is(tok::kw___if_not_exists))) {
+ if (getLangOpts().MicrosoftExt && Tok.isOneOf(tok::kw___if_exists,
+ tok::kw___if_not_exists)) {
ParseMicrosoftIfExistsClassDeclaration((DeclSpec::TST)TagType, CurAS);
continue;
}
@@ -3127,7 +3126,7 @@ void Parser::ParseConstructorInitializer(Decl *ConstructorDecl) {
break;
// If the next token looks like a base or member initializer, assume that
// we're just missing a comma.
- else if (Tok.is(tok::identifier) || Tok.is(tok::coloncolon)) {
+ else if (Tok.isOneOf(tok::identifier, tok::coloncolon)) {
SourceLocation Loc = PP.getLocForEndOfToken(PrevTokLocation);
Diag(Loc, diag::err_ctor_init_missing_comma)
<< FixItHint::CreateInsertion(Loc, ", ");
@@ -3779,7 +3778,7 @@ SourceLocation Parser::SkipCXX11Attributes() {
return EndLoc;
}
-/// ParseMicrosoftAttributes - Parse a Microsoft attribute [Attr]
+/// Parse one or more Microsoft-style attributes [Attr]
///
/// [MS] ms-attribute:
/// '[' token-seq ']'
@@ -3791,13 +3790,17 @@ void Parser::ParseMicrosoftAttributes(ParsedAttributes &attrs,
SourceLocation *endLoc) {
assert(Tok.is(tok::l_square) && "Not a Microsoft attribute list");
- while (Tok.is(tok::l_square)) {
+ do {
// FIXME: If this is actually a C++11 attribute, parse it as one.
- ConsumeBracket();
+ BalancedDelimiterTracker T(*this, tok::l_square);
+ T.consumeOpen();
+ if (Tok.is(tok::r_square))
+ Diag(T.getOpenLocation(), diag::err_empty_attribute_block);
SkipUntil(tok::r_square, StopAtSemi | StopBeforeMatch);
- if (endLoc) *endLoc = Tok.getLocation();
- ExpectAndConsume(tok::r_square);
- }
+ T.consumeClose();
+ if (endLoc)
+ *endLoc = T.getCloseLocation();
+ } while (Tok.is(tok::l_square));
}
void Parser::ParseMicrosoftIfExistsClassDeclaration(DeclSpec::TST TagType,
@@ -3829,7 +3832,7 @@ void Parser::ParseMicrosoftIfExistsClassDeclaration(DeclSpec::TST TagType,
while (Tok.isNot(tok::r_brace) && !isEofOrEom()) {
// __if_exists, __if_not_exists can nest.
- if ((Tok.is(tok::kw___if_exists) || Tok.is(tok::kw___if_not_exists))) {
+ if (Tok.isOneOf(tok::kw___if_exists, tok::kw___if_not_exists)) {
ParseMicrosoftIfExistsClassDeclaration((DeclSpec::TST)TagType, CurAS);
continue;
}
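Note on the recurring change above and below: chains of Tok.is(A) || Tok.is(B) || ... are folded into Token::isOneOf. A minimal sketch of such a helper (roughly how it is defined on Token; shown only for orientation, not part of this diff):

    bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
      return is(K1) || is(K2);              // base case: two kinds
    }
    template <typename... Ts>
    bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts... Ks) const {
      return is(K1) || isOneOf(K2, Ks...);  // peel one kind off; short-circuits like the || chain
    }

Each isOneOf call in these hunks is therefore equivalent to the disjunction it replaces.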
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp
index 95a28a8..da759c7 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp
@@ -470,8 +470,7 @@ class CastExpressionIdValidator : public CorrectionCandidateCallback {
if (!AllowNonTypes || !CorrectionCandidateCallback::ValidateCandidate(candidate))
return false;
- if (!(NextToken.is(tok::equal) || NextToken.is(tok::arrow) ||
- NextToken.is(tok::period)))
+ if (!NextToken.isOneOf(tok::equal, tok::arrow, tok::period))
return true;
for (auto *C : candidate) {
@@ -829,11 +828,9 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
}
}
- if (Next.is(tok::coloncolon) ||
- (!ColonIsSacred && Next.is(tok::colon)) ||
- Next.is(tok::less) ||
- Next.is(tok::l_paren) ||
- Next.is(tok::l_brace)) {
+ if ((!ColonIsSacred && Next.is(tok::colon)) ||
+ Next.isOneOf(tok::coloncolon, tok::less, tok::l_paren,
+ tok::l_brace)) {
// If TryAnnotateTypeOrScopeToken annotates the token, tail recurse.
if (TryAnnotateTypeOrScopeToken())
return ExprError();
@@ -931,7 +928,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
auto Validator = llvm::make_unique<CastExpressionIdValidator>(
Tok, isTypeCast != NotTypeCast, isTypeCast != IsTypeCast);
Validator->IsAddressOfOperand = isAddressOfOperand;
- if (Tok.is(tok::periodstar) || Tok.is(tok::arrowstar)) {
+ if (Tok.isOneOf(tok::periodstar, tok::arrowstar)) {
Validator->WantExpressionKeywords = false;
Validator->WantRemainingKeywords = false;
} else {
@@ -1639,10 +1636,9 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok,
ParsedType &CastTy,
SourceRange &CastRange) {
- assert((OpTok.is(tok::kw_typeof) || OpTok.is(tok::kw_sizeof) ||
- OpTok.is(tok::kw___alignof) || OpTok.is(tok::kw_alignof) ||
- OpTok.is(tok::kw__Alignof) || OpTok.is(tok::kw_vec_step)) &&
- "Not a typeof/sizeof/alignof/vec_step expression!");
+ assert(OpTok.isOneOf(tok::kw_typeof, tok::kw_sizeof, tok::kw___alignof,
+ tok::kw_alignof, tok::kw__Alignof, tok::kw_vec_step) &&
+ "Not a typeof/sizeof/alignof/vec_step expression!");
ExprResult Operand;
@@ -1650,8 +1646,8 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok,
if (Tok.isNot(tok::l_paren)) {
// If construct allows a form without parentheses, user may forget to put
// parentheses around type name.
- if (OpTok.is(tok::kw_sizeof) || OpTok.is(tok::kw___alignof) ||
- OpTok.is(tok::kw_alignof) || OpTok.is(tok::kw__Alignof)) {
+ if (OpTok.isOneOf(tok::kw_sizeof, tok::kw___alignof, tok::kw_alignof,
+ tok::kw__Alignof)) {
if (isTypeIdUnambiguously()) {
DeclSpec DS(AttrFactory);
ParseSpecifierQualifierList(DS);
@@ -1725,9 +1721,8 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok,
/// [C++11] 'alignof' '(' type-id ')'
/// \endverbatim
ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
- assert((Tok.is(tok::kw_sizeof) || Tok.is(tok::kw___alignof) ||
- Tok.is(tok::kw_alignof) || Tok.is(tok::kw__Alignof) ||
- Tok.is(tok::kw_vec_step)) &&
+ assert(Tok.isOneOf(tok::kw_sizeof, tok::kw___alignof, tok::kw_alignof,
+ tok::kw__Alignof, tok::kw_vec_step) &&
"Not a sizeof/alignof/vec_step expression!");
Token OpTok = Tok;
ConsumeToken();
@@ -1778,7 +1773,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
RParenLoc);
}
- if (OpTok.is(tok::kw_alignof) || OpTok.is(tok::kw__Alignof))
+ if (OpTok.isOneOf(tok::kw_alignof, tok::kw__Alignof))
Diag(OpTok, diag::warn_cxx98_compat_alignof);
EnterExpressionEvaluationContext Unevaluated(Actions, Sema::Unevaluated,
@@ -1793,8 +1788,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
CastRange);
UnaryExprOrTypeTrait ExprKind = UETT_SizeOf;
- if (OpTok.is(tok::kw_alignof) || OpTok.is(tok::kw___alignof) ||
- OpTok.is(tok::kw__Alignof))
+ if (OpTok.isOneOf(tok::kw_alignof, tok::kw___alignof, tok::kw__Alignof))
ExprKind = UETT_AlignOf;
else if (OpTok.is(tok::kw_vec_step))
ExprKind = UETT_VecStep;
@@ -1806,7 +1800,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
CastTy.getAsOpaquePtr(),
CastRange);
- if (OpTok.is(tok::kw_alignof) || OpTok.is(tok::kw__Alignof))
+ if (OpTok.isOneOf(tok::kw_alignof, tok::kw__Alignof))
Diag(OpTok, diag::ext_alignof_expr) << OpTok.getIdentifierInfo();
// If we get here, the operand to the sizeof/alignof was an expression.
@@ -2107,10 +2101,10 @@ Parser::ParseParenExpression(ParenParseOption &ExprType, bool stopIfCastExpr,
// Diagnose use of bridge casts in non-arc mode.
bool BridgeCast = (getLangOpts().ObjC2 &&
- (Tok.is(tok::kw___bridge) ||
- Tok.is(tok::kw___bridge_transfer) ||
- Tok.is(tok::kw___bridge_retained) ||
- Tok.is(tok::kw___bridge_retain)));
+ Tok.isOneOf(tok::kw___bridge,
+ tok::kw___bridge_transfer,
+ tok::kw___bridge_retained,
+ tok::kw___bridge_retain));
if (BridgeCast && !getLangOpts().ObjCAutoRefCount) {
if (!TryConsumeToken(tok::kw___bridge)) {
StringRef BridgeCastName = Tok.getName();
@@ -2646,9 +2640,6 @@ void Parser::ParseBlockId(SourceLocation CaretLoc) {
Declarator DeclaratorInfo(DS, Declarator::BlockLiteralContext);
ParseDeclarator(DeclaratorInfo);
- // We do this for: ^ __attribute__((noreturn)) {, as DS has the attributes.
- DeclaratorInfo.takeAttributes(DS.getAttributes(), SourceLocation());
-
MaybeParseGNUAttributes(DeclaratorInfo);
// Inform sema that we are starting a block.
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseExprCXX.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseExprCXX.cpp
index ed9f75d..7fb4b5e 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseExprCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseExprCXX.cpp
@@ -254,7 +254,7 @@ bool Parser::ParseOptionalCXXScopeSpecifier(CXXScopeSpec &SS,
}
if (!HasScopeSpecifier &&
- (Tok.is(tok::kw_decltype) || Tok.is(tok::annot_decltype))) {
+ Tok.isOneOf(tok::kw_decltype, tok::annot_decltype)) {
DeclSpec DS(AttrFactory);
SourceLocation DeclLoc = Tok.getLocation();
SourceLocation EndLoc = ParseDecltypeSpecifier(DS);
@@ -487,7 +487,7 @@ bool Parser::ParseOptionalCXXScopeSpecifier(CXXScopeSpec &SS,
// as the name in a nested-name-specifier.
Token Identifier = Tok;
SourceLocation IdLoc = ConsumeToken();
- assert((Tok.is(tok::coloncolon) || Tok.is(tok::colon)) &&
+ assert(Tok.isOneOf(tok::coloncolon, tok::colon) &&
"NextToken() not working properly!");
Token ColonColon = Tok;
SourceLocation CCLoc = ConsumeToken();
@@ -892,7 +892,7 @@ Optional<unsigned> Parser::ParseLambdaIntroducer(LambdaIntroducer &Intro,
Parens.getCloseLocation(),
Exprs);
}
- } else if (Tok.is(tok::l_brace) || Tok.is(tok::equal)) {
+ } else if (Tok.isOneOf(tok::l_brace, tok::equal)) {
// Each lambda init-capture forms its own full expression, which clears
// Actions.MaybeODRUseExprs. So create an expression evaluation context
// to save the necessary state, and restore it later.
@@ -1159,8 +1159,7 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer(
LParenLoc, FunLocalRangeEnd, D,
TrailingReturnType),
Attr, DeclEndLoc);
- } else if (Tok.is(tok::kw_mutable) || Tok.is(tok::arrow) ||
- Tok.is(tok::kw___attribute) ||
+ } else if (Tok.isOneOf(tok::kw_mutable, tok::arrow, tok::kw___attribute) ||
(Tok.is(tok::l_square) && NextToken().is(tok::l_square))) {
// It's common to forget that one needs '()' before 'mutable', an attribute
// specifier, or the result type. Deal with this.
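The branch above handles the common mistake of omitting '()' before 'mutable', an attribute, or a trailing return type in a lambda. A small sketch of the kind of code that recovery targets (hypothetical snippet):

    int n = 0;
    auto f = [n] mutable { return ++n; };   // missing '()' before 'mutable'; the parser
                                             // diagnoses this and recovers as [n]() mutable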
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseObjc.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseObjc.cpp
index 691f53f..e4f7911 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseObjc.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseObjc.cpp
@@ -13,6 +13,7 @@
#include "clang/Parse/Parser.h"
#include "RAIIObjectsForParser.h"
+#include "clang/AST/ASTContext.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Parse/ParseDiagnostic.h"
#include "clang/Sema/DeclSpec.h"
@@ -307,6 +308,37 @@ Decl *Parser::ParseObjCAtInterfaceDeclaration(SourceLocation AtLoc,
return ClsType;
}
+/// Add an attribute for a context-sensitive type nullability to the given
+/// declarator.
+static void addContextSensitiveTypeNullability(Parser &P,
+ Declarator &D,
+ NullabilityKind nullability,
+ SourceLocation nullabilityLoc,
+ bool &addedToDeclSpec) {
+ // Create the attribute.
+ auto getNullabilityAttr = [&]() -> AttributeList * {
+ return D.getAttributePool().create(
+ P.getNullabilityKeyword(nullability),
+ SourceRange(nullabilityLoc),
+ nullptr, SourceLocation(),
+ nullptr, 0,
+ AttributeList::AS_ContextSensitiveKeyword);
+ };
+
+ if (D.getNumTypeObjects() > 0) {
+ // Add the attribute to the declarator chunk nearest the declarator.
+ auto nullabilityAttr = getNullabilityAttr();
+ DeclaratorChunk &chunk = D.getTypeObject(0);
+ nullabilityAttr->setNext(chunk.getAttrListRef());
+ chunk.getAttrListRef() = nullabilityAttr;
+ } else if (!addedToDeclSpec) {
+ // Otherwise, just put it on the declaration specifiers (if one
+ // isn't there already).
+ D.getMutableDeclSpec().addAttributes(getNullabilityAttr());
+ addedToDeclSpec = true;
+ }
+}
+
/// objc-interface-decl-list:
/// empty
/// objc-interface-decl-list objc-property-decl [OBJC2]
@@ -330,7 +362,7 @@ void Parser::ParseObjCInterfaceDeclList(tok::ObjCKeywordKind contextKey,
while (1) {
// If this is a method prototype, parse it.
- if (Tok.is(tok::minus) || Tok.is(tok::plus)) {
+ if (Tok.isOneOf(tok::minus, tok::plus)) {
if (Decl *methodPrototype =
ParseObjCMethodPrototype(MethodImplKind, false))
allMethods.push_back(methodPrototype);
@@ -445,6 +477,7 @@ void Parser::ParseObjCInterfaceDeclList(tok::ObjCKeywordKind contextKey,
ParseObjCPropertyAttribute(OCDS);
}
+ bool addedToDeclSpec = false;
auto ObjCPropertyCallback = [&](ParsingFieldDeclarator &FD) {
if (FD.D.getIdentifier() == nullptr) {
Diag(AtLoc, diag::err_objc_property_requires_field_name)
@@ -457,6 +490,13 @@ void Parser::ParseObjCInterfaceDeclList(tok::ObjCKeywordKind contextKey,
return;
}
+ // Map a nullability property attribute to a context-sensitive keyword
+ // attribute.
+ if (OCDS.getPropertyAttributes() & ObjCDeclSpec::DQ_PR_nullability)
+ addContextSensitiveTypeNullability(*this, FD.D, OCDS.getNullability(),
+ OCDS.getNullabilityLoc(),
+ addedToDeclSpec);
+
// Install the property declarator into interfaceDecl.
IdentifierInfo *SelName =
OCDS.getGetterName() ? OCDS.getGetterName() : FD.D.getIdentifier();
@@ -510,6 +550,24 @@ void Parser::ParseObjCInterfaceDeclList(tok::ObjCKeywordKind contextKey,
Actions.ActOnAtEnd(getCurScope(), AtEnd, allMethods, allTUVariables);
}
+/// Diagnose redundant or conflicting nullability information.
+static void diagnoseRedundantPropertyNullability(Parser &P,
+ ObjCDeclSpec &DS,
+ NullabilityKind nullability,
+ SourceLocation nullabilityLoc){
+ if (DS.getNullability() == nullability) {
+ P.Diag(nullabilityLoc, diag::warn_nullability_duplicate)
+ << static_cast<unsigned>(nullability) << true
+ << SourceRange(DS.getNullabilityLoc());
+ return;
+ }
+
+ P.Diag(nullabilityLoc, diag::err_nullability_conflicting)
+ << static_cast<unsigned>(nullability) << true
+ << static_cast<unsigned>(DS.getNullability()) << true
+ << SourceRange(DS.getNullabilityLoc());
+}
+
/// Parse property attribute declarations.
///
/// property-attr-decl: '(' property-attrlist ')'
@@ -529,6 +587,10 @@ void Parser::ParseObjCInterfaceDeclList(tok::ObjCKeywordKind contextKey,
/// strong
/// weak
/// unsafe_unretained
+/// nonnull
+/// nullable
+/// null_unspecified
+/// null_resettable
///
void Parser::ParseObjCPropertyAttribute(ObjCDeclSpec &DS) {
assert(Tok.getKind() == tok::l_paren);
@@ -614,6 +676,37 @@ void Parser::ParseObjCPropertyAttribute(ObjCDeclSpec &DS) {
DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_getter);
DS.setGetterName(SelIdent);
}
+ } else if (II->isStr("nonnull")) {
+ if (DS.getPropertyAttributes() & ObjCDeclSpec::DQ_PR_nullability)
+ diagnoseRedundantPropertyNullability(*this, DS,
+ NullabilityKind::NonNull,
+ Tok.getLocation());
+ DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_nullability);
+ DS.setNullability(Tok.getLocation(), NullabilityKind::NonNull);
+ } else if (II->isStr("nullable")) {
+ if (DS.getPropertyAttributes() & ObjCDeclSpec::DQ_PR_nullability)
+ diagnoseRedundantPropertyNullability(*this, DS,
+ NullabilityKind::Nullable,
+ Tok.getLocation());
+ DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_nullability);
+ DS.setNullability(Tok.getLocation(), NullabilityKind::Nullable);
+ } else if (II->isStr("null_unspecified")) {
+ if (DS.getPropertyAttributes() & ObjCDeclSpec::DQ_PR_nullability)
+ diagnoseRedundantPropertyNullability(*this, DS,
+ NullabilityKind::Unspecified,
+ Tok.getLocation());
+ DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_nullability);
+ DS.setNullability(Tok.getLocation(), NullabilityKind::Unspecified);
+ } else if (II->isStr("null_resettable")) {
+ if (DS.getPropertyAttributes() & ObjCDeclSpec::DQ_PR_nullability)
+ diagnoseRedundantPropertyNullability(*this, DS,
+ NullabilityKind::Unspecified,
+ Tok.getLocation());
+ DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_nullability);
+ DS.setNullability(Tok.getLocation(), NullabilityKind::Unspecified);
+
+ // Also set the null_resettable bit.
+ DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_null_resettable);
} else {
Diag(AttrName, diag::err_objc_expected_property_attr) << II;
SkipUntil(tok::r_paren, StopAtSemi);
@@ -641,7 +734,7 @@ void Parser::ParseObjCPropertyAttribute(ObjCDeclSpec &DS) {
///
Decl *Parser::ParseObjCMethodPrototype(tok::ObjCKeywordKind MethodImplKind,
bool MethodDefinition) {
- assert((Tok.is(tok::minus) || Tok.is(tok::plus)) && "expected +/-");
+ assert(Tok.isOneOf(tok::minus, tok::plus) && "expected +/-");
tok::TokenKind methodType = Tok.getKind();
SourceLocation mLoc = ConsumeToken();
@@ -779,6 +872,17 @@ bool Parser::isTokIdentifier_in() const {
/// objc-type-qualifier
/// objc-type-qualifiers objc-type-qualifier
///
+/// objc-type-qualifier:
+/// 'in'
+/// 'out'
+/// 'inout'
+/// 'oneway'
+/// 'bycopy'
+/// 'byref'
+/// 'nonnull'
+/// 'nullable'
+/// 'null_unspecified'
+///
void Parser::ParseObjCTypeQualifierList(ObjCDeclSpec &DS,
Declarator::TheContext Context) {
assert(Context == Declarator::ObjCParameterContext ||
@@ -796,10 +900,13 @@ void Parser::ParseObjCTypeQualifierList(ObjCDeclSpec &DS,
const IdentifierInfo *II = Tok.getIdentifierInfo();
for (unsigned i = 0; i != objc_NumQuals; ++i) {
- if (II != ObjCTypeQuals[i])
+ if (II != ObjCTypeQuals[i] ||
+ NextToken().is(tok::less) ||
+ NextToken().is(tok::coloncolon))
continue;
ObjCDeclSpec::ObjCDeclQualifier Qual;
+ NullabilityKind Nullability;
switch (i) {
default: llvm_unreachable("Unknown decl qualifier");
case objc_in: Qual = ObjCDeclSpec::DQ_In; break;
@@ -808,8 +915,28 @@ void Parser::ParseObjCTypeQualifierList(ObjCDeclSpec &DS,
case objc_oneway: Qual = ObjCDeclSpec::DQ_Oneway; break;
case objc_bycopy: Qual = ObjCDeclSpec::DQ_Bycopy; break;
case objc_byref: Qual = ObjCDeclSpec::DQ_Byref; break;
+
+ case objc_nonnull:
+ Qual = ObjCDeclSpec::DQ_CSNullability;
+ Nullability = NullabilityKind::NonNull;
+ break;
+
+ case objc_nullable:
+ Qual = ObjCDeclSpec::DQ_CSNullability;
+ Nullability = NullabilityKind::Nullable;
+ break;
+
+ case objc_null_unspecified:
+ Qual = ObjCDeclSpec::DQ_CSNullability;
+ Nullability = NullabilityKind::Unspecified;
+ break;
}
+
+ // FIXME: Diagnose redundant specifiers.
DS.setObjCDeclQualifier(Qual);
+ if (Qual == ObjCDeclSpec::DQ_CSNullability)
+ DS.setNullability(Tok.getLocation(), Nullability);
+
ConsumeToken();
II = nullptr;
break;
@@ -878,17 +1005,28 @@ ParsedType Parser::ParseObjCTypeName(ObjCDeclSpec &DS,
ParseObjCTypeQualifierList(DS, context);
ParsedType Ty;
- if (isTypeSpecifierQualifier()) {
+ if (isTypeSpecifierQualifier() || isObjCInstancetype()) {
// Parse an abstract declarator.
DeclSpec declSpec(AttrFactory);
declSpec.setObjCQualifiers(&DS);
- ParseSpecifierQualifierList(declSpec);
+ DeclSpecContext dsContext = DSC_normal;
+ if (context == Declarator::ObjCResultContext)
+ dsContext = DSC_objc_method_result;
+ ParseSpecifierQualifierList(declSpec, AS_none, dsContext);
declSpec.SetRangeEnd(Tok.getLocation());
Declarator declarator(declSpec, context);
ParseDeclarator(declarator);
// If that's not invalid, extract a type.
if (!declarator.isInvalidType()) {
+ // Map a nullability specifier to a context-sensitive keyword attribute.
+ bool addedToDeclSpec = false;
+ if (DS.getObjCDeclQualifier() & ObjCDeclSpec::DQ_CSNullability)
+ addContextSensitiveTypeNullability(*this, declarator,
+ DS.getNullability(),
+ DS.getNullabilityLoc(),
+ addedToDeclSpec);
+
TypeResult type = Actions.ActOnTypeName(getCurScope(), declarator);
if (!type.isInvalid())
Ty = type.get();
@@ -898,15 +1036,6 @@ ParsedType Parser::ParseObjCTypeName(ObjCDeclSpec &DS,
if (context == Declarator::ObjCParameterContext)
takeDeclAttributes(*paramAttrs, declarator);
}
- } else if (context == Declarator::ObjCResultContext &&
- Tok.is(tok::identifier)) {
- if (!Ident_instancetype)
- Ident_instancetype = PP.getIdentifierInfo("instancetype");
-
- if (Tok.getIdentifierInfo() == Ident_instancetype) {
- Ty = Actions.ActOnObjCInstanceType(Tok.getLocation());
- ConsumeToken();
- }
}
if (Tok.is(tok::r_paren))
@@ -1309,6 +1438,7 @@ void Parser::ParseObjCClassInstanceVariables(Decl *interfaceDecl,
auto ObjCIvarCallback = [&](ParsingFieldDeclarator &FD) {
Actions.ActOnObjCContainerStartDefinition(interfaceDecl);
// Install the declarator into the interface decl.
+ FD.D.setObjCIvar(true);
Decl *Field = Actions.ActOnIvar(
getCurScope(), FD.D.getDeclSpec().getSourceRange().getBegin(), FD.D,
FD.BitfieldSize, visibility);
@@ -2170,8 +2300,8 @@ ExprResult Parser::ParseObjCAtExpression(SourceLocation AtLoc) {
bool Parser::ParseObjCXXMessageReceiver(bool &IsExpr, void *&TypeOrExpr) {
InMessageExpressionRAIIObject InMessage(*this, true);
- if (Tok.is(tok::identifier) || Tok.is(tok::coloncolon) ||
- Tok.is(tok::kw_typename) || Tok.is(tok::annot_cxxscope))
+ if (Tok.isOneOf(tok::identifier, tok::coloncolon, tok::kw_typename,
+ tok::annot_cxxscope))
TryAnnotateTypeOrScopeToken();
if (!Actions.isSimpleTypeSpecifier(Tok.getKind())) {
@@ -2265,7 +2395,7 @@ bool Parser::isStartOfObjCClassMessageMissingOpenBracket() {
if (!Type.get().isNull() && Type.get()->isObjCObjectOrInterfaceType()) {
const Token &AfterNext = GetLookAheadToken(2);
- if (AfterNext.is(tok::colon) || AfterNext.is(tok::r_square)) {
+ if (AfterNext.isOneOf(tok::colon, tok::r_square)) {
if (Tok.is(tok::identifier))
TryAnnotateTypeOrScopeToken();
@@ -2891,9 +3021,8 @@ void Parser::ParseLexedObjCMethodDefs(LexedMethod &LM, bool parseMethod) {
// Consume the previously pushed token.
ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true);
- assert((Tok.is(tok::l_brace) || Tok.is(tok::kw_try) ||
- Tok.is(tok::colon)) &&
- "Inline objective-c method not starting with '{' or 'try' or ':'");
+ assert(Tok.isOneOf(tok::l_brace, tok::kw_try, tok::colon) &&
+ "Inline objective-c method not starting with '{' or 'try' or ':'");
// Enter a scope for the method or c-function body.
ParseScope BodyScope(this,
parseMethod
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp
index 187289ee..a3c3c5d 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp
@@ -94,6 +94,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirective() {
case OMPD_taskyield:
case OMPD_barrier:
case OMPD_taskwait:
+ case OMPD_taskgroup:
case OMPD_flush:
case OMPD_for:
case OMPD_for_simd:
@@ -128,7 +129,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirective() {
/// 'section' | 'single' | 'master' | 'critical' [ '(' <name> ')' ] |
/// 'parallel for' | 'parallel sections' | 'task' | 'taskyield' |
/// 'barrier' | 'taskwait' | 'flush' | 'ordered' | 'atomic' |
-/// 'for simd' | 'parallel for simd' | 'target' | 'teams' {clause}
+/// 'for simd' | 'parallel for simd' | 'target' | 'teams' | 'taskgroup'
+/// {clause}
/// annot_pragma_openmp_end
///
StmtResult
@@ -198,7 +200,8 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed) {
case OMPD_ordered:
case OMPD_atomic:
case OMPD_target:
- case OMPD_teams: {
+ case OMPD_teams:
+ case OMPD_taskgroup: {
ConsumeToken();
// Parse directive name of the 'critical' directive if any.
if (DKind == OMPD_critical) {
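For orientation, the newly routed directive encloses a structured block; a minimal usage sketch (step_one/step_two are hypothetical helpers):

    #pragma omp taskgroup
    {
      #pragma omp task
      step_one();
      #pragma omp task
      step_two();
    }
    // the enclosing taskgroup waits here for both tasks (and their descendants)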
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp b/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp
index 84256df..892d3c6 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp
@@ -823,9 +823,11 @@ bool Parser::HandlePragmaLoopHint(LoopHint &Hint) {
ConsumeToken(); // The annotation token.
SourceLocation StateLoc = Toks[0].getLocation();
IdentifierInfo *StateInfo = Toks[0].getIdentifierInfo();
- if (!StateInfo || ((OptionUnroll ? !StateInfo->isStr("full")
- : !StateInfo->isStr("enable")) &&
- !StateInfo->isStr("disable"))) {
+ if (!StateInfo ||
+ ((OptionUnroll ? !StateInfo->isStr("full")
+ : !StateInfo->isStr("enable") &&
+ !StateInfo->isStr("assume_safety")) &&
+ !StateInfo->isStr("disable"))) {
Diag(Toks[0].getLocation(), diag::err_pragma_invalid_keyword)
<< /*FullKeyword=*/OptionUnroll;
return false;
@@ -1954,6 +1956,7 @@ static bool ParseLoopHintValue(Preprocessor &PP, Token &Tok, Token PragmaName,
/// loop-hint-keyword:
/// 'enable'
/// 'disable'
+/// 'assume_safety'
///
/// unroll-hint-keyword:
/// 'full'
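The new loop-hint keyword is spelled in source like the existing enable/disable states; a minimal sketch with placeholder loop code:

    #pragma clang loop vectorize(assume_safety) interleave(assume_safety)
    for (int i = 0; i < n; ++i)
      out[idx[i]] += in[i];   // programmer asserts there are no loop-carried dependences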
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp
index 055bdea..b658cef 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp
@@ -119,15 +119,12 @@ namespace {
class StatementFilterCCC : public CorrectionCandidateCallback {
public:
StatementFilterCCC(Token nextTok) : NextToken(nextTok) {
- WantTypeSpecifiers = nextTok.is(tok::l_paren) || nextTok.is(tok::less) ||
- nextTok.is(tok::identifier) || nextTok.is(tok::star) ||
- nextTok.is(tok::amp) || nextTok.is(tok::l_square);
- WantExpressionKeywords = nextTok.is(tok::l_paren) ||
- nextTok.is(tok::identifier) ||
- nextTok.is(tok::arrow) || nextTok.is(tok::period);
- WantRemainingKeywords = nextTok.is(tok::l_paren) || nextTok.is(tok::semi) ||
- nextTok.is(tok::identifier) ||
- nextTok.is(tok::l_brace);
+ WantTypeSpecifiers = nextTok.isOneOf(tok::l_paren, tok::less, tok::l_square,
+ tok::identifier, tok::star, tok::amp);
+ WantExpressionKeywords =
+ nextTok.isOneOf(tok::l_paren, tok::identifier, tok::arrow, tok::period);
+ WantRemainingKeywords =
+ nextTok.isOneOf(tok::l_paren, tok::semi, tok::identifier, tok::l_brace);
WantCXXNamedCasts = false;
}
@@ -1427,7 +1424,7 @@ bool Parser::isForRangeIdentifier() {
if (Next.is(tok::colon))
return true;
- if (Next.is(tok::l_square) || Next.is(tok::kw_alignas)) {
+ if (Next.isOneOf(tok::l_square, tok::kw_alignas)) {
TentativeParsingAction PA(*this);
ConsumeToken();
SkipCXX11Attributes();
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseStmtAsm.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseStmtAsm.cpp
index 8ba9f15..8cdae6a 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseStmtAsm.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseStmtAsm.cpp
@@ -512,8 +512,8 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) {
llvm::SourceMgr TempSrcMgr;
llvm::MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &TempSrcMgr);
- MOFI->InitMCObjectFileInfo(TT, llvm::Reloc::Default, llvm::CodeModel::Default,
- Ctx);
+ MOFI->InitMCObjectFileInfo(TheTriple, llvm::Reloc::Default,
+ llvm::CodeModel::Default, Ctx);
std::unique_ptr<llvm::MemoryBuffer> Buffer =
llvm::MemoryBuffer::getMemBuffer(AsmString, "<MS inline asm>");
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseTemplate.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseTemplate.cpp
index f1467fe..a811678 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseTemplate.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseTemplate.cpp
@@ -62,7 +62,7 @@ Parser::ParseTemplateDeclarationOrSpecialization(unsigned Context,
SourceLocation &DeclEnd,
AccessSpecifier AS,
AttributeList *AccessAttrs) {
- assert((Tok.is(tok::kw_export) || Tok.is(tok::kw_template)) &&
+ assert(Tok.isOneOf(tok::kw_export, tok::kw_template) &&
"Token does not start a template declaration.");
// Enter template-parameter scope.
@@ -135,7 +135,7 @@ Parser::ParseTemplateDeclarationOrSpecialization(unsigned Context,
} else {
LastParamListWasEmpty = true;
}
- } while (Tok.is(tok::kw_export) || Tok.is(tok::kw_template));
+ } while (Tok.isOneOf(tok::kw_export, tok::kw_template));
// Parse the actual template declaration.
return ParseSingleDeclarationAfterTemplate(Context,
@@ -367,7 +367,7 @@ Parser::ParseTemplateParameterList(unsigned Depth,
// Did we find a comma or the end of the template parameter list?
if (Tok.is(tok::comma)) {
ConsumeToken();
- } else if (Tok.is(tok::greater) || Tok.is(tok::greatergreater)) {
+ } else if (Tok.isOneOf(tok::greater, tok::greatergreater)) {
// Don't consume this... that's done by template parser.
break;
} else {
@@ -484,7 +484,7 @@ Decl *Parser::ParseTemplateParameter(unsigned Depth, unsigned Position) {
/// 'typename' ...[opt][C++0x] identifier[opt]
/// 'typename' identifier[opt] '=' type-id
Decl *Parser::ParseTypeParameter(unsigned Depth, unsigned Position) {
- assert((Tok.is(tok::kw_class) || Tok.is(tok::kw_typename)) &&
+ assert(Tok.isOneOf(tok::kw_class, tok::kw_typename) &&
"A type-parameter starts with 'class' or 'typename'");
// Consume the 'class' or 'typename' keyword.
@@ -506,8 +506,8 @@ Decl *Parser::ParseTypeParameter(unsigned Depth, unsigned Position) {
if (Tok.is(tok::identifier)) {
ParamName = Tok.getIdentifierInfo();
NameLoc = ConsumeToken();
- } else if (Tok.is(tok::equal) || Tok.is(tok::comma) ||
- Tok.is(tok::greater) || Tok.is(tok::greatergreater)) {
+ } else if (Tok.isOneOf(tok::equal, tok::comma, tok::greater,
+ tok::greatergreater)) {
// Unnamed template parameter. Don't have to do anything here, just
// don't consume this token.
} else {
@@ -567,7 +567,7 @@ Parser::ParseTemplateTemplateParameter(unsigned Depth, unsigned Position) {
// or greater appear immediately or after 'struct'. In the latter case,
// replace the keyword with 'class'.
if (!TryConsumeToken(tok::kw_class)) {
- bool Replace = Tok.is(tok::kw_typename) || Tok.is(tok::kw_struct);
+ bool Replace = Tok.isOneOf(tok::kw_typename, tok::kw_struct);
const Token &Next = Tok.is(tok::kw_struct) ? NextToken() : Tok;
if (Tok.is(tok::kw_typename)) {
Diag(Tok.getLocation(),
@@ -577,9 +577,8 @@ Parser::ParseTemplateTemplateParameter(unsigned Depth, unsigned Position) {
<< (!getLangOpts().CPlusPlus1z
? FixItHint::CreateReplacement(Tok.getLocation(), "class")
: FixItHint());
- } else if (Next.is(tok::identifier) || Next.is(tok::comma) ||
- Next.is(tok::greater) || Next.is(tok::greatergreater) ||
- Next.is(tok::ellipsis)) {
+ } else if (Next.isOneOf(tok::identifier, tok::comma, tok::greater,
+ tok::greatergreater, tok::ellipsis)) {
Diag(Tok.getLocation(), diag::err_class_on_template_template_param)
<< (Replace ? FixItHint::CreateReplacement(Tok.getLocation(), "class")
: FixItHint::CreateInsertion(Tok.getLocation(), "class "));
@@ -604,8 +603,8 @@ Parser::ParseTemplateTemplateParameter(unsigned Depth, unsigned Position) {
if (Tok.is(tok::identifier)) {
ParamName = Tok.getIdentifierInfo();
NameLoc = ConsumeToken();
- } else if (Tok.is(tok::equal) || Tok.is(tok::comma) ||
- Tok.is(tok::greater) || Tok.is(tok::greatergreater)) {
+ } else if (Tok.isOneOf(tok::equal, tok::comma, tok::greater,
+ tok::greatergreater)) {
// Unnamed template parameter. Don't have to do anything here, just
// don't consume this token.
} else {
@@ -794,16 +793,15 @@ bool Parser::ParseGreaterThanInTemplateList(SourceLocation &RAngleLoc,
Token Next = NextToken();
if ((RemainingToken == tok::greater ||
RemainingToken == tok::greatergreater) &&
- (Next.is(tok::greater) || Next.is(tok::greatergreater) ||
- Next.is(tok::greatergreatergreater) || Next.is(tok::equal) ||
- Next.is(tok::greaterequal) || Next.is(tok::greatergreaterequal) ||
- Next.is(tok::equalequal)) &&
+ Next.isOneOf(tok::greater, tok::greatergreater,
+ tok::greatergreatergreater, tok::equal, tok::greaterequal,
+ tok::greatergreaterequal, tok::equalequal) &&
areTokensAdjacent(Tok, Next))
Hint2 = FixItHint::CreateInsertion(Next.getLocation(), " ");
unsigned DiagId = diag::err_two_right_angle_brackets_need_space;
if (getLangOpts().CPlusPlus11 &&
- (Tok.is(tok::greatergreater) || Tok.is(tok::greatergreatergreater)))
+ Tok.isOneOf(tok::greatergreater, tok::greatergreatergreater))
DiagId = diag::warn_cxx98_compat_two_right_angle_brackets;
else if (Tok.is(tok::greaterequal))
DiagId = diag::err_right_angle_bracket_equal_needs_space;
@@ -1055,8 +1053,7 @@ void Parser::AnnotateTemplateIdTokenAsType() {
/// \brief Determine whether the given token can end a template argument.
static bool isEndOfTemplateArgument(Token Tok) {
- return Tok.is(tok::comma) || Tok.is(tok::greater) ||
- Tok.is(tok::greatergreater);
+ return Tok.isOneOf(tok::comma, tok::greater, tok::greatergreater);
}
/// \brief Parse a C++ template template argument.
@@ -1217,7 +1214,7 @@ bool Parser::IsTemplateArgumentList(unsigned Skip) {
ConsumeToken();
// If we have a '>' or a ',' then this is a template argument list.
- return Tok.is(tok::greater) || Tok.is(tok::comma);
+ return Tok.isOneOf(tok::greater, tok::comma);
}
/// ParseTemplateArgumentList - Parse a C++ template-argument-list
@@ -1339,8 +1336,8 @@ void Parser::ParseLateTemplatedFuncDef(LateParsedTemplate &LPT) {
// Consume the previously pushed token.
ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true);
- assert((Tok.is(tok::l_brace) || Tok.is(tok::colon) || Tok.is(tok::kw_try))
- && "Inline method not starting with '{', ':' or 'try'");
+ assert(Tok.isOneOf(tok::l_brace, tok::colon, tok::kw_try) &&
+ "Inline method not starting with '{', ':' or 'try'");
// Parse the method body. Function body parsing code is similar enough
// to be re-used for method bodies as well.
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseTentative.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseTentative.cpp
index abf16fa..d63cf24 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseTentative.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseTentative.cpp
@@ -179,8 +179,8 @@ Parser::TPResult Parser::TryConsumeDeclarationSpecifier() {
ConsumeToken();
// Skip attributes.
- while (Tok.is(tok::l_square) || Tok.is(tok::kw___attribute) ||
- Tok.is(tok::kw___declspec) || Tok.is(tok::kw_alignas)) {
+ while (Tok.isOneOf(tok::l_square, tok::kw___attribute, tok::kw___declspec,
+ tok::kw_alignas)) {
if (Tok.is(tok::l_square)) {
ConsumeBracket();
if (!SkipUntil(tok::r_square))
@@ -195,8 +195,8 @@ Parser::TPResult Parser::TryConsumeDeclarationSpecifier() {
}
}
- if ((Tok.is(tok::identifier) || Tok.is(tok::coloncolon) ||
- Tok.is(tok::kw_decltype) || Tok.is(tok::annot_template_id)) &&
+ if (Tok.isOneOf(tok::identifier, tok::coloncolon, tok::kw_decltype,
+ tok::annot_template_id) &&
TryAnnotateCXXScopeToken())
return TPResult::Error;
if (Tok.is(tok::annot_cxxscope))
@@ -289,7 +289,7 @@ Parser::TPResult Parser::TryParseInitDeclaratorList() {
return TPR;
// [GNU] simple-asm-expr[opt] attributes[opt]
- if (Tok.is(tok::kw_asm) || Tok.is(tok::kw___attribute))
+ if (Tok.isOneOf(tok::kw_asm, tok::kw___attribute))
return TPResult::True;
// initializer[opt]
@@ -370,8 +370,7 @@ bool Parser::isCXXConditionDeclaration() {
if (TPR == TPResult::Ambiguous) {
// '='
// [GNU] simple-asm-expr[opt] attributes[opt]
- if (Tok.is(tok::equal) ||
- Tok.is(tok::kw_asm) || Tok.is(tok::kw___attribute))
+ if (Tok.isOneOf(tok::equal, tok::kw_asm, tok::kw___attribute))
TPR = TPResult::True;
else if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace))
TPR = TPResult::True;
@@ -448,7 +447,7 @@ bool Parser::isCXXTypeId(TentativeCXXTypeIdContext Context, bool &isAmbiguous) {
// the abstract declarator we encounter a '>', '>>' (in C++0x), or
// ',', this is a type-id. Otherwise, it's an expression.
} else if (Context == TypeIdAsTemplateArgument &&
- (Tok.is(tok::greater) || Tok.is(tok::comma) ||
+ (Tok.isOneOf(tok::greater, tok::comma) ||
(getLangOpts().CPlusPlus11 && Tok.is(tok::greatergreater)))) {
TPR = TPResult::True;
isAmbiguous = true;
@@ -624,18 +623,17 @@ Parser::isCXX11AttributeSpecifier(bool Disambiguate,
Parser::TPResult Parser::TryParsePtrOperatorSeq() {
while (true) {
- if (Tok.is(tok::coloncolon) || Tok.is(tok::identifier))
+ if (Tok.isOneOf(tok::coloncolon, tok::identifier))
if (TryAnnotateCXXScopeToken(true))
return TPResult::Error;
- if (Tok.is(tok::star) || Tok.is(tok::amp) || Tok.is(tok::caret) ||
- Tok.is(tok::ampamp) ||
+ if (Tok.isOneOf(tok::star, tok::amp, tok::caret, tok::ampamp) ||
(Tok.is(tok::annot_cxxscope) && NextToken().is(tok::star))) {
// ptr-operator
ConsumeToken();
- while (Tok.is(tok::kw_const) ||
- Tok.is(tok::kw_volatile) ||
- Tok.is(tok::kw_restrict))
+ while (Tok.isOneOf(tok::kw_const, tok::kw_volatile, tok::kw_restrict,
+ tok::kw___nonnull, tok::kw___nullable,
+ tok::kw___null_unspecified))
ConsumeToken();
} else {
return TPResult::True;
@@ -803,7 +801,7 @@ Parser::TPResult Parser::TryParseDeclarator(bool mayBeAbstract,
if (Tok.is(tok::ellipsis))
ConsumeToken();
- if ((Tok.is(tok::identifier) || Tok.is(tok::kw_operator) ||
+ if ((Tok.isOneOf(tok::identifier, tok::kw_operator) ||
(Tok.is(tok::annot_cxxscope) && (NextToken().is(tok::identifier) ||
NextToken().is(tok::kw_operator)))) &&
mayHaveIdentifier) {
@@ -833,14 +831,9 @@ Parser::TPResult Parser::TryParseDeclarator(bool mayBeAbstract,
// '(' declarator ')'
// '(' attributes declarator ')'
// '(' abstract-declarator ')'
- if (Tok.is(tok::kw___attribute) ||
- Tok.is(tok::kw___declspec) ||
- Tok.is(tok::kw___cdecl) ||
- Tok.is(tok::kw___stdcall) ||
- Tok.is(tok::kw___fastcall) ||
- Tok.is(tok::kw___thiscall) ||
- Tok.is(tok::kw___vectorcall) ||
- Tok.is(tok::kw___unaligned))
+ if (Tok.isOneOf(tok::kw___attribute, tok::kw___declspec, tok::kw___cdecl,
+ tok::kw___stdcall, tok::kw___fastcall, tok::kw___thiscall,
+ tok::kw___vectorcall, tok::kw___unaligned))
return TPResult::True; // attributes indicate declaration
TPResult TPR = TryParseDeclarator(mayBeAbstract, mayHaveIdentifier);
if (TPR != TPResult::Ambiguous)
@@ -1015,9 +1008,8 @@ class TentativeParseCCC : public CorrectionCandidateCallback {
public:
TentativeParseCCC(const Token &Next) {
WantRemainingKeywords = false;
- WantTypeSpecifiers = Next.is(tok::l_paren) || Next.is(tok::r_paren) ||
- Next.is(tok::greater) || Next.is(tok::l_brace) ||
- Next.is(tok::identifier);
+ WantTypeSpecifiers = Next.isOneOf(tok::l_paren, tok::r_paren, tok::greater,
+ tok::l_brace, tok::identifier);
}
bool ValidateCandidate(const TypoCorrection &Candidate) override {
@@ -1201,8 +1193,8 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
case tok::coloncolon: { // ::foo::bar
const Token &Next = NextToken();
- if (Next.is(tok::kw_new) || // ::new
- Next.is(tok::kw_delete)) // ::delete
+ if (Next.isOneOf(tok::kw_new, // ::new
+ tok::kw_delete)) // ::delete
return TPResult::False;
}
// Fall through.
@@ -1284,6 +1276,9 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
case tok::kw___ptr32:
case tok::kw___forceinline:
case tok::kw___unaligned:
+ case tok::kw___nonnull:
+ case tok::kw___nullable:
+ case tok::kw___null_unspecified:
return TPResult::True;
// Borland
@@ -1603,12 +1598,10 @@ bool Parser::isCXXFunctionDeclarator(bool *IsAmbiguous) {
TPR = TPResult::False;
else {
const Token &Next = NextToken();
- if (Next.is(tok::amp) || Next.is(tok::ampamp) ||
- Next.is(tok::kw_const) || Next.is(tok::kw_volatile) ||
- Next.is(tok::kw_throw) || Next.is(tok::kw_noexcept) ||
- Next.is(tok::l_square) || isCXX11VirtSpecifier(Next) ||
- Next.is(tok::l_brace) || Next.is(tok::kw_try) ||
- Next.is(tok::equal) || Next.is(tok::arrow))
+ if (Next.isOneOf(tok::amp, tok::ampamp, tok::kw_const, tok::kw_volatile,
+ tok::kw_throw, tok::kw_noexcept, tok::l_square,
+ tok::l_brace, tok::kw_try, tok::equal, tok::arrow) ||
+ isCXX11VirtSpecifier(Next))
// The next token cannot appear after a constructor-style initializer,
// and can appear next in a function definition. This must be a function
// declarator.
@@ -1729,8 +1722,8 @@ Parser::TryParseParameterDeclarationClause(bool *InvalidAsDeclaration,
// parameter-declaration-clause, and the last param is missing its default
// argument.
if (VersusTemplateArgument)
- return (Tok.is(tok::equal) || Tok.is(tok::r_paren)) ? TPResult::True
- : TPResult::False;
+ return Tok.isOneOf(tok::equal, tok::r_paren) ? TPResult::True
+ : TPResult::False;
if (Tok.is(tok::equal)) {
// '=' assignment-expression
@@ -1783,13 +1776,11 @@ Parser::TPResult Parser::TryParseFunctionDeclarator() {
return TPResult::Error;
// cv-qualifier-seq
- while (Tok.is(tok::kw_const) ||
- Tok.is(tok::kw_volatile) ||
- Tok.is(tok::kw_restrict) )
+ while (Tok.isOneOf(tok::kw_const, tok::kw_volatile, tok::kw_restrict))
ConsumeToken();
// ref-qualifier[opt]
- if (Tok.is(tok::amp) || Tok.is(tok::ampamp))
+ if (Tok.isOneOf(tok::amp, tok::ampamp))
ConsumeToken();
// exception-specification
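With the nullability keywords accepted in the qualifier loops above, tentative parsing still classifies declarators that carry them as declarations. A small sketch (hypothetical declaration):

    void f(int * __nonnull * __nullable pp);   // qualifiers after '*' are consumed as part of the ptr-operator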
diff --git a/contrib/llvm/tools/clang/lib/Parse/Parser.cpp b/contrib/llvm/tools/clang/lib/Parse/Parser.cpp
index dea7a69..e32df95 100644
--- a/contrib/llvm/tools/clang/lib/Parse/Parser.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/Parser.cpp
@@ -463,6 +463,10 @@ void Parser::Initialize() {
ObjCTypeQuals[objc_oneway] = &PP.getIdentifierTable().get("oneway");
ObjCTypeQuals[objc_bycopy] = &PP.getIdentifierTable().get("bycopy");
ObjCTypeQuals[objc_byref] = &PP.getIdentifierTable().get("byref");
+ ObjCTypeQuals[objc_nonnull] = &PP.getIdentifierTable().get("nonnull");
+ ObjCTypeQuals[objc_nullable] = &PP.getIdentifierTable().get("nullable");
+ ObjCTypeQuals[objc_null_unspecified]
+ = &PP.getIdentifierTable().get("null_unspecified");
}
Ident_instancetype = nullptr;
diff --git a/contrib/llvm/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp b/contrib/llvm/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 97f4a8d..36030b9 100644
--- a/contrib/llvm/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -574,28 +574,29 @@ namespace {
/// ContainsReference - A visitor class to search for references to
/// a particular declaration (the needle) within any evaluated component of an
/// expression (recursively).
-class ContainsReference : public EvaluatedExprVisitor<ContainsReference> {
+class ContainsReference : public ConstEvaluatedExprVisitor<ContainsReference> {
bool FoundReference;
const DeclRefExpr *Needle;
public:
+ typedef ConstEvaluatedExprVisitor<ContainsReference> Inherited;
+
ContainsReference(ASTContext &Context, const DeclRefExpr *Needle)
- : EvaluatedExprVisitor<ContainsReference>(Context),
- FoundReference(false), Needle(Needle) {}
+ : Inherited(Context), FoundReference(false), Needle(Needle) {}
- void VisitExpr(Expr *E) {
+ void VisitExpr(const Expr *E) {
// Stop evaluating if we already have a reference.
if (FoundReference)
return;
- EvaluatedExprVisitor<ContainsReference>::VisitExpr(E);
+ Inherited::VisitExpr(E);
}
- void VisitDeclRefExpr(DeclRefExpr *E) {
+ void VisitDeclRefExpr(const DeclRefExpr *E) {
if (E == Needle)
FoundReference = true;
else
- EvaluatedExprVisitor<ContainsReference>::VisitDeclRefExpr(E);
+ Inherited::VisitDeclRefExpr(E);
}
bool doesContainReference() const { return FoundReference; }
@@ -854,7 +855,7 @@ static bool DiagnoseUninitializedUse(Sema &S, const VarDecl *VD,
return false;
ContainsReference CR(S.Context, DRE);
- CR.Visit(const_cast<Expr*>(Initializer));
+ CR.Visit(Initializer);
if (CR.doesContainReference()) {
S.Diag(DRE->getLocStart(),
diag::warn_uninit_self_reference_in_init)
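The now const-correct visitor still serves the same purpose: finding a reference to the variable inside its own initializer. A minimal example of what it flags:

    int x = x + 1;   // 'x' appears in its own initializer -> warn_uninit_self_reference_in_init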
diff --git a/contrib/llvm/tools/clang/lib/Sema/Sema.cpp b/contrib/llvm/tools/clang/lib/Sema/Sema.cpp
index 50edc42..db1251f 100644
--- a/contrib/llvm/tools/clang/lib/Sema/Sema.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/Sema.cpp
@@ -356,6 +356,19 @@ ExprResult Sema::ImpCastExprToType(Expr *E, QualType Ty,
assert((VK == VK_RValue || !E->isRValue()) && "can't cast rvalue to lvalue");
#endif
+ // Check whether we're implicitly casting from a nullable type to a nonnull
+ // type.
+ if (auto exprNullability = E->getType()->getNullability(Context)) {
+ if (*exprNullability == NullabilityKind::Nullable) {
+ if (auto typeNullability = Ty->getNullability(Context)) {
+ if (*typeNullability == NullabilityKind::NonNull) {
+ Diag(E->getLocStart(), diag::warn_nullability_lost)
+ << E->getType() << Ty;
+ }
+ }
+ }
+ }
+
QualType ExprTy = Context.getCanonicalType(E->getType());
QualType TypeTy = Context.getCanonicalType(Ty);
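A sketch of the nullability-dropping conversion the new check targets, using the __nonnull/__nullable spellings added elsewhere in this change (whether a given expression reaches this path depends on where Sema materializes the implicit cast):

    int * __nullable maybeNull(void);
    int * __nonnull p = maybeNull();   // nullable value used where a nonnull pointer is expected;
                                       // candidate for warn_nullability_lost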
@@ -551,10 +564,10 @@ static bool MethodsAndNestedClassesComplete(const CXXRecordDecl *RD,
if (const CXXMethodDecl *M = dyn_cast<CXXMethodDecl>(*I))
Complete = M->isDefined() || (M->isPure() && !isa<CXXDestructorDecl>(M));
else if (const FunctionTemplateDecl *F = dyn_cast<FunctionTemplateDecl>(*I))
- // If the template function is marked as late template parsed at this point,
- // it has not been instantiated and therefore we have not performed semantic
- // analysis on it yet, so we cannot know if the type can be considered
- // complete.
+ // If the template function is marked as late template parsed at this
+ // point, it has not been instantiated and therefore we have not
+ // performed semantic analysis on it yet, so we cannot know if the type
+ // can be considered complete.
Complete = !F->getTemplatedDecl()->isLateTemplateParsed() &&
F->getTemplatedDecl()->isDefined();
else if (const CXXRecordDecl *R = dyn_cast<CXXRecordDecl>(*I)) {
@@ -722,11 +735,7 @@ void Sema::ActOnEndOfTranslationUnit() {
ModMap.resolveConflicts(Mod, /*Complain=*/false);
// Queue the submodules, so their exports will also be resolved.
- for (Module::submodule_iterator Sub = Mod->submodule_begin(),
- SubEnd = Mod->submodule_end();
- Sub != SubEnd; ++Sub) {
- Stack.push_back(*Sub);
- }
+ Stack.append(Mod->submodule_begin(), Mod->submodule_end());
}
}
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp
index c3b81b6..f76727c 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp
@@ -836,6 +836,16 @@ bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
SemaBuiltinConstantArgRange(TheCall, 2, 0, 1);
}
+ if (BuiltinID == ARM::BI__builtin_arm_rsr64 ||
+ BuiltinID == ARM::BI__builtin_arm_wsr64)
+ return SemaBuiltinARMSpecialReg(BuiltinID, TheCall, 0, 3, false);
+
+ if (BuiltinID == ARM::BI__builtin_arm_rsr ||
+ BuiltinID == ARM::BI__builtin_arm_rsrp ||
+ BuiltinID == ARM::BI__builtin_arm_wsr ||
+ BuiltinID == ARM::BI__builtin_arm_wsrp)
+ return SemaBuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, true);
+
if (CheckNeonBuiltinFunctionCall(BuiltinID, TheCall))
return true;
@@ -876,6 +886,16 @@ bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID,
SemaBuiltinConstantArgRange(TheCall, 4, 0, 1);
}
+ if (BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
+ BuiltinID == AArch64::BI__builtin_arm_wsr64)
+ return SemaBuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, false);
+
+ if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
+ BuiltinID == AArch64::BI__builtin_arm_rsrp ||
+ BuiltinID == AArch64::BI__builtin_arm_wsr ||
+ BuiltinID == AArch64::BI__builtin_arm_wsrp)
+ return SemaBuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, true);
+
if (CheckNeonBuiltinFunctionCall(BuiltinID, TheCall))
return true;
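Rough usage sketch of the builtins now routed to SemaBuiltinARMSpecialReg; the strings are shaped only to satisfy the checks defined later in this file and do not name real registers:

    unsigned v = __builtin_arm_rsr("cp15:0:c7:c4:0");          // 32-bit ARM target: five-field coprocessor form
    unsigned long long w = __builtin_arm_rsr64("1:3:7:4:0");   // AArch64 target: five integer fields
    __builtin_arm_wsr("some_register_name", v);                 // single field: a register name (AllowName)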
@@ -1095,6 +1115,13 @@ bool Sema::getFormatStringInfo(const FormatAttr *Format, bool IsCXXMember,
/// \brief Returns true if the value evaluates to null.
static bool CheckNonNullExpr(Sema &S,
const Expr *Expr) {
+ // If the expression has non-null type, it doesn't evaluate to null.
+ if (auto nullability
+ = Expr->IgnoreImplicit()->getType()->getNullability(S.Context)) {
+ if (*nullability == NullabilityKind::NonNull)
+ return false;
+ }
+
// As a special case, transparent unions initialized with zero are
// considered null for the purposes of the nonnull attribute.
if (const RecordType *UT = Expr->getType()->getAsUnionType()) {
@@ -1170,56 +1197,111 @@ DiagnoseCStringFormatDirectiveInCFAPI(Sema &S,
}
}
+/// Determine whether the given type has a non-null nullability annotation.
+static bool isNonNullType(ASTContext &ctx, QualType type) {
+ if (auto nullability = type->getNullability(ctx))
+ return *nullability == NullabilityKind::NonNull;
+
+ return false;
+}
+
static void CheckNonNullArguments(Sema &S,
const NamedDecl *FDecl,
+ const FunctionProtoType *Proto,
ArrayRef<const Expr *> Args,
SourceLocation CallSiteLoc) {
+ assert((FDecl || Proto) && "Need a function declaration or prototype");
+
// Check the attributes attached to the method/function itself.
llvm::SmallBitVector NonNullArgs;
- for (const auto *NonNull : FDecl->specific_attrs<NonNullAttr>()) {
- if (!NonNull->args_size()) {
- // Easy case: all pointer arguments are nonnull.
- for (const auto *Arg : Args)
- if (S.isValidPointerAttrType(Arg->getType()))
- CheckNonNullArgument(S, Arg, CallSiteLoc);
- return;
- }
+ if (FDecl) {
+ // Handle the nonnull attribute on the function/method declaration itself.
+ for (const auto *NonNull : FDecl->specific_attrs<NonNullAttr>()) {
+ if (!NonNull->args_size()) {
+ // Easy case: all pointer arguments are nonnull.
+ for (const auto *Arg : Args)
+ if (S.isValidPointerAttrType(Arg->getType()))
+ CheckNonNullArgument(S, Arg, CallSiteLoc);
+ return;
+ }
- for (unsigned Val : NonNull->args()) {
- if (Val >= Args.size())
- continue;
- if (NonNullArgs.empty())
- NonNullArgs.resize(Args.size());
- NonNullArgs.set(Val);
+ for (unsigned Val : NonNull->args()) {
+ if (Val >= Args.size())
+ continue;
+ if (NonNullArgs.empty())
+ NonNullArgs.resize(Args.size());
+ NonNullArgs.set(Val);
+ }
}
}
- // Check the attributes on the parameters.
- ArrayRef<ParmVarDecl*> parms;
- if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(FDecl))
- parms = FD->parameters();
- else if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(FDecl))
- parms = MD->parameters();
-
- unsigned ArgIndex = 0;
- for (ArrayRef<ParmVarDecl*>::iterator I = parms.begin(), E = parms.end();
- I != E; ++I, ++ArgIndex) {
- const ParmVarDecl *PVD = *I;
- if (PVD->hasAttr<NonNullAttr>() ||
- (ArgIndex < NonNullArgs.size() && NonNullArgs[ArgIndex]))
- CheckNonNullArgument(S, Args[ArgIndex], CallSiteLoc);
+ if (FDecl && (isa<FunctionDecl>(FDecl) || isa<ObjCMethodDecl>(FDecl))) {
+ // Handle the nonnull attribute on the parameters of the
+ // function/method.
+ ArrayRef<ParmVarDecl*> parms;
+ if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(FDecl))
+ parms = FD->parameters();
+ else
+ parms = cast<ObjCMethodDecl>(FDecl)->parameters();
+
+ unsigned ParamIndex = 0;
+ for (ArrayRef<ParmVarDecl*>::iterator I = parms.begin(), E = parms.end();
+ I != E; ++I, ++ParamIndex) {
+ const ParmVarDecl *PVD = *I;
+ if (PVD->hasAttr<NonNullAttr>() ||
+ isNonNullType(S.Context, PVD->getType())) {
+ if (NonNullArgs.empty())
+ NonNullArgs.resize(Args.size());
+
+ NonNullArgs.set(ParamIndex);
+ }
+ }
+ } else {
+ // If we have a non-function, non-method declaration but no
+ // function prototype, try to dig out the function prototype.
+ if (!Proto) {
+ if (const ValueDecl *VD = dyn_cast<ValueDecl>(FDecl)) {
+ QualType type = VD->getType().getNonReferenceType();
+ if (auto pointerType = type->getAs<PointerType>())
+ type = pointerType->getPointeeType();
+ else if (auto blockType = type->getAs<BlockPointerType>())
+ type = blockType->getPointeeType();
+ // FIXME: data member pointers?
+
+ // Dig out the function prototype, if there is one.
+ Proto = type->getAs<FunctionProtoType>();
+ }
+ }
+
+ // Fill in non-null argument information from the nullability
+ // information on the parameter types (if we have them).
+ if (Proto) {
+ unsigned Index = 0;
+ for (auto paramType : Proto->getParamTypes()) {
+ if (isNonNullType(S.Context, paramType)) {
+ if (NonNullArgs.empty())
+ NonNullArgs.resize(Args.size());
+
+ NonNullArgs.set(Index);
+ }
+
+ ++Index;
+ }
+ }
}
- // In case this is a variadic call, check any remaining arguments.
- for (/**/; ArgIndex < NonNullArgs.size(); ++ArgIndex)
+ // Check for non-null arguments.
+ for (unsigned ArgIndex = 0, ArgIndexEnd = NonNullArgs.size();
+ ArgIndex != ArgIndexEnd; ++ArgIndex) {
if (NonNullArgs[ArgIndex])
CheckNonNullArgument(S, Args[ArgIndex], CallSiteLoc);
+ }
}
/// Handles the checks for format strings, non-POD arguments to vararg
/// functions, and NULL arguments passed to non-NULL parameters.
-void Sema::checkCall(NamedDecl *FDecl, ArrayRef<const Expr *> Args,
- unsigned NumParams, bool IsMemberFunction,
+void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto,
+ ArrayRef<const Expr *> Args, bool IsMemberFunction,
SourceLocation Loc, SourceRange Range,
VariadicCallType CallType) {
// FIXME: We should check as much as we can in the template definition.
@@ -1241,6 +1323,13 @@ void Sema::checkCall(NamedDecl *FDecl, ArrayRef<const Expr *> Args,
// Refuse POD arguments that weren't caught by the format string
// checks above.
if (CallType != VariadicDoesNotApply) {
+ unsigned NumParams = Proto ? Proto->getNumParams()
+ : FDecl && isa<FunctionDecl>(FDecl)
+ ? cast<FunctionDecl>(FDecl)->getNumParams()
+ : FDecl && isa<ObjCMethodDecl>(FDecl)
+ ? cast<ObjCMethodDecl>(FDecl)->param_size()
+ : 0;
+
for (unsigned ArgIdx = NumParams; ArgIdx < Args.size(); ++ArgIdx) {
// Args[ArgIdx] can be null in malformed code.
if (const Expr *Arg = Args[ArgIdx]) {
@@ -1250,12 +1339,14 @@ void Sema::checkCall(NamedDecl *FDecl, ArrayRef<const Expr *> Args,
}
}
- if (FDecl) {
- CheckNonNullArguments(*this, FDecl, Args, Loc);
+ if (FDecl || Proto) {
+ CheckNonNullArguments(*this, FDecl, Proto, Args, Loc);
// Type safety checking.
- for (const auto *I : FDecl->specific_attrs<ArgumentWithTypeTagAttr>())
- CheckArgumentWithTypeTag(I, Args.data());
+ if (FDecl) {
+ for (const auto *I : FDecl->specific_attrs<ArgumentWithTypeTagAttr>())
+ CheckArgumentWithTypeTag(I, Args.data());
+ }
}
}
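With the prototype now threaded through checkCall, nullability on parameter types participates in the non-null argument check even without a named function declaration. A small sketch using the __nonnull qualifier (hypothetical names):

    void takeBuffer(int * __nonnull buf);
    void (*callback)(int * __nonnull) = takeBuffer;
    callback(nullptr);   // the parameter's nonnull type comes from the function prototype,
                         // so CheckNonNullArguments can flag the null argument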
@@ -1267,8 +1358,8 @@ void Sema::CheckConstructorCall(FunctionDecl *FDecl,
SourceLocation Loc) {
VariadicCallType CallType =
Proto->isVariadic() ? VariadicConstructor : VariadicDoesNotApply;
- checkCall(FDecl, Args, Proto->getNumParams(),
- /*IsMemberFunction=*/true, Loc, SourceRange(), CallType);
+ checkCall(FDecl, Proto, Args, /*IsMemberFunction=*/true, Loc, SourceRange(),
+ CallType);
}
/// CheckFunctionCall - Check a direct function call for various correctness
@@ -1281,7 +1372,6 @@ bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
IsMemberOperatorCall;
VariadicCallType CallType = getVariadicCallType(FDecl, Proto,
TheCall->getCallee());
- unsigned NumParams = Proto ? Proto->getNumParams() : 0;
Expr** Args = TheCall->getArgs();
unsigned NumArgs = TheCall->getNumArgs();
if (IsMemberOperatorCall) {
@@ -1291,7 +1381,7 @@ bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
++Args;
--NumArgs;
}
- checkCall(FDecl, llvm::makeArrayRef(Args, NumArgs), NumParams,
+ checkCall(FDecl, Proto, llvm::makeArrayRef(Args, NumArgs),
IsMemberFunction, TheCall->getRParenLoc(),
TheCall->getCallee()->getSourceRange(), CallType);
@@ -1325,9 +1415,9 @@ bool Sema::CheckObjCMethodCall(ObjCMethodDecl *Method, SourceLocation lbrac,
VariadicCallType CallType =
Method->isVariadic() ? VariadicMethod : VariadicDoesNotApply;
- checkCall(Method, Args, Method->param_size(),
- /*IsMemberFunction=*/false,
- lbrac, Method->getSourceRange(), CallType);
+ checkCall(Method, nullptr, Args,
+ /*IsMemberFunction=*/false, lbrac, Method->getSourceRange(),
+ CallType);
return false;
}
@@ -1336,13 +1426,14 @@ bool Sema::CheckPointerCall(NamedDecl *NDecl, CallExpr *TheCall,
const FunctionProtoType *Proto) {
QualType Ty;
if (const auto *V = dyn_cast<VarDecl>(NDecl))
- Ty = V->getType();
+ Ty = V->getType().getNonReferenceType();
else if (const auto *F = dyn_cast<FieldDecl>(NDecl))
- Ty = F->getType();
+ Ty = F->getType().getNonReferenceType();
else
return false;
- if (!Ty->isBlockPointerType() && !Ty->isFunctionPointerType())
+ if (!Ty->isBlockPointerType() && !Ty->isFunctionPointerType() &&
+ !Ty->isFunctionProtoType())
return false;
VariadicCallType CallType;
@@ -1353,11 +1444,10 @@ bool Sema::CheckPointerCall(NamedDecl *NDecl, CallExpr *TheCall,
} else { // Ty->isFunctionPointerType()
CallType = VariadicFunction;
}
- unsigned NumParams = Proto ? Proto->getNumParams() : 0;
- checkCall(NDecl, llvm::makeArrayRef(TheCall->getArgs(),
- TheCall->getNumArgs()),
- NumParams, /*IsMemberFunction=*/false, TheCall->getRParenLoc(),
+ checkCall(NDecl, Proto,
+ llvm::makeArrayRef(TheCall->getArgs(), TheCall->getNumArgs()),
+ /*IsMemberFunction=*/false, TheCall->getRParenLoc(),
TheCall->getCallee()->getSourceRange(), CallType);
return false;
@@ -1368,11 +1458,9 @@ bool Sema::CheckPointerCall(NamedDecl *NDecl, CallExpr *TheCall,
bool Sema::CheckOtherCall(CallExpr *TheCall, const FunctionProtoType *Proto) {
VariadicCallType CallType = getVariadicCallType(/*FDecl=*/nullptr, Proto,
TheCall->getCallee());
- unsigned NumParams = Proto ? Proto->getNumParams() : 0;
-
- checkCall(/*FDecl=*/nullptr,
+ checkCall(/*FDecl=*/nullptr, Proto,
llvm::makeArrayRef(TheCall->getArgs(), TheCall->getNumArgs()),
- NumParams, /*IsMemberFunction=*/false, TheCall->getRParenLoc(),
+ /*IsMemberFunction=*/false, TheCall->getRParenLoc(),
TheCall->getCallee()->getSourceRange(), CallType);
return false;
@@ -2593,6 +2681,107 @@ bool Sema::SemaBuiltinConstantArgRange(CallExpr *TheCall, int ArgNum,
return false;
}
+/// SemaBuiltinARMSpecialReg - Check that argument ArgNum of CallExpr TheCall
+/// is a valid ARM/AArch64 special register string literal.
+bool Sema::SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
+ int ArgNum, unsigned ExpectedFieldNum,
+ bool AllowName) {
+ bool IsARMBuiltin = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
+ BuiltinID == ARM::BI__builtin_arm_wsr64 ||
+ BuiltinID == ARM::BI__builtin_arm_rsr ||
+ BuiltinID == ARM::BI__builtin_arm_rsrp ||
+ BuiltinID == ARM::BI__builtin_arm_wsr ||
+ BuiltinID == ARM::BI__builtin_arm_wsrp;
+ bool IsAArch64Builtin = BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
+ BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
+ BuiltinID == AArch64::BI__builtin_arm_rsr ||
+ BuiltinID == AArch64::BI__builtin_arm_rsrp ||
+ BuiltinID == AArch64::BI__builtin_arm_wsr ||
+ BuiltinID == AArch64::BI__builtin_arm_wsrp;
+ assert((IsARMBuiltin || IsAArch64Builtin) && "Unexpected ARM builtin.");
+
+ // We can't check the value of a dependent argument.
+ Expr *Arg = TheCall->getArg(ArgNum);
+ if (Arg->isTypeDependent() || Arg->isValueDependent())
+ return false;
+
+ // Check if the argument is a string literal.
+ if (!isa<StringLiteral>(Arg->IgnoreParenImpCasts()))
+ return Diag(TheCall->getLocStart(), diag::err_expr_not_string_literal)
+ << Arg->getSourceRange();
+
+ // Check the type of special register given.
+ StringRef Reg = cast<StringLiteral>(Arg->IgnoreParenImpCasts())->getString();
+ SmallVector<StringRef, 6> Fields;
+ Reg.split(Fields, ":");
+
+ if (Fields.size() != ExpectedFieldNum && !(AllowName && Fields.size() == 1))
+ return Diag(TheCall->getLocStart(), diag::err_arm_invalid_specialreg)
+ << Arg->getSourceRange();
+
+ // If the string is the name of a register then we cannot check that it is
+ // valid here, but if the string is of one of the forms described in ACLE then we
+ // can check that the supplied fields are integers and within the valid
+ // ranges.
+ if (Fields.size() > 1) {
+ bool FiveFields = Fields.size() == 5;
+
+ bool ValidString = true;
+ if (IsARMBuiltin) {
+ ValidString &= Fields[0].startswith_lower("cp") ||
+ Fields[0].startswith_lower("p");
+ if (ValidString)
+ Fields[0] =
+ Fields[0].drop_front(Fields[0].startswith_lower("cp") ? 2 : 1);
+
+ ValidString &= Fields[2].startswith_lower("c");
+ if (ValidString)
+ Fields[2] = Fields[2].drop_front(1);
+
+ if (FiveFields) {
+ ValidString &= Fields[3].startswith_lower("c");
+ if (ValidString)
+ Fields[3] = Fields[3].drop_front(1);
+ }
+ }
+
+ SmallVector<int, 5> Ranges;
+ if (FiveFields)
+ Ranges.append({IsAArch64Builtin ? 1 : 15, 7, 7, 15, 15});
+ else
+ Ranges.append({15, 7, 15});
+
+ for (unsigned i=0; i<Fields.size(); ++i) {
+ int IntField;
+ ValidString &= !Fields[i].getAsInteger(10, IntField);
+ ValidString &= (IntField >= 0 && IntField <= Ranges[i]);
+ }
+
+ if (!ValidString)
+ return Diag(TheCall->getLocStart(), diag::err_arm_invalid_specialreg)
+ << Arg->getSourceRange();
+
+ } else if (IsAArch64Builtin && Fields.size() == 1) {
+ // If the register name is one of those that appear in the condition below
+ // and the special register builtin being used is one of the write builtins,
+ // then we require that the argument provided for writing to the register
+ // is an integer constant expression. This is because it will be lowered to
+ // an MSR (immediate) instruction, so we need to know the immediate at
+ // compile time.
+ if (TheCall->getNumArgs() != 2)
+ return false;
+
+ std::string RegLower = Reg.lower();
+ if (RegLower != "spsel" && RegLower != "daifset" && RegLower != "daifclr" &&
+ RegLower != "pan" && RegLower != "uao")
+ return false;
+
+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15);
+ }
+
+ return false;
+}
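For reference, a minimal sketch of the string forms this check accepts; the encodings below are made up for illustration (they satisfy the field and range checks but are not taken from this patch) and assume an AArch64 target:

    /* Illustrative uses of the ACLE special-register builtins. */
    #include <stdint.h>

    uint64_t read_sysreg(void) {
      /* Five fields "op0:op1:CRn:CRm:op2", each a decimal integer within
         the ranges enforced by SemaBuiltinARMSpecialReg. */
      return __builtin_arm_rsr64("1:3:3:4:5");
    }

    void write_spsel(void) {
      /* A bare register name is also accepted (AllowName). For "spsel" the
         value written must be an integer constant expression in [0, 15],
         since it is lowered to an MSR (immediate) instruction. */
      __builtin_arm_wsr64("spsel", 1);
    }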
+
/// SemaBuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val).
/// This checks that the target supports __builtin_longjmp and
/// that val is a constant 1.
@@ -5559,7 +5748,8 @@ Sema::CheckReturnValExpr(Expr *RetValExp, QualType lhsType,
CheckReturnStackAddr(*this, RetValExp, lhsType, ReturnLoc);
// Check if the return value is null but should not be.
- if (Attrs && hasSpecificAttr<ReturnsNonNullAttr>(*Attrs) &&
+ if (((Attrs && hasSpecificAttr<ReturnsNonNullAttr>(*Attrs)) ||
+ (!isObjCMethod && isNonNullType(Context, lhsType))) &&
CheckNonNullExpr(*this, RetValExp))
Diag(ReturnLoc, diag::warn_null_ret)
<< (isObjCMethod ? 1 : 0) << RetValExp->getSourceRange();
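A hedged sketch of the case the widened condition is meant to catch: returning a null pointer from a function whose return type itself carries nonnull nullability, even when no returns_nonnull attribute is present (the __nonnull spelling is the one added elsewhere in this import; the function is made up):

    void * __nonnull always_valid(int ok) {
      static int cookie;
      if (!ok)
        return 0;   /* now diagnosed: null returned from a nonnull-typed
                       function, via isNonNullType + CheckNonNullExpr */
      return &cookie;
    }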
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp
index fd97809..ebb6bbc 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp
@@ -1341,6 +1341,11 @@ static void AddTypeSpecifierResults(const LangOptions &LangOpts,
Builder.AddChunk(CodeCompletionString::CK_RightParen);
Results.AddResult(Result(Builder.TakeString()));
}
+
+ // Nullability
+ Results.AddResult(Result("__nonnull", CCP_Type));
+ Results.AddResult(Result("__null_unspecified", CCP_Type));
+ Results.AddResult(Result("__nullable", CCP_Type));
}
static void AddStorageSpecifiers(Sema::ParserCompletionContext CCC,
@@ -2097,7 +2102,8 @@ static void MaybeAddSentinel(Preprocessor &PP,
}
}
-static std::string formatObjCParamQualifiers(unsigned ObjCQuals) {
+static std::string formatObjCParamQualifiers(unsigned ObjCQuals,
+ QualType &Type) {
std::string Result;
if (ObjCQuals & Decl::OBJC_TQ_In)
Result += "in ";
@@ -2111,6 +2117,23 @@ static std::string formatObjCParamQualifiers(unsigned ObjCQuals) {
Result += "byref ";
if (ObjCQuals & Decl::OBJC_TQ_Oneway)
Result += "oneway ";
+ if (ObjCQuals & Decl::OBJC_TQ_CSNullability) {
+ if (auto nullability = AttributedType::stripOuterNullability(Type)) {
+ switch (*nullability) {
+ case NullabilityKind::NonNull:
+ Result += "nonnull ";
+ break;
+
+ case NullabilityKind::Nullable:
+ Result += "nullable ";
+ break;
+
+ case NullabilityKind::Unspecified:
+ Result += "null_unspecified ";
+ break;
+ }
+ }
+ }
return Result;
}
@@ -2128,13 +2151,15 @@ static std::string FormatFunctionParameter(const PrintingPolicy &Policy,
if (Param->getIdentifier() && !ObjCMethodParam && !SuppressName)
Result = Param->getIdentifier()->getName();
- Param->getType().getAsStringInternal(Result, Policy);
-
+ QualType Type = Param->getType();
if (ObjCMethodParam) {
- Result = "(" + formatObjCParamQualifiers(Param->getObjCDeclQualifier())
- + Result + ")";
+ Result = "(" + formatObjCParamQualifiers(Param->getObjCDeclQualifier(),
+ Type);
+ Result += Type.getAsString(Policy) + ")";
if (Param->getIdentifier() && !SuppressName)
Result += Param->getIdentifier()->getName();
+ } else {
+ Type.getAsStringInternal(Result, Policy);
}
return Result;
}
@@ -2182,13 +2207,16 @@ static std::string FormatFunctionParameter(const PrintingPolicy &Policy,
if (!ObjCMethodParam && Param->getIdentifier())
Result = Param->getIdentifier()->getName();
- Param->getType().getUnqualifiedType().getAsStringInternal(Result, Policy);
+ QualType Type = Param->getType().getUnqualifiedType();
if (ObjCMethodParam) {
- Result = "(" + formatObjCParamQualifiers(Param->getObjCDeclQualifier())
- + Result + ")";
+ Result = "(" + formatObjCParamQualifiers(Param->getObjCDeclQualifier(),
+ Type);
+ Result += Type.getAsString(Policy) + ")";
if (Param->getIdentifier())
Result += Param->getIdentifier()->getName();
+ } else {
+ Type.getAsStringInternal(Result, Policy);
}
return Result;
@@ -2762,9 +2790,10 @@ CodeCompletionResult::CreateCodeCompletionString(ASTContext &Ctx,
if ((*P)->getType()->isBlockPointerType() && !DeclaringEntity)
Arg = FormatFunctionParameter(Policy, *P, true);
else {
- (*P)->getType().getAsStringInternal(Arg, Policy);
- Arg = "(" + formatObjCParamQualifiers((*P)->getObjCDeclQualifier())
- + Arg + ")";
+ QualType Type = (*P)->getType();
+ Arg = "(" + formatObjCParamQualifiers((*P)->getObjCDeclQualifier(),
+ Type);
+ Arg += Type.getAsString(Policy) + ")";
if (IdentifierInfo *II = (*P)->getIdentifier())
if (DeclaringEntity || AllParametersAreInformative)
Arg += II->getName();
@@ -4858,6 +4887,12 @@ void Sema::CodeCompleteObjCPropertyFlags(Scope *S, ObjCDeclSpec &ODS) {
Getter.AddPlaceholderChunk("method");
Results.AddResult(CodeCompletionResult(Getter.TakeString()));
}
+ if (!ObjCPropertyFlagConflicts(Attributes, ObjCDeclSpec::DQ_PR_nullability)) {
+ Results.AddResult(CodeCompletionResult("nonnull"));
+ Results.AddResult(CodeCompletionResult("nullable"));
+ Results.AddResult(CodeCompletionResult("null_unspecified"));
+ Results.AddResult(CodeCompletionResult("null_resettable"));
+ }
Results.ExitScope();
HandleCodeCompleteResults(this, CodeCompleter,
CodeCompletionContext::CCC_Other,
@@ -5107,6 +5142,11 @@ void Sema::CodeCompleteObjCPassingType(Scope *S, ObjCDeclSpec &DS,
Results.AddResult("byref");
Results.AddResult("oneway");
}
+ if ((DS.getObjCDeclQualifier() & ObjCDeclSpec::DQ_CSNullability) == 0) {
+ Results.AddResult("nonnull");
+ Results.AddResult("nullable");
+ Results.AddResult("null_unspecified");
+ }
// If we're completing the return type of an Objective-C method and the
// identifier IBAction refers to a macro, provide a completion item for
@@ -6279,7 +6319,7 @@ static void AddObjCPassingTypeChunk(QualType Type,
const PrintingPolicy &Policy,
CodeCompletionBuilder &Builder) {
Builder.AddChunk(CodeCompletionString::CK_LeftParen);
- std::string Quals = formatObjCParamQualifiers(ObjCDeclQuals);
+ std::string Quals = formatObjCParamQualifiers(ObjCDeclQuals, Type);
if (!Quals.empty())
Builder.AddTextChunk(Builder.getAllocator().CopyString(Quals));
Builder.AddTextChunk(GetCompletionTypeString(Type, Context, Policy,
@@ -7018,7 +7058,12 @@ void Sema::CodeCompleteObjCMethodDecl(Scope *S,
break;
// Add the parameter type.
- AddObjCPassingTypeChunk((*P)->getOriginalType(),
+ QualType ParamType;
+ if ((*P)->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability)
+ ParamType = (*P)->getType();
+ else
+ ParamType = (*P)->getOriginalType();
+ AddObjCPassingTypeChunk(ParamType,
(*P)->getObjCDeclQualifier(),
Context, Policy,
Builder);
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp
index 89f4b3a..aa006b3 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp
@@ -1007,7 +1007,7 @@ Corrected:
// Check for a tag type hidden by a non-type decl in a few cases where it
// seems likely a type is wanted instead of the non-type that was found.
- bool NextIsOp = NextToken.is(tok::amp) || NextToken.is(tok::star);
+ bool NextIsOp = NextToken.isOneOf(tok::amp, tok::star);
if ((NextToken.is(tok::identifier) ||
(NextIsOp &&
FirstDecl->getUnderlyingDecl()->isFunctionOrFunctionTemplate())) &&
@@ -1081,6 +1081,22 @@ void Sema::PopDeclContext() {
assert(CurContext && "Popped translation unit!");
}
+Sema::SkippedDefinitionContext Sema::ActOnTagStartSkippedDefinition(Scope *S,
+ Decl *D) {
+ // Unlike PushDeclContext, the context to which we return is not necessarily
+ // the containing DC of TD, because the new context will be some pre-existing
+ // TagDecl definition instead of a fresh one.
+ auto Result = static_cast<SkippedDefinitionContext>(CurContext);
+ CurContext = cast<TagDecl>(D)->getDefinition();
+ assert(CurContext && "skipping definition of undefined tag");
+ S->setEntity(CurContext);
+ return Result;
+}
+
+void Sema::ActOnTagFinishSkippedDefinition(SkippedDefinitionContext Context) {
+ CurContext = static_cast<decltype(CurContext)>(Context);
+}
+
/// EnterDeclaratorContext - Used when we must lookup names in the context
/// of a declarator's nested name specifier.
///
@@ -1788,7 +1804,7 @@ static void filterNonConflictingPreviousDecls(Sema &S,
NamedDecl *decl,
LookupResult &previous){
// This is only interesting when modules are enabled.
- if (!S.getLangOpts().Modules)
+ if (!S.getLangOpts().Modules && !S.getLangOpts().ModulesLocalVisibility)
return;
// Empty sets are uninteresting.
@@ -1818,7 +1834,7 @@ static void filterNonConflictingPreviousTypedefDecls(Sema &S,
TypedefNameDecl *Decl,
LookupResult &Previous) {
// This is only interesting when modules are enabled.
- if (!S.getLangOpts().Modules)
+ if (!S.getLangOpts().Modules && !S.getLangOpts().ModulesLocalVisibility)
return;
// Empty sets are uninteresting.
@@ -2449,6 +2465,32 @@ static void mergeParamDeclAttributes(ParmVarDecl *newDecl,
if (!foundAny) newDecl->dropAttrs();
}
+static void mergeParamDeclTypes(ParmVarDecl *NewParam,
+ const ParmVarDecl *OldParam,
+ Sema &S) {
+ if (auto Oldnullability = OldParam->getType()->getNullability(S.Context)) {
+ if (auto Newnullability = NewParam->getType()->getNullability(S.Context)) {
+ if (*Oldnullability != *Newnullability) {
+ unsigned unsNewnullability = static_cast<unsigned>(*Newnullability);
+ unsigned unsOldnullability = static_cast<unsigned>(*Oldnullability);
+ S.Diag(NewParam->getLocation(), diag::warn_mismatched_nullability_attr)
+ << unsNewnullability
+ << ((NewParam->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability) != 0)
+ << unsOldnullability
+ << ((OldParam->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability) != 0);
+ S.Diag(OldParam->getLocation(), diag::note_previous_declaration);
+ }
+ }
+ else {
+ QualType NewT = NewParam->getType();
+ NewT = S.Context.getAttributedType(
+ AttributedType::getNullabilityAttrKind(*Oldnullability),
+ NewT, NewT);
+ NewParam->setType(NewT);
+ }
+ }
+}
+
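To illustrate what mergeParamDeclTypes does with redeclarations (a sketch using the __nonnull/__nullable spellings added elsewhere in this import; the functions are hypothetical):

    void use(int * __nonnull p);   /* earlier declaration: parameter is nonnull */
    void use(int * __nullable p);  /* conflicting nullability: emits
                                      warn_mismatched_nullability_attr plus a
                                      note at the previous declaration */

    void fwd(int * __nonnull q);   /* earlier declaration carries nonnull */
    void fwd(int *q);              /* no nullability here: the old nonnull
                                      qualifier is copied onto this parameter */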
namespace {
/// Used in MergeFunctionDecl to keep track of function parameters in
@@ -3085,9 +3127,12 @@ bool Sema::MergeCompatibleFunctionDecls(FunctionDecl *New, FunctionDecl *Old,
// Merge attributes from the parameters. These can mismatch with K&R
// declarations.
if (New->getNumParams() == Old->getNumParams())
- for (unsigned i = 0, e = New->getNumParams(); i != e; ++i)
- mergeParamDeclAttributes(New->getParamDecl(i), Old->getParamDecl(i),
- *this);
+ for (unsigned i = 0, e = New->getNumParams(); i != e; ++i) {
+ ParmVarDecl *NewParam = New->getParamDecl(i);
+ ParmVarDecl *OldParam = Old->getParamDecl(i);
+ mergeParamDeclAttributes(NewParam, OldParam, *this);
+ mergeParamDeclTypes(NewParam, OldParam, *this);
+ }
if (getLangOpts().CPlusPlus)
return MergeCXXFunctionDecl(New, Old, S);
@@ -4663,12 +4708,14 @@ NamedDecl *Sema::HandleDeclarator(Scope *S, Declarator &D,
RequireCompleteDeclContext(D.getCXXScopeSpec(), DC))
return nullptr;
+ // If a class is incomplete, do not parse entities inside it.
if (isa<CXXRecordDecl>(DC) && !cast<CXXRecordDecl>(DC)->hasDefinition()) {
Diag(D.getIdentifierLoc(),
diag::err_member_def_undefined_record)
<< Name << DC << D.getCXXScopeSpec().getRange();
- D.setInvalidType();
- } else if (!D.getDeclSpec().isFriendSpecified()) {
+ return nullptr;
+ }
+ if (!D.getDeclSpec().isFriendSpecified()) {
if (diagnoseQualifiedDeclaration(D.getCXXScopeSpec(), DC,
Name, D.getIdentifierLoc())) {
if (DC->isRecord())
@@ -13486,7 +13533,8 @@ EnumConstantDecl *Sema::CheckEnumConstant(EnumDecl *Enum,
Sema::SkipBodyInfo Sema::shouldSkipAnonEnumBody(Scope *S, IdentifierInfo *II,
SourceLocation IILoc) {
- if (!getLangOpts().Modules || !getLangOpts().CPlusPlus)
+ if (!(getLangOpts().Modules || getLangOpts().ModulesLocalVisibility) ||
+ !getLangOpts().CPlusPlus)
return SkipBodyInfo();
// We have an anonymous enum definition. Look up the first enumerator to
@@ -13500,7 +13548,6 @@ Sema::SkipBodyInfo Sema::shouldSkipAnonEnumBody(Scope *S, IdentifierInfo *II,
!hasVisibleDefinition(cast<NamedDecl>(PrevECD->getDeclContext()),
&Hidden)) {
SkipBodyInfo Skip;
- Skip.ShouldSkip = true;
Skip.Previous = Hidden;
return Skip;
}
@@ -14197,16 +14244,22 @@ void Sema::ActOnPragmaRedefineExtname(IdentifierInfo* Name,
SourceLocation PragmaLoc,
SourceLocation NameLoc,
SourceLocation AliasNameLoc) {
- Decl *PrevDecl = LookupSingleName(TUScope, Name, NameLoc,
- LookupOrdinaryName);
- AsmLabelAttr *Attr = ::new (Context) AsmLabelAttr(AliasNameLoc, Context,
- AliasName->getName(), 0);
-
- if (PrevDecl)
+ NamedDecl *PrevDecl = LookupSingleName(TUScope, Name, NameLoc,
+ LookupOrdinaryName);
+ AsmLabelAttr *Attr =
+ AsmLabelAttr::CreateImplicit(Context, AliasName->getName(), AliasNameLoc);
+
+ // If a declaration that:
+ //   1) declares a function or a variable, and
+ //   2) has external linkage
+ // already exists, add the label attribute to it.
+ if (PrevDecl &&
+ (isa<FunctionDecl>(PrevDecl) || isa<VarDecl>(PrevDecl)) &&
+ PrevDecl->hasExternalFormalLinkage())
PrevDecl->addAttr(Attr);
- else
- (void)ExtnameUndeclaredIdentifiers.insert(
- std::pair<IdentifierInfo*,AsmLabelAttr*>(Name, Attr));
+ // Otherwise, add the label attribute to ExtnameUndeclaredIdentifiers.
+ else
+ (void)ExtnameUndeclaredIdentifiers.insert(std::make_pair(Name, Attr));
}
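A small sketch of the pragma this code services (the symbol names are illustrative): with the change, the asm label is attached directly only when the already-seen declaration is a function or variable with external formal linkage; otherwise the rename is parked in ExtnameUndeclaredIdentifiers until a matching declaration appears.

    #pragma redefine_extname statfs statfs64
    int statfs(const char *path, void *buf);  /* referenced as "statfs64"
                                                  at the object-file level */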
void Sema::ActOnPragmaWeakID(IdentifierInfo* Name,
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp
index 1d04159..43790c2 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp
@@ -2397,6 +2397,28 @@ static void handleSectionAttr(Sema &S, Decl *D, const AttributeList &Attr) {
D->addAttr(NewAttr);
}
+// Check for things we'd like to warn about; no errors or validation for now.
+// TODO: Validation should use a backend target library that specifies
+// the allowable subtarget features and cpus. We could use something like a
+// TargetCodeGenInfo hook here to do validation.
+void Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) {
+ for (auto Str : {"tune=", "fpmath="})
+ if (AttrStr.find(Str) != StringRef::npos)
+ Diag(LiteralLoc, diag::warn_unsupported_target_attribute) << Str;
+}
+
+static void handleTargetAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+ StringRef Str;
+ SourceLocation LiteralLoc;
+ if (!S.checkStringLiteralArgumentAttr(Attr, 0, Str, &LiteralLoc))
+ return;
+ S.checkTargetAttr(LiteralLoc, Str);
+ unsigned Index = Attr.getAttributeSpellingListIndex();
+ TargetAttr *NewAttr =
+ ::new (S.Context) TargetAttr(Attr.getRange(), S.Context, Str, Index);
+ D->addAttr(NewAttr);
+}
+
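A sketch of source that exercises the new handler (feature strings are illustrative): plain subtarget features pass through, while "tune=" and "fpmath=" substrings draw the new warn_unsupported_target_attribute diagnostic.

    __attribute__((target("avx2")))
    int fast_path(void) { return 1; }              /* accepted silently */

    __attribute__((target("arch=core2,tune=generic")))
    int tuned_path(void) { return 2; }             /* warns about "tune=" */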
static void handleCleanupAttr(Sema &S, Decl *D, const AttributeList &Attr) {
VarDecl *VD = cast<VarDecl>(D);
@@ -3132,16 +3154,22 @@ static void handleModeAttr(Sema &S, Decl *D, const AttributeList &Attr) {
return;
}
- if (!OldTy->getAs<BuiltinType>() && !OldTy->isComplexType())
+ // Base type can also be a vector type (see PR17453).
+ // Distinguish between base type and base element type.
+ QualType OldElemTy = OldTy;
+ if (const VectorType *VT = OldTy->getAs<VectorType>())
+ OldElemTy = VT->getElementType();
+
+ if (!OldElemTy->getAs<BuiltinType>() && !OldElemTy->isComplexType())
S.Diag(Attr.getLoc(), diag::err_mode_not_primitive);
else if (IntegerMode) {
- if (!OldTy->isIntegralOrEnumerationType())
+ if (!OldElemTy->isIntegralOrEnumerationType())
S.Diag(Attr.getLoc(), diag::err_mode_wrong_type);
} else if (ComplexMode) {
- if (!OldTy->isComplexType())
+ if (!OldElemTy->isComplexType())
S.Diag(Attr.getLoc(), diag::err_mode_wrong_type);
} else {
- if (!OldTy->isFloatingType())
+ if (!OldElemTy->isFloatingType())
S.Diag(Attr.getLoc(), diag::err_mode_wrong_type);
}
@@ -3154,21 +3182,40 @@ static void handleModeAttr(Sema &S, Decl *D, const AttributeList &Attr) {
return;
}
- QualType NewTy;
+ QualType NewElemTy;
if (IntegerMode)
- NewTy = S.Context.getIntTypeForBitwidth(DestWidth,
- OldTy->isSignedIntegerType());
+ NewElemTy = S.Context.getIntTypeForBitwidth(
+ DestWidth, OldElemTy->isSignedIntegerType());
else
- NewTy = S.Context.getRealTypeForBitwidth(DestWidth);
+ NewElemTy = S.Context.getRealTypeForBitwidth(DestWidth);
- if (NewTy.isNull()) {
+ if (NewElemTy.isNull()) {
S.Diag(Attr.getLoc(), diag::err_machine_mode) << 1 /*Unsupported*/ << Name;
return;
}
if (ComplexMode) {
- NewTy = S.Context.getComplexType(NewTy);
+ NewElemTy = S.Context.getComplexType(NewElemTy);
+ }
+
+ QualType NewTy = NewElemTy;
+ if (const VectorType *OldVT = OldTy->getAs<VectorType>()) {
+ // Complex machine mode does not support base vector types.
+ if (ComplexMode) {
+ S.Diag(Attr.getLoc(), diag::err_complex_mode_vector_type);
+ return;
+ }
+ unsigned NumElements = S.Context.getTypeSize(OldElemTy) *
+ OldVT->getNumElements() /
+ S.Context.getTypeSize(NewElemTy);
+ NewTy =
+ S.Context.getVectorType(NewElemTy, NumElements, OldVT->getVectorKind());
+ }
+
+ if (NewTy.isNull()) {
+ S.Diag(Attr.getLoc(), diag::err_mode_wrong_type);
+ return;
}
// Install the new type.
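A sketch of the PR17453 situation this handles (typedef names are made up): when the attributed type is already a vector, the mode is applied to the element type and the element count is rescaled so the overall vector width is preserved.

    typedef int  v4si __attribute__((vector_size(16)));  /* 4 x 32-bit int */
    typedef v4si v2di __attribute__((mode(DI)));          /* re-typed as
                                                              2 x 64-bit int,
                                                              still 16 bytes */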
@@ -3683,10 +3730,31 @@ static void handleNSReturnsRetainedAttr(Sema &S, Decl *D,
returnType = PD->getType();
else if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
returnType = FD->getReturnType();
- else {
+ else if (auto *Param = dyn_cast<ParmVarDecl>(D)) {
+ returnType = Param->getType()->getPointeeType();
+ if (returnType.isNull()) {
+ S.Diag(D->getLocStart(), diag::warn_ns_attribute_wrong_parameter_type)
+ << Attr.getName() << /*pointer-to-CF*/2
+ << Attr.getRange();
+ return;
+ }
+ } else {
+ AttributeDeclKind ExpectedDeclKind;
+ switch (Attr.getKind()) {
+ default: llvm_unreachable("invalid ownership attribute");
+ case AttributeList::AT_NSReturnsRetained:
+ case AttributeList::AT_NSReturnsAutoreleased:
+ case AttributeList::AT_NSReturnsNotRetained:
+ ExpectedDeclKind = ExpectedFunctionOrMethod;
+ break;
+
+ case AttributeList::AT_CFReturnsRetained:
+ case AttributeList::AT_CFReturnsNotRetained:
+ ExpectedDeclKind = ExpectedFunctionMethodOrParameter;
+ break;
+ }
S.Diag(D->getLocStart(), diag::warn_attribute_wrong_decl_type)
- << Attr.getRange() << Attr.getName()
- << ExpectedFunctionOrMethod;
+ << Attr.getRange() << Attr.getName() << ExpectedDeclKind;
return;
}
@@ -3713,8 +3781,25 @@ static void handleNSReturnsRetainedAttr(Sema &S, Decl *D,
}
if (!typeOK) {
- S.Diag(D->getLocStart(), diag::warn_ns_attribute_wrong_return_type)
- << Attr.getRange() << Attr.getName() << isa<ObjCMethodDecl>(D) << cf;
+ if (isa<ParmVarDecl>(D)) {
+ S.Diag(D->getLocStart(), diag::warn_ns_attribute_wrong_parameter_type)
+ << Attr.getName() << /*pointer-to-CF*/2
+ << Attr.getRange();
+ } else {
+ // Needs to be kept in sync with warn_ns_attribute_wrong_return_type.
+ enum : unsigned {
+ Function,
+ Method,
+ Property
+ } SubjectKind = Function;
+ if (isa<ObjCMethodDecl>(D))
+ SubjectKind = Method;
+ else if (isa<ObjCPropertyDecl>(D))
+ SubjectKind = Property;
+ S.Diag(D->getLocStart(), diag::warn_ns_attribute_wrong_return_type)
+ << Attr.getName() << SubjectKind << cf
+ << Attr.getRange();
+ }
return;
}
@@ -4716,6 +4801,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case AttributeList::AT_Section:
handleSectionAttr(S, D, Attr);
break;
+ case AttributeList::AT_Target:
+ handleTargetAttr(S, D, Attr);
+ break;
case AttributeList::AT_Unavailable:
handleAttrWithMessage<UnavailableAttr>(S, D, Attr);
break;
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp
index c80ef2d..7ed9bfc 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp
@@ -9494,6 +9494,7 @@ static void getDefaultArgExprsForConstructors(Sema &S, CXXRecordDecl *Class) {
Expr *DefaultArg = S.BuildCXXDefaultArgExpr(Class->getLocation(), CD,
CD->getParamDecl(I)).get();
+ S.DiscardCleanupsInEvaluationContext();
S.Context.addDefaultArgExprForConstructor(CD, I, DefaultArg);
}
}
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclObjC.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclObjC.cpp
index 3831879..543566f 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclObjC.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclObjC.cpp
@@ -1366,6 +1366,13 @@ static SourceRange getTypeRange(TypeSourceInfo *TSI) {
return (TSI ? TSI->getTypeLoc().getSourceRange() : SourceRange());
}
+/// Determine whether two sets of Objective-C declaration qualifiers conflict.
+static bool objcModifiersConflict(Decl::ObjCDeclQualifier x,
+ Decl::ObjCDeclQualifier y) {
+ return (x & ~Decl::OBJC_TQ_CSNullability) !=
+ (y & ~Decl::OBJC_TQ_CSNullability);
+}
+
static bool CheckMethodOverrideReturn(Sema &S,
ObjCMethodDecl *MethodImpl,
ObjCMethodDecl *MethodDecl,
@@ -1373,8 +1380,8 @@ static bool CheckMethodOverrideReturn(Sema &S,
bool IsOverridingMode,
bool Warn) {
if (IsProtocolMethodDecl &&
- (MethodDecl->getObjCDeclQualifier() !=
- MethodImpl->getObjCDeclQualifier())) {
+ objcModifiersConflict(MethodDecl->getObjCDeclQualifier(),
+ MethodImpl->getObjCDeclQualifier())) {
if (Warn) {
S.Diag(MethodImpl->getLocation(),
(IsOverridingMode
@@ -1388,7 +1395,24 @@ static bool CheckMethodOverrideReturn(Sema &S,
else
return false;
}
-
+ if (Warn && IsOverridingMode &&
+ !isa<ObjCImplementationDecl>(MethodImpl->getDeclContext()) &&
+ !S.Context.hasSameNullabilityTypeQualifier(MethodImpl->getReturnType(),
+ MethodDecl->getReturnType(),
+ false)) {
+ unsigned unsNullabilityMethodImpl =
+ static_cast<unsigned>(*MethodImpl->getReturnType()->getNullability(S.Context));
+ unsigned unsNullabilityMethodDecl =
+ static_cast<unsigned>(*MethodDecl->getReturnType()->getNullability(S.Context));
+ S.Diag(MethodImpl->getLocation(),
+ diag::warn_conflicting_nullability_attr_overriding_ret_types)
+ << unsNullabilityMethodImpl
+ << ((MethodImpl->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability) != 0)
+ << unsNullabilityMethodDecl
+ << ((MethodDecl->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability) != 0);
+ S.Diag(MethodDecl->getLocation(), diag::note_previous_declaration);
+ }
+
if (S.Context.hasSameUnqualifiedType(MethodImpl->getReturnType(),
MethodDecl->getReturnType()))
return true;
@@ -1438,8 +1462,8 @@ static bool CheckMethodOverrideParam(Sema &S,
bool IsOverridingMode,
bool Warn) {
if (IsProtocolMethodDecl &&
- (ImplVar->getObjCDeclQualifier() !=
- IfaceVar->getObjCDeclQualifier())) {
+ objcModifiersConflict(ImplVar->getObjCDeclQualifier(),
+ IfaceVar->getObjCDeclQualifier())) {
if (Warn) {
if (IsOverridingMode)
S.Diag(ImplVar->getLocation(),
@@ -1459,7 +1483,19 @@ static bool CheckMethodOverrideParam(Sema &S,
QualType ImplTy = ImplVar->getType();
QualType IfaceTy = IfaceVar->getType();
-
+ if (Warn && IsOverridingMode &&
+ !isa<ObjCImplementationDecl>(MethodImpl->getDeclContext()) &&
+ !S.Context.hasSameNullabilityTypeQualifier(ImplTy, IfaceTy, true)) {
+ unsigned unsImplTy = static_cast<unsigned>(*ImplTy->getNullability(S.Context));
+ unsigned unsIfaceTy = static_cast<unsigned>(*IfaceTy->getNullability(S.Context));
+ S.Diag(ImplVar->getLocation(),
+ diag::warn_conflicting_nullability_attr_overriding_param_types)
+ << unsImplTy
+ << ((ImplVar->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability) != 0)
+ << unsIfaceTy
+ << ((IfaceVar->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability) != 0);
+ S.Diag(IfaceVar->getLocation(), diag::note_previous_declaration);
+ }
if (S.Context.hasSameUnqualifiedType(ImplTy, IfaceTy))
return true;
@@ -1988,6 +2024,9 @@ void Sema::ImplMethodsVsClassMethods(Scope *S, ObjCImplDecl* IMPDecl,
DiagnoseUnimplementedProperties(S, IMPDecl, CDecl, SynthesizeProperties);
}
+ // Diagnose null-resettable synthesized setters.
+ diagnoseNullResettableSynthesizedSetters(IMPDecl);
+
SelectorSet ClsMap;
for (const auto *I : IMPDecl->class_methods())
ClsMap.insert(I->getSelector());
@@ -3121,6 +3160,89 @@ void Sema::CheckObjCMethodOverrides(ObjCMethodDecl *ObjCMethod,
ObjCMethod->setOverriding(hasOverriddenMethodsInBaseOrProtocol);
}
+/// Merge type nullability for a redeclaration of the same entity,
+/// producing the updated type of the redeclared entity.
+static QualType mergeTypeNullabilityForRedecl(Sema &S, SourceLocation loc,
+ QualType type,
+ bool usesCSKeyword,
+ SourceLocation prevLoc,
+ QualType prevType,
+ bool prevUsesCSKeyword) {
+ // Determine the nullability of both types.
+ auto nullability = type->getNullability(S.Context);
+ auto prevNullability = prevType->getNullability(S.Context);
+
+ // Easy case: both have nullability.
+ if (nullability.hasValue() == prevNullability.hasValue()) {
+ // Neither has nullability; continue.
+ if (!nullability)
+ return type;
+
+ // The nullabilities are equivalent; do nothing.
+ if (*nullability == *prevNullability)
+ return type;
+
+ // Complain about mismatched nullability.
+ S.Diag(loc, diag::err_nullability_conflicting)
+ << static_cast<unsigned>(*nullability) << usesCSKeyword
+ << static_cast<unsigned>(*prevNullability) << prevUsesCSKeyword;
+ return type;
+ }
+
+ // If it's the redeclaration that has nullability, don't change anything.
+ if (nullability)
+ return type;
+
+ // Otherwise, provide the result with the same nullability.
+ return S.Context.getAttributedType(
+ AttributedType::getNullabilityAttrKind(*prevNullability),
+ type, type);
+}
+
+/// Merge information from the declaration of a method in the \@interface
+/// (or a category/extension) into the corresponding method in the
+/// \@implementation (for a class or category).
+static void mergeInterfaceMethodToImpl(Sema &S,
+ ObjCMethodDecl *method,
+ ObjCMethodDecl *prevMethod) {
+ // Merge the objc_requires_super attribute.
+ if (prevMethod->hasAttr<ObjCRequiresSuperAttr>() &&
+ !method->hasAttr<ObjCRequiresSuperAttr>()) {
+ // merge the attribute into implementation.
+ method->addAttr(
+ ObjCRequiresSuperAttr::CreateImplicit(S.Context,
+ method->getLocation()));
+ }
+
+ // Merge nullability of the result type.
+ QualType newReturnType
+ = mergeTypeNullabilityForRedecl(
+ S, method->getReturnTypeSourceRange().getBegin(),
+ method->getReturnType(),
+ method->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability,
+ prevMethod->getReturnTypeSourceRange().getBegin(),
+ prevMethod->getReturnType(),
+ prevMethod->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability);
+ method->setReturnType(newReturnType);
+
+ // Handle each of the parameters.
+ unsigned numParams = method->param_size();
+ unsigned numPrevParams = prevMethod->param_size();
+ for (unsigned i = 0, n = std::min(numParams, numPrevParams); i != n; ++i) {
+ ParmVarDecl *param = method->param_begin()[i];
+ ParmVarDecl *prevParam = prevMethod->param_begin()[i];
+
+ // Merge nullability.
+ QualType newParamType
+ = mergeTypeNullabilityForRedecl(
+ S, param->getLocation(), param->getType(),
+ param->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability,
+ prevParam->getLocation(), prevParam->getType(),
+ prevParam->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability);
+ param->setType(newParamType);
+ }
+}
+
Decl *Sema::ActOnMethodDeclaration(
Scope *S,
SourceLocation MethodLoc, SourceLocation EndLoc,
@@ -3151,7 +3273,9 @@ Decl *Sema::ActOnMethodDeclaration(
if (CheckFunctionReturnType(resultDeclType, MethodLoc))
return nullptr;
- HasRelatedResultType = (resultDeclType == Context.getObjCInstanceType());
+ QualType bareResultType = resultDeclType;
+ (void)AttributedType::stripOuterNullability(bareResultType);
+ HasRelatedResultType = (bareResultType == Context.getObjCInstanceType());
} else { // get the type for "id".
resultDeclType = Context.getObjCIdType();
Diag(MethodLoc, diag::warn_missing_method_return_type)
@@ -3252,22 +3376,20 @@ Decl *Sema::ActOnMethodDeclaration(
ImpDecl->addClassMethod(ObjCMethod);
}
- ObjCMethodDecl *IMD = nullptr;
- if (ObjCInterfaceDecl *IDecl = ImpDecl->getClassInterface())
- IMD = IDecl->lookupMethod(ObjCMethod->getSelector(),
- ObjCMethod->isInstanceMethod());
- if (IMD && IMD->hasAttr<ObjCRequiresSuperAttr>() &&
- !ObjCMethod->hasAttr<ObjCRequiresSuperAttr>()) {
- // merge the attribute into implementation.
- ObjCMethod->addAttr(ObjCRequiresSuperAttr::CreateImplicit(Context,
- ObjCMethod->getLocation()));
- }
- if (isa<ObjCCategoryImplDecl>(ImpDecl)) {
- ObjCMethodFamily family =
- ObjCMethod->getSelector().getMethodFamily();
- if (family == OMF_dealloc && IMD && IMD->isOverriding())
- Diag(ObjCMethod->getLocation(), diag::warn_dealloc_in_category)
- << ObjCMethod->getDeclName();
+ // Merge information from the @interface declaration into the
+ // @implementation.
+ if (ObjCInterfaceDecl *IDecl = ImpDecl->getClassInterface()) {
+ if (auto *IMD = IDecl->lookupMethod(ObjCMethod->getSelector(),
+ ObjCMethod->isInstanceMethod())) {
+ mergeInterfaceMethodToImpl(*this, ObjCMethod, IMD);
+
+ // Warn about defining -dealloc in a category.
+ if (isa<ObjCCategoryImplDecl>(ImpDecl) && IMD->isOverriding() &&
+ ObjCMethod->getSelector().getMethodFamily() == OMF_dealloc) {
+ Diag(ObjCMethod->getLocation(), diag::warn_dealloc_in_category)
+ << ObjCMethod->getDeclName();
+ }
+ }
}
} else {
cast<DeclContext>(ClassDecl)->addDecl(ObjCMethod);
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExceptionSpec.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExceptionSpec.cpp
index 51d6ace..f3bcf76 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1041,6 +1041,7 @@ CanThrowResult Sema::canThrow(const Expr *E) {
case Expr::CXXReinterpretCastExprClass:
case Expr::CXXStdInitializerListExprClass:
case Expr::DesignatedInitExprClass:
+ case Expr::DesignatedInitUpdateExprClass:
case Expr::ExprWithCleanupsClass:
case Expr::ExtVectorElementExprClass:
case Expr::InitListExprClass:
@@ -1135,6 +1136,7 @@ CanThrowResult Sema::canThrow(const Expr *E) {
case Expr::ImaginaryLiteralClass:
case Expr::ImplicitValueInitExprClass:
case Expr::IntegerLiteralClass:
+ case Expr::NoInitExprClass:
case Expr::ObjCEncodeExprClass:
case Expr::ObjCStringLiteralClass:
case Expr::ObjCBoolLiteralExprClass:
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp
index 7e305ff..6c839f3 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp
@@ -20,7 +20,6 @@
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/DeclObjC.h"
-#include "clang/AST/EvaluatedExprVisitor.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
#include "clang/AST/RecursiveASTVisitor.h"
@@ -3291,10 +3290,10 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
// We may not have been able to figure out what this member pointer resolved
// to up until this exact point. Attempt to lock-in it's inheritance model.
- QualType FromType = From->getType();
- if (FromType->isMemberPointerType())
- if (Context.getTargetInfo().getCXXABI().isMicrosoft())
- RequireCompleteType(From->getExprLoc(), FromType, 0);
+ if (Context.getTargetInfo().getCXXABI().isMicrosoft()) {
+ RequireCompleteType(From->getExprLoc(), From->getType(), 0);
+ RequireCompleteType(From->getExprLoc(), ToType, 0);
+ }
From = ImpCastExprToType(From, ToType, Kind, VK_RValue, &BasePath, CCK)
.get();
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp
index 63b7485..9947fad 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp
@@ -1135,49 +1135,154 @@ ObjCMethodDecl *Sema::tryCaptureObjCSelf(SourceLocation Loc) {
}
static QualType stripObjCInstanceType(ASTContext &Context, QualType T) {
+ QualType origType = T;
+ if (auto nullability = AttributedType::stripOuterNullability(T)) {
+ if (T == Context.getObjCInstanceType()) {
+ return Context.getAttributedType(
+ AttributedType::getNullabilityAttrKind(*nullability),
+ Context.getObjCIdType(),
+ Context.getObjCIdType());
+ }
+
+ return origType;
+ }
+
if (T == Context.getObjCInstanceType())
return Context.getObjCIdType();
- return T;
+ return origType;
}
-QualType Sema::getMessageSendResultType(QualType ReceiverType,
- ObjCMethodDecl *Method,
- bool isClassMessage, bool isSuperMessage) {
+/// Determine the result type of a message send based on the receiver type,
+/// method, and the kind of message send.
+///
+/// This is the "base" result type, which will still need to be adjusted
+/// to account for nullability.
+static QualType getBaseMessageSendResultType(Sema &S,
+ QualType ReceiverType,
+ ObjCMethodDecl *Method,
+ bool isClassMessage,
+ bool isSuperMessage) {
assert(Method && "Must have a method");
if (!Method->hasRelatedResultType())
return Method->getSendResultType();
-
+
+ ASTContext &Context = S.Context;
+
+ // Local function that transfers the nullability of the method's
+ // result type to the returned result.
+ auto transferNullability = [&](QualType type) -> QualType {
+ // If the method's result type has nullability, extract it.
+ if (auto nullability = Method->getSendResultType()->getNullability(Context)){
+ // Strip off any outer nullability sugar from the provided type.
+ (void)AttributedType::stripOuterNullability(type);
+
+ // Form a new attributed type using the method result type's nullability.
+ return Context.getAttributedType(
+ AttributedType::getNullabilityAttrKind(*nullability),
+ type,
+ type);
+ }
+
+ return type;
+ };
+
// If a method has a related return type:
// - if the method found is an instance method, but the message send
// was a class message send, T is the declared return type of the method
// found
if (Method->isInstanceMethod() && isClassMessage)
return stripObjCInstanceType(Context, Method->getSendResultType());
-
- // - if the receiver is super, T is a pointer to the class of the
+
+ // - if the receiver is super, T is a pointer to the class of the
// enclosing method definition
if (isSuperMessage) {
- if (ObjCMethodDecl *CurMethod = getCurMethodDecl())
- if (ObjCInterfaceDecl *Class = CurMethod->getClassInterface())
- return Context.getObjCObjectPointerType(
- Context.getObjCInterfaceType(Class));
+ if (ObjCMethodDecl *CurMethod = S.getCurMethodDecl())
+ if (ObjCInterfaceDecl *Class = CurMethod->getClassInterface()) {
+ return transferNullability(
+ Context.getObjCObjectPointerType(
+ Context.getObjCInterfaceType(Class)));
+ }
}
-
+
// - if the receiver is the name of a class U, T is a pointer to U
if (ReceiverType->getAs<ObjCInterfaceType>() ||
ReceiverType->isObjCQualifiedInterfaceType())
- return Context.getObjCObjectPointerType(ReceiverType);
- // - if the receiver is of type Class or qualified Class type,
+ return transferNullability(Context.getObjCObjectPointerType(ReceiverType));
+ // - if the receiver is of type Class or qualified Class type,
// T is the declared return type of the method.
if (ReceiverType->isObjCClassType() ||
ReceiverType->isObjCQualifiedClassType())
return stripObjCInstanceType(Context, Method->getSendResultType());
-
+
// - if the receiver is id, qualified id, Class, or qualified Class, T
// is the receiver type, otherwise
// - T is the type of the receiver expression.
- return ReceiverType;
+ return transferNullability(ReceiverType);
+}
+
+QualType Sema::getMessageSendResultType(QualType ReceiverType,
+ ObjCMethodDecl *Method,
+ bool isClassMessage,
+ bool isSuperMessage) {
+ // Produce the result type.
+ QualType resultType = getBaseMessageSendResultType(*this, ReceiverType,
+ Method,
+ isClassMessage,
+ isSuperMessage);
+
+ // If this is a class message, ignore the nullability of the receiver.
+ if (isClassMessage)
+ return resultType;
+
+ // Map the nullability of the result into a table index.
+ unsigned receiverNullabilityIdx = 0;
+ if (auto nullability = ReceiverType->getNullability(Context))
+ receiverNullabilityIdx = 1 + static_cast<unsigned>(*nullability);
+
+ unsigned resultNullabilityIdx = 0;
+ if (auto nullability = resultType->getNullability(Context))
+ resultNullabilityIdx = 1 + static_cast<unsigned>(*nullability);
+
+ // The table of nullability mappings, indexed by the receiver's nullability
+ // and then the result type's nullability.
+ static const uint8_t None = 0;
+ static const uint8_t NonNull = 1;
+ static const uint8_t Nullable = 2;
+ static const uint8_t Unspecified = 3;
+ static const uint8_t nullabilityMap[4][4] = {
+ // None NonNull Nullable Unspecified
+ /* None */ { None, None, Nullable, None },
+ /* NonNull */ { None, NonNull, Nullable, Unspecified },
+ /* Nullable */ { Nullable, Nullable, Nullable, Nullable },
+ /* Unspecified */ { None, Unspecified, Nullable, Unspecified }
+ };
+
+ unsigned newResultNullabilityIdx
+ = nullabilityMap[receiverNullabilityIdx][resultNullabilityIdx];
+ if (newResultNullabilityIdx == resultNullabilityIdx)
+ return resultType;
+
+ // Strip off the existing nullability. This removes as little type sugar as
+ // possible.
+ do {
+ if (auto attributed = dyn_cast<AttributedType>(resultType.getTypePtr())) {
+ resultType = attributed->getModifiedType();
+ } else {
+ resultType = resultType.getDesugaredType(Context);
+ }
+ } while (resultType->getNullability(Context));
+
+ // Add nullability back if needed.
+ if (newResultNullabilityIdx > 0) {
+ auto newNullability
+ = static_cast<NullabilityKind>(newResultNullabilityIdx-1);
+ return Context.getAttributedType(
+ AttributedType::getNullabilityAttrKind(newNullability),
+ resultType, resultType);
+ }
+
+ return resultType;
}
/// Look for an ObjC method whose result type exactly matches the given type.
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp
index 610e0a9..821d7f6 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp
@@ -306,7 +306,8 @@ class InitListChecker {
QualType CurrentObjectType,
InitListExpr *StructuredList,
unsigned StructuredIndex,
- SourceRange InitRange);
+ SourceRange InitRange,
+ bool IsFullyOverwritten = false);
void UpdateStructuredListElement(InitListExpr *StructuredList,
unsigned &StructuredIndex,
Expr *expr);
@@ -317,11 +318,33 @@ class InitListChecker {
SourceLocation Loc,
const InitializedEntity &Entity,
bool VerifyOnly);
+
+ // Explanation of the "FillWithNoInit" mode:
+ //
+ // Assume we have the following definitions (Case#1):
+ // struct P { char x[6][6]; } xp = { .x[1] = "bar" };
+ // struct PP { struct P lp; } l = { .lp = xp, .lp.x[1][2] = 'f' };
+ //
+ // l.lp.x[1][0..1] should not be filled with implicit initializers because the
+ // "base" initializer "xp" will provide values for them; l.lp.x[1] will be "baf".
+ //
+ // But if we have (Case#2):
+ // struct PP l = { .lp = xp, .lp.x[1] = { [2] = 'f' } };
+ //
+ // l.lp.x[1][0..1] are implicitly initialized and do not use values from the
+ // "base" initializer; l.lp.x[1] will be "\0\0f\0\0\0".
+ //
+ // To distinguish Case#1 from Case#2, and also to avoid leaving many "holes"
+ // in the InitListExpr, the "holes" in Case#1 are filled not with empty
+ // initializers but with special "NoInitExpr" placeholders, which tell
+ // CodeGen not to generate any initializers for these parts.
void FillInEmptyInitForField(unsigned Init, FieldDecl *Field,
const InitializedEntity &ParentEntity,
- InitListExpr *ILE, bool &RequiresSecondPass);
+ InitListExpr *ILE, bool &RequiresSecondPass,
+ bool FillWithNoInit = false);
void FillInEmptyInitializations(const InitializedEntity &Entity,
- InitListExpr *ILE, bool &RequiresSecondPass);
+ InitListExpr *ILE, bool &RequiresSecondPass,
+ bool FillWithNoInit = false);
bool CheckFlexibleArrayInit(const InitializedEntity &Entity,
Expr *InitExpr, FieldDecl *Field,
bool TopLevelObject);
@@ -455,12 +478,26 @@ void InitListChecker::CheckEmptyInitializable(const InitializedEntity &Entity,
void InitListChecker::FillInEmptyInitForField(unsigned Init, FieldDecl *Field,
const InitializedEntity &ParentEntity,
InitListExpr *ILE,
- bool &RequiresSecondPass) {
+ bool &RequiresSecondPass,
+ bool FillWithNoInit) {
SourceLocation Loc = ILE->getLocEnd();
unsigned NumInits = ILE->getNumInits();
InitializedEntity MemberEntity
= InitializedEntity::InitializeMember(Field, &ParentEntity);
+
+ if (const RecordType *RType = ILE->getType()->getAs<RecordType>())
+ if (!RType->getDecl()->isUnion())
+ assert(Init < NumInits && "This ILE should have been expanded");
+
if (Init >= NumInits || !ILE->getInit(Init)) {
+ if (FillWithNoInit) {
+ Expr *Filler = new (SemaRef.Context) NoInitExpr(Field->getType());
+ if (Init < NumInits)
+ ILE->setInit(Init, Filler);
+ else
+ ILE->updateInit(SemaRef.Context, Init, Filler);
+ return;
+ }
// C++1y [dcl.init.aggr]p7:
// If there are fewer initializer-clauses in the list than there are
// members in the aggregate, then each member not explicitly initialized
@@ -516,7 +553,11 @@ void InitListChecker::FillInEmptyInitForField(unsigned Init, FieldDecl *Field,
} else if (InitListExpr *InnerILE
= dyn_cast<InitListExpr>(ILE->getInit(Init)))
FillInEmptyInitializations(MemberEntity, InnerILE,
- RequiresSecondPass);
+ RequiresSecondPass, FillWithNoInit);
+ else if (DesignatedInitUpdateExpr *InnerDIUE
+ = dyn_cast<DesignatedInitUpdateExpr>(ILE->getInit(Init)))
+ FillInEmptyInitializations(MemberEntity, InnerDIUE->getUpdater(),
+ RequiresSecondPass, /*FillWithNoInit =*/ true);
}
/// Recursively replaces NULL values within the given initializer list
@@ -525,7 +566,8 @@ void InitListChecker::FillInEmptyInitForField(unsigned Init, FieldDecl *Field,
void
InitListChecker::FillInEmptyInitializations(const InitializedEntity &Entity,
InitListExpr *ILE,
- bool &RequiresSecondPass) {
+ bool &RequiresSecondPass,
+ bool FillWithNoInit) {
assert((ILE->getType() != SemaRef.Context.VoidTy) &&
"Should not have void type");
@@ -533,16 +575,27 @@ InitListChecker::FillInEmptyInitializations(const InitializedEntity &Entity,
const RecordDecl *RDecl = RType->getDecl();
if (RDecl->isUnion() && ILE->getInitializedFieldInUnion())
FillInEmptyInitForField(0, ILE->getInitializedFieldInUnion(),
- Entity, ILE, RequiresSecondPass);
+ Entity, ILE, RequiresSecondPass, FillWithNoInit);
else if (RDecl->isUnion() && isa<CXXRecordDecl>(RDecl) &&
cast<CXXRecordDecl>(RDecl)->hasInClassInitializer()) {
for (auto *Field : RDecl->fields()) {
if (Field->hasInClassInitializer()) {
- FillInEmptyInitForField(0, Field, Entity, ILE, RequiresSecondPass);
+ FillInEmptyInitForField(0, Field, Entity, ILE, RequiresSecondPass,
+ FillWithNoInit);
break;
}
}
} else {
+ // The fields beyond ILE->getNumInits() are default initialized, so in
+ // order to leave them uninitialized, the ILE is expanded and the extra
+ // fields are then filled with NoInitExpr.
+ unsigned NumFields = 0;
+ for (auto *Field : RDecl->fields())
+ if (!Field->isUnnamedBitfield())
+ ++NumFields;
+ if (ILE->getNumInits() < NumFields)
+ ILE->resizeInits(SemaRef.Context, NumFields);
+
unsigned Init = 0;
for (auto *Field : RDecl->fields()) {
if (Field->isUnnamedBitfield())
@@ -551,7 +604,8 @@ InitListChecker::FillInEmptyInitializations(const InitializedEntity &Entity,
if (hadError)
return;
- FillInEmptyInitForField(Init, Field, Entity, ILE, RequiresSecondPass);
+ FillInEmptyInitForField(Init, Field, Entity, ILE, RequiresSecondPass,
+ FillWithNoInit);
if (hadError)
return;
@@ -594,13 +648,23 @@ InitListChecker::FillInEmptyInitializations(const InitializedEntity &Entity,
ElementEntity.setElementIndex(Init);
Expr *InitExpr = (Init < NumInits ? ILE->getInit(Init) : nullptr);
- if (!InitExpr && !ILE->hasArrayFiller()) {
- ExprResult ElementInit = PerformEmptyInit(SemaRef, ILE->getLocEnd(),
- ElementEntity,
- /*VerifyOnly*/false);
- if (ElementInit.isInvalid()) {
- hadError = true;
- return;
+ if (!InitExpr && Init < NumInits && ILE->hasArrayFiller())
+ ILE->setInit(Init, ILE->getArrayFiller());
+ else if (!InitExpr && !ILE->hasArrayFiller()) {
+ Expr *Filler = nullptr;
+
+ if (FillWithNoInit)
+ Filler = new (SemaRef.Context) NoInitExpr(ElementType);
+ else {
+ ExprResult ElementInit = PerformEmptyInit(SemaRef, ILE->getLocEnd(),
+ ElementEntity,
+ /*VerifyOnly*/false);
+ if (ElementInit.isInvalid()) {
+ hadError = true;
+ return;
+ }
+
+ Filler = ElementInit.getAs<Expr>();
}
if (hadError) {
@@ -609,29 +673,34 @@ InitListChecker::FillInEmptyInitializations(const InitializedEntity &Entity,
// For arrays, just set the expression used for value-initialization
// of the "holes" in the array.
if (ElementEntity.getKind() == InitializedEntity::EK_ArrayElement)
- ILE->setArrayFiller(ElementInit.getAs<Expr>());
+ ILE->setArrayFiller(Filler);
else
- ILE->setInit(Init, ElementInit.getAs<Expr>());
+ ILE->setInit(Init, Filler);
} else {
// For arrays, just set the expression used for value-initialization
// of the rest of elements and exit.
if (ElementEntity.getKind() == InitializedEntity::EK_ArrayElement) {
- ILE->setArrayFiller(ElementInit.getAs<Expr>());
+ ILE->setArrayFiller(Filler);
return;
}
- if (!isa<ImplicitValueInitExpr>(ElementInit.get())) {
+ if (!isa<ImplicitValueInitExpr>(Filler) && !isa<NoInitExpr>(Filler)) {
// Empty initialization requires a constructor call, so
// extend the initializer list to include the constructor
// call and make a note that we'll need to take another pass
// through the initializer list.
- ILE->updateInit(SemaRef.Context, Init, ElementInit.getAs<Expr>());
+ ILE->updateInit(SemaRef.Context, Init, Filler);
RequiresSecondPass = true;
}
}
} else if (InitListExpr *InnerILE
= dyn_cast_or_null<InitListExpr>(InitExpr))
- FillInEmptyInitializations(ElementEntity, InnerILE, RequiresSecondPass);
+ FillInEmptyInitializations(ElementEntity, InnerILE, RequiresSecondPass,
+ FillWithNoInit);
+ else if (DesignatedInitUpdateExpr *InnerDIUE
+ = dyn_cast_or_null<DesignatedInitUpdateExpr>(InitExpr))
+ FillInEmptyInitializations(ElementEntity, InnerDIUE->getUpdater(),
+ RequiresSecondPass, /*FillWithNoInit =*/ true);
}
}
@@ -966,13 +1035,26 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity,
StructuredList, StructuredIndex);
if (InitListExpr *SubInitList = dyn_cast<InitListExpr>(expr)) {
- if (!SemaRef.getLangOpts().CPlusPlus) {
+ if (SubInitList->getNumInits() == 1 &&
+ IsStringInit(SubInitList->getInit(0), ElemType, SemaRef.Context) ==
+ SIF_None) {
+ expr = SubInitList->getInit(0);
+ } else if (!SemaRef.getLangOpts().CPlusPlus) {
InitListExpr *InnerStructuredList
= getStructuredSubobjectInit(IList, Index, ElemType,
StructuredList, StructuredIndex,
- SubInitList->getSourceRange());
+ SubInitList->getSourceRange(), true);
CheckExplicitInitList(Entity, SubInitList, ElemType,
InnerStructuredList);
+
+ if (!hadError && !VerifyOnly) {
+ bool RequiresSecondPass = false;
+ FillInEmptyInitializations(Entity, InnerStructuredList,
+ RequiresSecondPass);
+ if (RequiresSecondPass && !hadError)
+ FillInEmptyInitializations(Entity, InnerStructuredList,
+ RequiresSecondPass);
+ }
++StructuredIndex;
++Index;
return;
@@ -1913,11 +1995,66 @@ InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity,
// Determine the structural initializer list that corresponds to the
// current subobject.
- StructuredList = IsFirstDesignator? SyntacticToSemantic.lookup(IList)
- : getStructuredSubobjectInit(IList, Index, CurrentObjectType,
- StructuredList, StructuredIndex,
- SourceRange(D->getLocStart(),
- DIE->getLocEnd()));
+ if (IsFirstDesignator)
+ StructuredList = SyntacticToSemantic.lookup(IList);
+ else {
+ Expr *ExistingInit = StructuredIndex < StructuredList->getNumInits() ?
+ StructuredList->getInit(StructuredIndex) : nullptr;
+ if (!ExistingInit && StructuredList->hasArrayFiller())
+ ExistingInit = StructuredList->getArrayFiller();
+
+ if (!ExistingInit)
+ StructuredList =
+ getStructuredSubobjectInit(IList, Index, CurrentObjectType,
+ StructuredList, StructuredIndex,
+ SourceRange(D->getLocStart(),
+ DIE->getLocEnd()));
+ else if (InitListExpr *Result = dyn_cast<InitListExpr>(ExistingInit))
+ StructuredList = Result;
+ else {
+ if (DesignatedInitUpdateExpr *E =
+ dyn_cast<DesignatedInitUpdateExpr>(ExistingInit))
+ StructuredList = E->getUpdater();
+ else {
+ DesignatedInitUpdateExpr *DIUE =
+ new (SemaRef.Context) DesignatedInitUpdateExpr(SemaRef.Context,
+ D->getLocStart(), ExistingInit,
+ DIE->getLocEnd());
+ StructuredList->updateInit(SemaRef.Context, StructuredIndex, DIUE);
+ StructuredList = DIUE->getUpdater();
+ }
+
+ // We need to check on source range validity because the previous
+ // initializer does not have to be an explicit initializer. e.g.,
+ //
+ // struct P { int a, b; };
+ // struct PP { struct P p } l = { { .a = 2 }, .p.b = 3 };
+ //
+ // There is an overwrite taking place because the first braced initializer
+ // list "{ .a = 2 }" already provides value for .p.b (which is zero).
+ if (ExistingInit->getSourceRange().isValid()) {
+ // We are creating an initializer list that initializes the
+ // subobjects of the current object, but there was already an
+ // initialization that completely initialized the current
+ // subobject, e.g., by a compound literal:
+ //
+ // struct X { int a, b; };
+ // struct X xs[] = { [0] = (struct X) { 1, 2 }, [0].b = 3 };
+ //
+ // Here, xs[0].a == 0 and xs[0].b == 3, since the second,
+ // designated initializer re-initializes the whole
+ // subobject [0], overwriting previous initializers.
+ SemaRef.Diag(D->getLocStart(),
+ diag::warn_subobject_initializer_overrides)
+ << SourceRange(D->getLocStart(), DIE->getLocEnd());
+
+ SemaRef.Diag(ExistingInit->getLocStart(),
+ diag::note_previous_initializer)
+ << /*FIXME:has side effects=*/0
+ << ExistingInit->getSourceRange();
+ }
+ }
+ }
assert(StructuredList && "Expected a structured initializer list");
}
@@ -2367,7 +2504,8 @@ InitListChecker::getStructuredSubobjectInit(InitListExpr *IList, unsigned Index,
QualType CurrentObjectType,
InitListExpr *StructuredList,
unsigned StructuredIndex,
- SourceRange InitRange) {
+ SourceRange InitRange,
+ bool IsFullyOverwritten) {
if (VerifyOnly)
return nullptr; // No structured list in verification-only mode.
Expr *ExistingInit = nullptr;
@@ -2377,7 +2515,16 @@ InitListChecker::getStructuredSubobjectInit(InitListExpr *IList, unsigned Index,
ExistingInit = StructuredList->getInit(StructuredIndex);
if (InitListExpr *Result = dyn_cast_or_null<InitListExpr>(ExistingInit))
- return Result;
+ // There might have already been initializers for subobjects of the current
+ // object, but a subsequent initializer list will overwrite the entirety
+ // of the current object. (See DR 253 and C99 6.7.8p21). e.g.,
+ //
+ // struct P { char x[6]; };
+ // struct P l = { .x[2] = 'x', .x = { [0] = 'f' } };
+ //
+ // The first designated initializer is ignored, and l.x is just "f".
+ if (!IsFullyOverwritten)
+ return Result;
if (ExistingInit) {
// We are creating an initializer list that initializes the
@@ -2469,13 +2616,22 @@ void InitListChecker::UpdateStructuredListElement(InitListExpr *StructuredList,
if (Expr *PrevInit = StructuredList->updateInit(SemaRef.Context,
StructuredIndex, expr)) {
// This initializer overwrites a previous initializer. Warn.
- SemaRef.Diag(expr->getLocStart(),
- diag::warn_initializer_overrides)
- << expr->getSourceRange();
- SemaRef.Diag(PrevInit->getLocStart(),
- diag::note_previous_initializer)
- << /*FIXME:has side effects=*/0
- << PrevInit->getSourceRange();
+ // We need to check on source range validity because the previous
+ // initializer does not have to be an explicit initializer.
+ // struct P { int a, b; };
+ // struct PP { struct P p } l = { { .a = 2 }, .p.b = 3 };
+ // There is an overwrite taking place because the first braced initializer
+ // list "{ .a = 2 }' already provides value for .p.b (which is zero).
+ if (PrevInit->getSourceRange().isValid()) {
+ SemaRef.Diag(expr->getLocStart(),
+ diag::warn_initializer_overrides)
+ << expr->getSourceRange();
+
+ SemaRef.Diag(PrevInit->getLocStart(),
+ diag::note_previous_initializer)
+ << /*FIXME:has side effects=*/0
+ << PrevInit->getSourceRange();
+ }
}
++StructuredIndex;
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp
index d0a55b5..3fd1f21 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp
@@ -1233,6 +1233,12 @@ void Sema::makeMergedDefinitionVisible(NamedDecl *ND, SourceLocation Loc) {
else
// We're not building a module; just make the definition visible.
ND->setHidden(false);
+
+ // If ND is a template declaration, make the template parameters
+ // visible too. They're not (necessarily) within a mergeable DeclContext.
+ if (auto *TD = dyn_cast<TemplateDecl>(ND))
+ for (auto *Param : *TD->getTemplateParameters())
+ makeMergedDefinitionVisible(Param, Loc);
}
/// \brief Find the module in which the given declaration was defined.
@@ -1282,6 +1288,41 @@ bool Sema::hasVisibleMergedDefinition(NamedDecl *Def) {
return false;
}
+template<typename ParmDecl>
+static bool
+hasVisibleDefaultArgument(Sema &S, const ParmDecl *D,
+ llvm::SmallVectorImpl<Module *> *Modules) {
+ if (!D->hasDefaultArgument())
+ return false;
+
+ while (D) {
+ auto &DefaultArg = D->getDefaultArgStorage();
+ if (!DefaultArg.isInherited() && S.isVisible(D))
+ return true;
+
+ if (!DefaultArg.isInherited() && Modules) {
+ auto *NonConstD = const_cast<ParmDecl*>(D);
+ Modules->push_back(S.getOwningModule(NonConstD));
+ const auto &Merged = S.Context.getModulesWithMergedDefinition(NonConstD);
+ Modules->insert(Modules->end(), Merged.begin(), Merged.end());
+ }
+
+ // If there was a previous default argument, maybe its parameter is visible.
+ D = DefaultArg.getInheritedFrom();
+ }
+ return false;
+}
+
+bool Sema::hasVisibleDefaultArgument(const NamedDecl *D,
+ llvm::SmallVectorImpl<Module *> *Modules) {
+ if (auto *P = dyn_cast<TemplateTypeParmDecl>(D))
+ return ::hasVisibleDefaultArgument(*this, P, Modules);
+ if (auto *P = dyn_cast<NonTypeTemplateParmDecl>(D))
+ return ::hasVisibleDefaultArgument(*this, P, Modules);
+ return ::hasVisibleDefaultArgument(*this, cast<TemplateTemplateParmDecl>(D),
+ Modules);
+}
+
/// \brief Determine whether a declaration is visible to name lookup.
///
/// This routine determines whether the declaration D is visible in the current
@@ -3006,6 +3047,9 @@ void Sema::ArgumentDependentLookup(DeclarationName Name, SourceLocation Loc,
if (!isa<FunctionDecl>(D) && !isa<FunctionTemplateDecl>(D))
continue;
+ if (!isVisible(D) && !(D = findAcceptableDecl(*this, D)))
+ continue;
+
Result.insert(D);
}
}
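
For context on the check added above: argument-dependent lookup collects functions from the namespaces associated with the call's argument types, and the new condition drops candidates that are not visible (for example, declarations hidden in an unimported module) unless findAcceptableDecl can substitute a visible redeclaration. A minimal, modules-free sketch of ADL itself (all names illustrative):

    namespace N {
      struct S {};
      void f(S) {}
    }

    int main() {
      N::S s;
      f(s);   // found via ADL in namespace N, not by ordinary unqualified lookup
      return 0;
    }
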
@@ -4632,6 +4676,76 @@ static NamedDecl *getDefinitionToImport(NamedDecl *D) {
return nullptr;
}
+void Sema::diagnoseMissingImport(SourceLocation Loc, NamedDecl *Decl,
+ bool NeedDefinition, bool Recover) {
+ assert(!isVisible(Decl) && "missing import for non-hidden decl?");
+
+ // Suggest importing a module providing the definition of this entity, if
+ // possible.
+ NamedDecl *Def = getDefinitionToImport(Decl);
+ if (!Def)
+ Def = Decl;
+
+ // FIXME: Add a Fix-It that imports the corresponding module or includes
+ // the header.
+ Module *Owner = getOwningModule(Decl);
+ assert(Owner && "definition of hidden declaration is not in a module");
+
+ llvm::SmallVector<Module*, 8> OwningModules;
+ OwningModules.push_back(Owner);
+ auto Merged = Context.getModulesWithMergedDefinition(Decl);
+ OwningModules.insert(OwningModules.end(), Merged.begin(), Merged.end());
+
+ diagnoseMissingImport(Loc, Decl, Decl->getLocation(), OwningModules,
+ NeedDefinition ? MissingImportKind::Definition
+ : MissingImportKind::Declaration,
+ Recover);
+}
+
+void Sema::diagnoseMissingImport(SourceLocation UseLoc, NamedDecl *Decl,
+ SourceLocation DeclLoc,
+ ArrayRef<Module *> Modules,
+ MissingImportKind MIK, bool Recover) {
+ assert(!Modules.empty());
+
+ if (Modules.size() > 1) {
+ std::string ModuleList;
+ unsigned N = 0;
+ for (Module *M : Modules) {
+ ModuleList += "\n ";
+ if (++N == 5 && N != Modules.size()) {
+ ModuleList += "[...]";
+ break;
+ }
+ ModuleList += M->getFullModuleName();
+ }
+
+ Diag(UseLoc, diag::err_module_unimported_use_multiple)
+ << (int)MIK << Decl << ModuleList;
+ } else {
+ Diag(UseLoc, diag::err_module_unimported_use)
+ << (int)MIK << Decl << Modules[0]->getFullModuleName();
+ }
+
+ unsigned DiagID;
+ switch (MIK) {
+ case MissingImportKind::Declaration:
+ DiagID = diag::note_previous_declaration;
+ break;
+ case MissingImportKind::Definition:
+ DiagID = diag::note_previous_definition;
+ break;
+ case MissingImportKind::DefaultArgument:
+ DiagID = diag::note_default_argument_declared_here;
+ break;
+ }
+ Diag(DeclLoc, DiagID);
+
+ // Try to recover by implicitly importing this module.
+ if (Recover)
+ createImplicitModuleImportForErrorRecovery(UseLoc, Modules[0]);
+}
+
/// \brief Diagnose a successfully-corrected typo. Separated from the correction
/// itself to allow external validation of the result, etc.
///
@@ -4658,23 +4772,8 @@ void Sema::diagnoseTypo(const TypoCorrection &Correction,
NamedDecl *Decl = Correction.getCorrectionDecl();
assert(Decl && "import required but no declaration to import");
- // Suggest importing a module providing the definition of this entity, if
- // possible.
- NamedDecl *Def = getDefinitionToImport(Decl);
- if (!Def)
- Def = Decl;
- Module *Owner = getOwningModule(Def);
- assert(Owner && "definition of hidden declaration is not in a module");
-
- Diag(Correction.getCorrectionRange().getBegin(),
- diag::err_module_private_declaration)
- << Def << Owner->getFullModuleName();
- Diag(Def->getLocation(), diag::note_previous_declaration);
-
- // Recover by implicitly importing this module.
- if (ErrorRecovery)
- createImplicitModuleImportForErrorRecovery(
- Correction.getCorrectionRange().getBegin(), Owner);
+ diagnoseMissingImport(Correction.getCorrectionRange().getBegin(), Decl,
+ /*NeedDefinition*/ false, ErrorRecovery);
return;
}
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp
index 5e7b4b8..87fb5b6 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp
@@ -103,15 +103,6 @@ static void checkARCPropertyDecl(Sema &S, ObjCPropertyDecl *property) {
<< propertyLifetime;
}
-static unsigned deduceWeakPropertyFromType(Sema &S, QualType T) {
- if ((S.getLangOpts().getGC() != LangOptions::NonGC &&
- T.isObjCGCWeak()) ||
- (S.getLangOpts().ObjCAutoRefCount &&
- T.getObjCLifetime() == Qualifiers::OCL_Weak))
- return ObjCDeclSpec::DQ_PR_weak;
- return 0;
-}
-
/// \brief Check this Objective-C property against a property declared in the
/// given protocol.
static void
@@ -146,10 +137,10 @@ Decl *Sema::ActOnProperty(Scope *S, SourceLocation AtLoc,
tok::ObjCKeywordKind MethodImplKind,
DeclContext *lexicalDC) {
unsigned Attributes = ODS.getPropertyAttributes();
+ FD.D.setObjCWeakProperty((Attributes & ObjCDeclSpec::DQ_PR_weak) != 0);
TypeSourceInfo *TSI = GetTypeForDeclarator(FD.D, S);
QualType T = TSI->getType();
- Attributes |= deduceWeakPropertyFromType(*this, T);
-
+ Attributes |= deduceWeakPropertyFromType(T);
bool isReadWrite = ((Attributes & ObjCDeclSpec::DQ_PR_readwrite) ||
// default is readwrite!
!(Attributes & ObjCDeclSpec::DQ_PR_readonly));
@@ -173,7 +164,7 @@ Decl *Sema::ActOnProperty(Scope *S, SourceLocation AtLoc,
isAssign, isReadWrite,
Attributes,
ODS.getPropertyAttributes(),
- isOverridingProperty, TSI,
+ isOverridingProperty, T, TSI,
MethodImplKind);
if (!Res)
return nullptr;
@@ -184,7 +175,7 @@ Decl *Sema::ActOnProperty(Scope *S, SourceLocation AtLoc,
Res = CreatePropertyDecl(S, ClassDecl, AtLoc, LParenLoc, FD,
GetterSel, SetterSel, isAssign, isReadWrite,
Attributes, ODS.getPropertyAttributes(),
- TSI, MethodImplKind);
+ T, TSI, MethodImplKind);
if (lexicalDC)
Res->setLexicalDeclContext(lexicalDC);
}
@@ -322,7 +313,8 @@ Sema::HandlePropertyInClassExtension(Scope *S,
const unsigned Attributes,
const unsigned AttributesAsWritten,
bool *isOverridingProperty,
- TypeSourceInfo *T,
+ QualType T,
+ TypeSourceInfo *TSI,
tok::ObjCKeywordKind MethodImplKind) {
ObjCCategoryDecl *CDecl = cast<ObjCCategoryDecl>(CurContext);
// Diagnose if this property is already in continuation class.
@@ -348,7 +340,7 @@ Sema::HandlePropertyInClassExtension(Scope *S,
// FIXME. We should really be using CreatePropertyDecl for this.
ObjCPropertyDecl *PDecl =
ObjCPropertyDecl::Create(Context, DC, FD.D.getIdentifierLoc(),
- PropertyId, AtLoc, LParenLoc, T);
+ PropertyId, AtLoc, LParenLoc, T, TSI);
PDecl->setPropertyAttributesAsWritten(
makePropertyAttributesAsWritten(AttributesAsWritten));
if (Attributes & ObjCDeclSpec::DQ_PR_readonly)
@@ -359,6 +351,11 @@ Sema::HandlePropertyInClassExtension(Scope *S,
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_nonatomic);
if (Attributes & ObjCDeclSpec::DQ_PR_atomic)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_atomic);
+ if (Attributes & ObjCDeclSpec::DQ_PR_nullability)
+ PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_nullability);
+ if (Attributes & ObjCDeclSpec::DQ_PR_null_resettable)
+ PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_null_resettable);
+
// Set setter/getter selector name. Needed later.
PDecl->setGetterName(GetterSel);
PDecl->setSetterName(SetterSel);
@@ -383,7 +380,8 @@ Sema::HandlePropertyInClassExtension(Scope *S,
ObjCPropertyDecl *PrimaryPDecl =
CreatePropertyDecl(S, CCPrimary, AtLoc, LParenLoc,
FD, GetterSel, SetterSel, isAssign, isReadWrite,
- Attributes,AttributesAsWritten, T, MethodImplKind, DC);
+ Attributes,AttributesAsWritten, T, TSI, MethodImplKind,
+ DC);
// A case of continuation class adding a new property in the class. This
// is not what it was meant for. However, gcc supports it and so should we.
@@ -427,7 +425,7 @@ Sema::HandlePropertyInClassExtension(Scope *S,
if (isReadWrite && (PIkind & ObjCPropertyDecl::OBJC_PR_readonly)) {
PIkind &= ~ObjCPropertyDecl::OBJC_PR_readonly;
PIkind |= ObjCPropertyDecl::OBJC_PR_readwrite;
- PIkind |= deduceWeakPropertyFromType(*this, PIDecl->getType());
+ PIkind |= deduceWeakPropertyFromType(PIDecl->getType());
unsigned ClassExtensionMemoryModel = getOwnershipRule(Attributes);
unsigned PrimaryClassMemoryModel = getOwnershipRule(PIkind);
if (PrimaryClassMemoryModel && ClassExtensionMemoryModel &&
@@ -531,11 +529,11 @@ ObjCPropertyDecl *Sema::CreatePropertyDecl(Scope *S,
const bool isReadWrite,
const unsigned Attributes,
const unsigned AttributesAsWritten,
+ QualType T,
TypeSourceInfo *TInfo,
tok::ObjCKeywordKind MethodImplKind,
DeclContext *lexicalDC){
IdentifierInfo *PropertyId = FD.D.getIdentifier();
- QualType T = TInfo->getType();
// Issue a warning if property is 'assign' as default and its object, which is
// gc'able conforms to NSCopying protocol
@@ -564,7 +562,8 @@ ObjCPropertyDecl *Sema::CreatePropertyDecl(Scope *S,
DeclContext *DC = cast<DeclContext>(CDecl);
ObjCPropertyDecl *PDecl = ObjCPropertyDecl::Create(Context, DC,
FD.D.getIdentifierLoc(),
- PropertyId, AtLoc, LParenLoc, TInfo);
+ PropertyId, AtLoc,
+ LParenLoc, T, TInfo);
if (ObjCPropertyDecl *prevDecl =
ObjCPropertyDecl::findPropertyDecl(DC, PropertyId)) {
@@ -639,6 +638,12 @@ ObjCPropertyDecl *Sema::CreatePropertyDecl(Scope *S,
else if (MethodImplKind == tok::objc_optional)
PDecl->setPropertyImplementation(ObjCPropertyDecl::Optional);
+ if (Attributes & ObjCDeclSpec::DQ_PR_nullability)
+ PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_nullability);
+
+ if (Attributes & ObjCDeclSpec::DQ_PR_null_resettable)
+ PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_null_resettable);
+
return PDecl;
}
@@ -1753,6 +1758,33 @@ void Sema::DiagnoseUnimplementedProperties(Scope *S, ObjCImplDecl* IMPDecl,
}
}
+void Sema::diagnoseNullResettableSynthesizedSetters(ObjCImplDecl *impDecl) {
+ for (const auto *propertyImpl : impDecl->property_impls()) {
+ const auto *property = propertyImpl->getPropertyDecl();
+
+ // Warn about null_resettable properties with synthesized setters,
+ // because the setter won't properly handle nil.
+ if (propertyImpl->getPropertyImplementation()
+ == ObjCPropertyImplDecl::Synthesize &&
+ (property->getPropertyAttributes() &
+ ObjCPropertyDecl::OBJC_PR_null_resettable) &&
+ property->getGetterMethodDecl() &&
+ property->getSetterMethodDecl()) {
+ auto *getterMethod = property->getGetterMethodDecl();
+ auto *setterMethod = property->getSetterMethodDecl();
+ if (!impDecl->getInstanceMethod(setterMethod->getSelector()) &&
+ !impDecl->getInstanceMethod(getterMethod->getSelector())) {
+ SourceLocation loc = propertyImpl->getLocation();
+ if (loc.isInvalid())
+ loc = impDecl->getLocStart();
+
+ Diag(loc, diag::warn_null_resettable_setter)
+ << setterMethod->getSelector() << property->getDeclName();
+ }
+ }
+ }
+}
+
void
Sema::AtomicPropertySetterGetterRules (ObjCImplDecl* IMPDecl,
ObjCContainerDecl* IDecl) {
@@ -1988,9 +2020,21 @@ void Sema::ProcessPropertyDecl(ObjCPropertyDecl *property,
redeclaredProperty->getLocation() :
property->getLocation();
+ // If the property is null_resettable, the getter returns nonnull.
+ QualType resultTy = property->getType();
+ if (property->getPropertyAttributes() &
+ ObjCPropertyDecl::OBJC_PR_null_resettable) {
+ QualType modifiedTy = resultTy;
+ if (auto nullability = AttributedType::stripOuterNullability(modifiedTy)){
+ if (*nullability == NullabilityKind::Unspecified)
+ resultTy = Context.getAttributedType(AttributedType::attr_nonnull,
+ modifiedTy, modifiedTy);
+ }
+ }
+
GetterMethod = ObjCMethodDecl::Create(Context, Loc, Loc,
property->getGetterName(),
- property->getType(), nullptr, CD,
+ resultTy, nullptr, CD,
/*isInstance=*/true, /*isVariadic=*/false,
/*isPropertyAccessor=*/true,
/*isImplicitlyDeclared=*/true, /*isDefined=*/false,
@@ -2051,12 +2095,25 @@ void Sema::ProcessPropertyDecl(ObjCPropertyDecl *property,
ObjCMethodDecl::Optional :
ObjCMethodDecl::Required);
+ // If the property is null_resettable, the setter accepts a
+ // nullable value.
+ QualType paramTy = property->getType().getUnqualifiedType();
+ if (property->getPropertyAttributes() &
+ ObjCPropertyDecl::OBJC_PR_null_resettable) {
+ QualType modifiedTy = paramTy;
+ if (auto nullability = AttributedType::stripOuterNullability(modifiedTy)){
+ if (*nullability == NullabilityKind::Unspecified)
+ paramTy = Context.getAttributedType(AttributedType::attr_nullable,
+ modifiedTy, modifiedTy);
+ }
+ }
+
// Invent the arguments for the setter. We don't bother making a
// nice name for the argument.
ParmVarDecl *Argument = ParmVarDecl::Create(Context, SetterMethod,
Loc, Loc,
property->getIdentifier(),
- property->getType().getUnqualifiedType(),
+ paramTy,
/*TInfo=*/nullptr,
SC_None,
nullptr);
@@ -2228,6 +2285,22 @@ void Sema::CheckObjCPropertyAttributes(Decl *PDecl,
Attributes &= ~ObjCDeclSpec::DQ_PR_weak;
}
+ if (Attributes & ObjCDeclSpec::DQ_PR_weak) {
+ // 'weak' and 'nonnull' are mutually exclusive.
+ if (auto nullability = PropertyTy->getNullability(Context)) {
+ if (*nullability == NullabilityKind::NonNull)
+ Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
+ << "nonnull" << "weak";
+ } else {
+ PropertyTy =
+ Context.getAttributedType(
+ AttributedType::getNullabilityAttrKind(NullabilityKind::Nullable),
+ PropertyTy, PropertyTy);
+ TypeSourceInfo *TSInfo = PropertyDecl->getTypeSourceInfo();
+ PropertyDecl->setType(PropertyTy, TSInfo);
+ }
+ }
+
if ((Attributes & ObjCDeclSpec::DQ_PR_atomic) &&
(Attributes & ObjCDeclSpec::DQ_PR_nonatomic)) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp
index cfe8db3..e609fcf 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp
@@ -1270,6 +1270,14 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
Params);
break;
}
+ case OMPD_taskgroup: {
+ Sema::CapturedParamNameType Params[] = {
+ std::make_pair(StringRef(), QualType()) // __context with shared vars
+ };
+ ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
+ Params);
+ break;
+ }
case OMPD_threadprivate:
case OMPD_taskyield:
case OMPD_barrier:
@@ -1335,6 +1343,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | parallel | taskyield | * |
// | parallel | barrier | * |
// | parallel | taskwait | * |
+ // | parallel | taskgroup | * |
// | parallel | flush | * |
// | parallel | ordered | + |
// | parallel | atomic | * |
@@ -1357,6 +1366,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | for | taskyield | * |
// | for | barrier | + |
// | for | taskwait | * |
+ // | for | taskgroup | * |
// | for | flush | * |
// | for | ordered | * (if construct is ordered) |
// | for | atomic | * |
@@ -1379,6 +1389,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | master | taskyield | * |
// | master | barrier | + |
// | master | taskwait | * |
+ // | master | taskgroup | * |
// | master | flush | * |
// | master | ordered | + |
// | master | atomic | * |
@@ -1401,6 +1412,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | critical | taskyield | * |
// | critical | barrier | + |
// | critical | taskwait | * |
+ // | critical | taskgroup | * |
// | critical | ordered | + |
// | critical | atomic | * |
// | critical | target | * |
@@ -1422,6 +1434,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | simd | taskyield | |
// | simd | barrier | |
// | simd | taskwait | |
+ // | simd | taskgroup | |
// | simd | flush | |
// | simd | ordered | |
// | simd | atomic | |
@@ -1444,6 +1457,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | for simd | taskyield | |
// | for simd | barrier | |
// | for simd | taskwait | |
+ // | for simd | taskgroup | |
// | for simd | flush | |
// | for simd | ordered | |
// | for simd | atomic | |
@@ -1466,6 +1480,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | parallel for simd| taskyield | |
// | parallel for simd| barrier | |
// | parallel for simd| taskwait | |
+ // | parallel for simd| taskgroup | |
// | parallel for simd| flush | |
// | parallel for simd| ordered | |
// | parallel for simd| atomic | |
@@ -1488,6 +1503,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | sections | taskyield | * |
// | sections | barrier | + |
// | sections | taskwait | * |
+ // | sections | taskgroup | * |
// | sections | flush | * |
// | sections | ordered | + |
// | sections | atomic | * |
@@ -1510,6 +1526,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | section | taskyield | * |
// | section | barrier | + |
// | section | taskwait | * |
+ // | section | taskgroup | * |
// | section | flush | * |
// | section | ordered | + |
// | section | atomic | * |
@@ -1532,6 +1549,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | single | taskyield | * |
// | single | barrier | + |
// | single | taskwait | * |
+ // | single | taskgroup | * |
// | single | flush | * |
// | single | ordered | + |
// | single | atomic | * |
@@ -1554,6 +1572,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | parallel for | taskyield | * |
// | parallel for | barrier | + |
// | parallel for | taskwait | * |
+ // | parallel for | taskgroup | * |
// | parallel for | flush | * |
// | parallel for | ordered | * (if construct is ordered) |
// | parallel for | atomic | * |
@@ -1576,6 +1595,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | parallel sections| taskyield | * |
// | parallel sections| barrier | + |
// | parallel sections| taskwait | * |
+ // | parallel sections| taskgroup | * |
// | parallel sections| flush | * |
// | parallel sections| ordered | + |
// | parallel sections| atomic | * |
@@ -1598,6 +1618,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | task | taskyield | * |
// | task | barrier | + |
// | task | taskwait | * |
+ // | task | taskgroup | * |
// | task | flush | * |
// | task | ordered | + |
// | task | atomic | * |
@@ -1620,6 +1641,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | ordered | taskyield | * |
// | ordered | barrier | + |
// | ordered | taskwait | * |
+ // | ordered | taskgroup | * |
// | ordered | flush | * |
// | ordered | ordered | + |
// | ordered | atomic | * |
@@ -1642,6 +1664,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | atomic | taskyield | |
// | atomic | barrier | |
// | atomic | taskwait | |
+ // | atomic | taskgroup | |
// | atomic | flush | |
// | atomic | ordered | |
// | atomic | atomic | |
@@ -1664,6 +1687,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | target | taskyield | * |
// | target | barrier | * |
// | target | taskwait | * |
+ // | target | taskgroup | * |
// | target | flush | * |
// | target | ordered | * |
// | target | atomic | * |
@@ -1686,6 +1710,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | teams | taskyield | + |
// | teams | barrier | + |
// | teams | taskwait | + |
+ // | teams | taskgroup | + |
// | teams | flush | + |
// | teams | ordered | + |
// | teams | atomic | + |
@@ -1936,6 +1961,11 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(OpenMPDirectiveKind Kind,
"No associated statement allowed for 'omp taskwait' directive");
Res = ActOnOpenMPTaskwaitDirective(StartLoc, EndLoc);
break;
+ case OMPD_taskgroup:
+ assert(ClausesWithImplicit.empty() &&
+ "No clauses are allowed for 'omp taskgroup' directive");
+ Res = ActOnOpenMPTaskgroupDirective(AStmt, StartLoc, EndLoc);
+ break;
case OMPD_flush:
assert(AStmt == nullptr &&
"No associated statement allowed for 'omp flush' directive");
@@ -2939,9 +2969,6 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr,
? SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get())
: SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(),
NumIterations.get());
- // Loop condition with 1 iteration separated (IV < LastIteration)
- ExprResult SeparatedCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT,
- IV.get(), LastIteration.get());
// Loop increment (IV = IV + 1)
SourceLocation IncLoc;
@@ -3071,7 +3098,6 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr,
Built.CalcLastIteration = CalcLastIteration.get();
Built.PreCond = PreCond.get();
Built.Cond = Cond.get();
- Built.SeparatedCond = SeparatedCond.get();
Built.Init = Init.get();
Built.Inc = Inc.get();
Built.LB = LB.get();
@@ -3161,6 +3187,16 @@ StmtResult Sema::ActOnOpenMPForSimdDirective(
assert((CurContext->isDependentContext() || B.builtAll()) &&
"omp for simd loop exprs were not built");
+ if (!CurContext->isDependentContext()) {
+ // Finalize the clauses that need pre-built expressions for CodeGen.
+ for (auto C : Clauses) {
+ if (auto LC = dyn_cast<OMPLinearClause>(C))
+ if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
+ B.NumIterations, *this, CurScope))
+ return StmtError();
+ }
+ }
+
getCurFunction()->setHasBranchProtectedScope();
return OMPForSimdDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount,
Clauses, AStmt, B);
@@ -3310,6 +3346,16 @@ StmtResult Sema::ActOnOpenMPParallelForSimdDirective(
if (NestedLoopCount == 0)
return StmtError();
+ if (!CurContext->isDependentContext()) {
+ // Finalize the clauses that need pre-built expressions for CodeGen.
+ for (auto C : Clauses) {
+ if (auto LC = dyn_cast<OMPLinearClause>(C))
+ if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
+ B.NumIterations, *this, CurScope))
+ return StmtError();
+ }
+ }
+
getCurFunction()->setHasBranchProtectedScope();
return OMPParallelForSimdDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
@@ -3382,6 +3428,16 @@ StmtResult Sema::ActOnOpenMPTaskwaitDirective(SourceLocation StartLoc,
return OMPTaskwaitDirective::Create(Context, StartLoc, EndLoc);
}
+StmtResult Sema::ActOnOpenMPTaskgroupDirective(Stmt *AStmt,
+ SourceLocation StartLoc,
+ SourceLocation EndLoc) {
+ assert(AStmt && isa<CapturedStmt>(AStmt) && "Captured statement expected");
+
+ getCurFunction()->setHasBranchProtectedScope();
+
+ return OMPTaskgroupDirective::Create(Context, StartLoc, EndLoc, AStmt);
+}
+
StmtResult Sema::ActOnOpenMPFlushDirective(ArrayRef<OMPClause *> Clauses,
SourceLocation StartLoc,
SourceLocation EndLoc) {
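
The OMPD_taskgroup handling added above captures the associated statement and, per the assertion in ActOnOpenMPExecutableDirective, accepts no clauses; ActOnOpenMPTaskgroupDirective then wraps the captured statement in an OMPTaskgroupDirective. A generic usage sketch, not taken from this patch (compile with -fopenmp; names illustrative): a taskgroup makes the encountering task wait for all child tasks generated inside it.

    #include <cstdio>

    int main() {
      int out[4] = {0, 0, 0, 0};
      #pragma omp parallel
      #pragma omp single
      {
        #pragma omp taskgroup
        {
          for (int i = 0; i < 4; ++i) {
            #pragma omp task shared(out) firstprivate(i)
            out[i] = i * i;
          }
        }   // all tasks created inside the taskgroup have completed here
        std::printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
      }
      return 0;
    }
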
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp
index 9d87a10..a0fdcd7 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp
@@ -10507,7 +10507,8 @@ DiagnoseTwoPhaseLookup(Sema &SemaRef, SourceLocation FnLoc,
const CXXScopeSpec &SS, LookupResult &R,
OverloadCandidateSet::CandidateSetKind CSK,
TemplateArgumentListInfo *ExplicitTemplateArgs,
- ArrayRef<Expr *> Args) {
+ ArrayRef<Expr *> Args,
+ bool *DoDiagnoseEmptyLookup = nullptr) {
if (SemaRef.ActiveTemplateInstantiations.empty() || !SS.isEmpty())
return false;
@@ -10524,6 +10525,8 @@ DiagnoseTwoPhaseLookup(Sema &SemaRef, SourceLocation FnLoc,
// Don't diagnose names we find in classes; we get much better
// diagnostics for these from DiagnoseEmptyLookup.
R.clear();
+ if (DoDiagnoseEmptyLookup)
+ *DoDiagnoseEmptyLookup = true;
return false;
}
@@ -10673,15 +10676,16 @@ BuildRecoveryCallExpr(Sema &SemaRef, Scope *S, Expr *Fn,
LookupResult R(SemaRef, ULE->getName(), ULE->getNameLoc(),
Sema::LookupOrdinaryName);
+ bool DoDiagnoseEmptyLookup = EmptyLookup;
if (!DiagnoseTwoPhaseLookup(SemaRef, Fn->getExprLoc(), SS, R,
OverloadCandidateSet::CSK_Normal,
- ExplicitTemplateArgs, Args) &&
- (!EmptyLookup ||
- SemaRef.DiagnoseEmptyLookup(
- S, SS, R,
- MakeValidator(SemaRef, dyn_cast<MemberExpr>(Fn), Args.size(),
- ExplicitTemplateArgs != nullptr, AllowTypoCorrection),
- ExplicitTemplateArgs, Args)))
+ ExplicitTemplateArgs, Args,
+ &DoDiagnoseEmptyLookup) &&
+ (!DoDiagnoseEmptyLookup || SemaRef.DiagnoseEmptyLookup(
+ S, SS, R,
+ MakeValidator(SemaRef, dyn_cast<MemberExpr>(Fn), Args.size(),
+ ExplicitTemplateArgs != nullptr, AllowTypoCorrection),
+ ExplicitTemplateArgs, Args)))
return ExprError();
assert(!R.empty() && "lookup results empty despite recovery");
@@ -10746,26 +10750,29 @@ bool Sema::buildOverloadedCallSet(Scope *S, Expr *Fn,
// functions, including those from argument-dependent lookup.
AddOverloadedCallCandidates(ULE, Args, *CandidateSet);
- // If we found nothing, try to recover.
- // BuildRecoveryCallExpr diagnoses the error itself, so we just bail
- // out if it fails.
- if (CandidateSet->empty()) {
- // In Microsoft mode, if we are inside a template class member function then
- // create a type dependent CallExpr. The goal is to postpone name lookup
- // to instantiation time to be able to search into type dependent base
- // classes.
- if (getLangOpts().MSVCCompat && CurContext->isDependentContext() &&
- (isa<FunctionDecl>(CurContext) || isa<CXXRecordDecl>(CurContext))) {
- CallExpr *CE = new (Context) CallExpr(Context, Fn, Args,
- Context.DependentTy, VK_RValue,
- RParenLoc);
+ if (getLangOpts().MSVCCompat &&
+ CurContext->isDependentContext() && !isSFINAEContext() &&
+ (isa<FunctionDecl>(CurContext) || isa<CXXRecordDecl>(CurContext))) {
+
+ OverloadCandidateSet::iterator Best;
+ if (CandidateSet->empty() ||
+ CandidateSet->BestViableFunction(*this, Fn->getLocStart(), Best) ==
+ OR_No_Viable_Function) {
+ // In Microsoft mode, if we are inside a template class member function then
+ // create a type dependent CallExpr. The goal is to postpone name lookup
+ // to instantiation time to be able to search into type dependent base
+ // classes.
+ CallExpr *CE = new (Context) CallExpr(
+ Context, Fn, Args, Context.DependentTy, VK_RValue, RParenLoc);
CE->setTypeDependent(true);
*Result = CE;
return true;
}
- return false;
}
+ if (CandidateSet->empty())
+ return false;
+
UnbridgedCasts.restore();
return false;
}
@@ -11253,7 +11260,7 @@ Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
if (Op == OO_Equal)
DiagnoseSelfMove(Args[0], Args[1], OpLoc);
- checkCall(FnDecl, ArgsArray, 0, isa<CXXMethodDecl>(FnDecl), OpLoc,
+ checkCall(FnDecl, nullptr, ArgsArray, isa<CXXMethodDecl>(FnDecl), OpLoc,
TheCall->getSourceRange(), VariadicDoesNotApply);
return MaybeBindToTemporary(TheCall);
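
The buildOverloadedCallSet change above extends the Microsoft-compatibility fallback: a type-dependent CallExpr is now built not only when the candidate set is empty but also when no candidate is viable, postponing name lookup to instantiation time (e.g. into dependent base classes), and the fallback is skipped in SFINAE contexts. A small illustration of the pattern this accepts under -fms-compatibility (standard C++ would require this->g(42); names are illustrative):

    template <typename T>
    struct Base {
      void g(int) {}
    };

    template <typename T>
    struct Derived : Base<T> {
      void call() {
        g(42);   // unqualified call into a dependent base, resolved at instantiation
      }
    };

    int main() {
      Derived<int> d;
      d.call();
      return 0;
    }
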
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp
index 5c72529..50e4345 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp
@@ -3432,7 +3432,7 @@ class CatchHandlerType {
public:
/// Used when creating a CatchHandlerType from a handler type; will determine
- /// whether the type is a pointer or reference and will strip off the the top
+ /// whether the type is a pointer or reference and will strip off the top
/// level pointer and cv-qualifiers.
CatchHandlerType(QualType Q) : QT(Q), IsPointer(false) {
if (QT->isPointerType())
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaStmtAttr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaStmtAttr.cpp
index 19e2c8e..5b71c11 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaStmtAttr.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaStmtAttr.cpp
@@ -105,6 +105,8 @@ static Attr *handleLoopHintAttr(Sema &S, Stmt *St, const AttributeList &A,
if (StateLoc && StateLoc->Ident) {
if (StateLoc->Ident->isStr("disable"))
State = LoopHintAttr::Disable;
+ else if (StateLoc->Ident->isStr("assume_safety"))
+ State = LoopHintAttr::AssumeSafety;
else
State = LoopHintAttr::Enable;
}
@@ -159,7 +161,7 @@ CheckForIncompatibleAttributes(Sema &S,
const LoopHintAttr *PrevAttr;
if (Option == LoopHintAttr::Vectorize ||
Option == LoopHintAttr::Interleave || Option == LoopHintAttr::Unroll) {
- // Enable|disable hint. For example, vectorize(enable).
+ // Enable|Disable|AssumeSafety hint. For example, vectorize(enable).
PrevAttr = CategoryState.StateAttr;
CategoryState.StateAttr = LH;
} else {
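
handleLoopHintAttr above now maps the identifier "assume_safety" to LoopHintAttr::AssumeSafety for the loop-hint state options (e.g. vectorize, interleave). A usage sketch (illustrative; the spelling follows Clang's loop-hint pragma): assume_safety asks the optimizer to assume there are no loop-carried memory dependences instead of emitting runtime checks.

    void scale(float *dst, const float *src, int n) {
      // Assumption of this example: the caller guarantees dst and src do not alias.
      #pragma clang loop vectorize(assume_safety) interleave(assume_safety)
      for (int i = 0; i < n; ++i)
        dst[i] = 2.0f * src[i];
    }
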
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp
index 19c0f2a..f4740a5 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp
@@ -602,7 +602,7 @@ Decl *Sema::ActOnTypeParameter(Scope *S, bool Typename,
return Param;
}
- Param->setDefaultArgument(DefaultTInfo, false);
+ Param->setDefaultArgument(DefaultTInfo);
}
return Param;
@@ -723,7 +723,7 @@ Decl *Sema::ActOnNonTypeTemplateParameter(Scope *S, Declarator &D,
}
Default = DefaultRes.get();
- Param->setDefaultArgument(Default, false);
+ Param->setDefaultArgument(Default);
}
return Param;
@@ -799,7 +799,7 @@ Decl *Sema::ActOnTemplateTemplateParameter(Scope* S,
UPPC_DefaultArgument))
return Param;
- Param->setDefaultArgument(DefaultArg, false);
+ Param->setDefaultArgument(Context, DefaultArg);
}
return Param;
@@ -1310,15 +1310,11 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams,
// Merge default arguments for template type parameters.
TemplateTypeParmDecl *OldTypeParm
= OldParams? cast<TemplateTypeParmDecl>(*OldParam) : nullptr;
- // FIXME: There might be a visible declaration of this template parameter.
- if (OldTypeParm && !LookupResult::isVisible(*this, OldTypeParm))
- OldTypeParm = nullptr;
-
if (NewTypeParm->isParameterPack()) {
assert(!NewTypeParm->hasDefaultArgument() &&
"Parameter packs can't have a default argument!");
SawParameterPack = true;
- } else if (OldTypeParm && OldTypeParm->hasDefaultArgument() &&
+ } else if (OldTypeParm && hasVisibleDefaultArgument(OldTypeParm) &&
NewTypeParm->hasDefaultArgument()) {
OldDefaultLoc = OldTypeParm->getDefaultArgumentLoc();
NewDefaultLoc = NewTypeParm->getDefaultArgumentLoc();
@@ -1328,8 +1324,7 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams,
} else if (OldTypeParm && OldTypeParm->hasDefaultArgument()) {
// Merge the default argument from the old declaration to the
// new declaration.
- NewTypeParm->setDefaultArgument(OldTypeParm->getDefaultArgumentInfo(),
- true);
+ NewTypeParm->setInheritedDefaultArgument(Context, OldTypeParm);
PreviousDefaultArgLoc = OldTypeParm->getDefaultArgumentLoc();
} else if (NewTypeParm->hasDefaultArgument()) {
SawDefaultArgument = true;
@@ -1358,14 +1353,12 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams,
// Merge default arguments for non-type template parameters
NonTypeTemplateParmDecl *OldNonTypeParm
= OldParams? cast<NonTypeTemplateParmDecl>(*OldParam) : nullptr;
- if (OldNonTypeParm && !LookupResult::isVisible(*this, OldNonTypeParm))
- OldNonTypeParm = nullptr;
if (NewNonTypeParm->isParameterPack()) {
assert(!NewNonTypeParm->hasDefaultArgument() &&
"Parameter packs can't have a default argument!");
if (!NewNonTypeParm->isPackExpansion())
SawParameterPack = true;
- } else if (OldNonTypeParm && OldNonTypeParm->hasDefaultArgument() &&
+ } else if (OldNonTypeParm && hasVisibleDefaultArgument(OldNonTypeParm) &&
NewNonTypeParm->hasDefaultArgument()) {
OldDefaultLoc = OldNonTypeParm->getDefaultArgumentLoc();
NewDefaultLoc = NewNonTypeParm->getDefaultArgumentLoc();
@@ -1375,12 +1368,7 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams,
} else if (OldNonTypeParm && OldNonTypeParm->hasDefaultArgument()) {
// Merge the default argument from the old declaration to the
// new declaration.
- // FIXME: We need to create a new kind of "default argument"
- // expression that points to a previous non-type template
- // parameter.
- NewNonTypeParm->setDefaultArgument(
- OldNonTypeParm->getDefaultArgument(),
- /*Inherited=*/ true);
+ NewNonTypeParm->setInheritedDefaultArgument(Context, OldNonTypeParm);
PreviousDefaultArgLoc = OldNonTypeParm->getDefaultArgumentLoc();
} else if (NewNonTypeParm->hasDefaultArgument()) {
SawDefaultArgument = true;
@@ -1407,15 +1395,14 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams,
// Merge default arguments for template template parameters
TemplateTemplateParmDecl *OldTemplateParm
= OldParams? cast<TemplateTemplateParmDecl>(*OldParam) : nullptr;
- if (OldTemplateParm && !LookupResult::isVisible(*this, OldTemplateParm))
- OldTemplateParm = nullptr;
if (NewTemplateParm->isParameterPack()) {
assert(!NewTemplateParm->hasDefaultArgument() &&
"Parameter packs can't have a default argument!");
if (!NewTemplateParm->isPackExpansion())
SawParameterPack = true;
- } else if (OldTemplateParm && OldTemplateParm->hasDefaultArgument() &&
- NewTemplateParm->hasDefaultArgument()) {
+ } else if (OldTemplateParm &&
+ hasVisibleDefaultArgument(OldTemplateParm) &&
+ NewTemplateParm->hasDefaultArgument()) {
OldDefaultLoc = OldTemplateParm->getDefaultArgument().getLocation();
NewDefaultLoc = NewTemplateParm->getDefaultArgument().getLocation();
SawDefaultArgument = true;
@@ -1424,11 +1411,7 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams,
} else if (OldTemplateParm && OldTemplateParm->hasDefaultArgument()) {
// Merge the default argument from the old declaration to the
// new declaration.
- // FIXME: We need to create a new kind of "default argument" expression
- // that points to a previous template template parameter.
- NewTemplateParm->setDefaultArgument(
- OldTemplateParm->getDefaultArgument(),
- /*Inherited=*/ true);
+ NewTemplateParm->setInheritedDefaultArgument(Context, OldTemplateParm);
PreviousDefaultArgLoc
= OldTemplateParm->getDefaultArgument().getLocation();
} else if (NewTemplateParm->hasDefaultArgument()) {
@@ -3316,7 +3299,7 @@ Sema::SubstDefaultTemplateArgumentIfAvailable(TemplateDecl *Template,
HasDefaultArg = false;
if (TemplateTypeParmDecl *TypeParm = dyn_cast<TemplateTypeParmDecl>(Param)) {
- if (!TypeParm->hasDefaultArgument())
+ if (!hasVisibleDefaultArgument(TypeParm))
return TemplateArgumentLoc();
HasDefaultArg = true;
@@ -3333,7 +3316,7 @@ Sema::SubstDefaultTemplateArgumentIfAvailable(TemplateDecl *Template,
if (NonTypeTemplateParmDecl *NonTypeParm
= dyn_cast<NonTypeTemplateParmDecl>(Param)) {
- if (!NonTypeParm->hasDefaultArgument())
+ if (!hasVisibleDefaultArgument(NonTypeParm))
return TemplateArgumentLoc();
HasDefaultArg = true;
@@ -3351,7 +3334,7 @@ Sema::SubstDefaultTemplateArgumentIfAvailable(TemplateDecl *Template,
TemplateTemplateParmDecl *TempTempParm
= cast<TemplateTemplateParmDecl>(Param);
- if (!TempTempParm->hasDefaultArgument())
+ if (!hasVisibleDefaultArgument(TempTempParm))
return TemplateArgumentLoc();
HasDefaultArg = true;
@@ -3661,6 +3644,35 @@ static Optional<unsigned> getExpandedPackSize(NamedDecl *Param) {
return None;
}
+/// Diagnose a missing template argument.
+template<typename TemplateParmDecl>
+static bool diagnoseMissingArgument(Sema &S, SourceLocation Loc,
+ TemplateDecl *TD,
+ const TemplateParmDecl *D,
+ TemplateArgumentListInfo &Args) {
+ // Dig out the most recent declaration of the template parameter; there may be
+ // declarations of the template that are more recent than TD.
+ D = cast<TemplateParmDecl>(cast<TemplateDecl>(TD->getMostRecentDecl())
+ ->getTemplateParameters()
+ ->getParam(D->getIndex()));
+
+ // If there's a default argument that's not visible, diagnose that we're
+ // missing a module import.
+ llvm::SmallVector<Module*, 8> Modules;
+ if (D->hasDefaultArgument() && !S.hasVisibleDefaultArgument(D, &Modules)) {
+ S.diagnoseMissingImport(Loc, cast<NamedDecl>(TD),
+ D->getDefaultArgumentLoc(), Modules,
+ Sema::MissingImportKind::DefaultArgument,
+ /*Recover*/ true);
+ return true;
+ }
+
+ // FIXME: If there's a more recent default argument that *is* visible,
+ // diagnose that it was declared too late.
+
+ return diagnoseArityMismatch(S, TD, Loc, Args);
+}
+
/// \brief Check that the given template argument list is well-formed
/// for specializing the given template.
bool Sema::CheckTemplateArgumentList(TemplateDecl *Template,
@@ -3816,8 +3828,9 @@ bool Sema::CheckTemplateArgumentList(TemplateDecl *Template,
// (when the template parameter was part of a nested template) into
// the default argument.
if (TemplateTypeParmDecl *TTP = dyn_cast<TemplateTypeParmDecl>(*Param)) {
- if (!TTP->hasDefaultArgument())
- return diagnoseArityMismatch(*this, Template, TemplateLoc, NewArgs);
+ if (!hasVisibleDefaultArgument(TTP))
+ return diagnoseMissingArgument(*this, TemplateLoc, Template, TTP,
+ NewArgs);
TypeSourceInfo *ArgType = SubstDefaultTemplateArgument(*this,
Template,
@@ -3832,8 +3845,9 @@ bool Sema::CheckTemplateArgumentList(TemplateDecl *Template,
ArgType);
} else if (NonTypeTemplateParmDecl *NTTP
= dyn_cast<NonTypeTemplateParmDecl>(*Param)) {
- if (!NTTP->hasDefaultArgument())
- return diagnoseArityMismatch(*this, Template, TemplateLoc, NewArgs);
+ if (!hasVisibleDefaultArgument(NTTP))
+ return diagnoseMissingArgument(*this, TemplateLoc, Template, NTTP,
+ NewArgs);
ExprResult E = SubstDefaultTemplateArgument(*this, Template,
TemplateLoc,
@@ -3849,8 +3863,9 @@ bool Sema::CheckTemplateArgumentList(TemplateDecl *Template,
TemplateTemplateParmDecl *TempParm
= cast<TemplateTemplateParmDecl>(*Param);
- if (!TempParm->hasDefaultArgument())
- return diagnoseArityMismatch(*this, Template, TemplateLoc, NewArgs);
+ if (!hasVisibleDefaultArgument(TempParm))
+ return diagnoseMissingArgument(*this, TemplateLoc, Template, TempParm,
+ NewArgs);
NestedNameSpecifierLoc QualifierLoc;
TemplateName Name = SubstDefaultTemplateArgument(*this, Template,
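
The default-argument handling above replaces the old copy-with-Inherited-flag approach: a redeclaration now records, via setInheritedDefaultArgument, which earlier parameter its default came from, and hasVisibleDefaultArgument walks that chain so only a default that is actually visible gets used. Outside of modules the observable behaviour is the familiar one — a later declaration without a default still picks up the default written on an earlier declaration (names illustrative):

    template <typename T = int> struct X;   // default argument written here

    template <typename T> struct X {        // redeclaration; inherits the default
      T value;
    };

    X<> x;   // OK: T = int, found on the earlier declaration

    int main() { return x.value; }
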
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 5c994f8..f35d1aa 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1922,12 +1922,12 @@ Decl *TemplateDeclInstantiator::VisitTemplateTypeParmDecl(
D->isParameterPack());
Inst->setAccess(AS_public);
- if (D->hasDefaultArgument()) {
+ if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) {
TypeSourceInfo *InstantiatedDefaultArg =
SemaRef.SubstType(D->getDefaultArgumentInfo(), TemplateArgs,
D->getDefaultArgumentLoc(), D->getDeclName());
if (InstantiatedDefaultArg)
- Inst->setDefaultArgument(InstantiatedDefaultArg, false);
+ Inst->setDefaultArgument(InstantiatedDefaultArg);
}
// Introduce this template parameter's instantiation into the instantiation
@@ -2078,10 +2078,10 @@ Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl(
if (Invalid)
Param->setInvalidDecl();
- if (D->hasDefaultArgument()) {
+ if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) {
ExprResult Value = SemaRef.SubstExpr(D->getDefaultArgument(), TemplateArgs);
if (!Value.isInvalid())
- Param->setDefaultArgument(Value.get(), false);
+ Param->setDefaultArgument(Value.get());
}
// Introduce this template parameter's instantiation into the instantiation
@@ -2205,7 +2205,7 @@ TemplateDeclInstantiator::VisitTemplateTemplateParmDecl(
D->getPosition(),
D->isParameterPack(),
D->getIdentifier(), InstParams);
- if (D->hasDefaultArgument()) {
+ if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) {
NestedNameSpecifierLoc QualifierLoc =
D->getDefaultArgument().getTemplateQualifierLoc();
QualifierLoc =
@@ -2215,10 +2215,10 @@ TemplateDeclInstantiator::VisitTemplateTemplateParmDecl(
D->getDefaultArgument().getTemplateNameLoc(), TemplateArgs);
if (!TName.isNull())
Param->setDefaultArgument(
+ SemaRef.Context,
TemplateArgumentLoc(TemplateArgument(TName),
D->getDefaultArgument().getTemplateQualifierLoc(),
- D->getDefaultArgument().getTemplateNameLoc()),
- false);
+ D->getDefaultArgument().getTemplateNameLoc()));
}
Param->setAccess(AS_public);
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp
index 628eb73..d72f259 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp
@@ -22,8 +22,10 @@
#include "clang/AST/Expr.h"
#include "clang/AST/TypeLoc.h"
#include "clang/AST/TypeLocVisitor.h"
+#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/PartialDiagnostic.h"
#include "clang/Basic/TargetInfo.h"
+#include "clang/Lex/Preprocessor.h"
#include "clang/Parse/ParseDiagnostic.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/DelayedDiagnostic.h"
@@ -121,6 +123,12 @@ static void diagnoseBadTypeAttribute(Sema &S, const AttributeList &attr,
case AttributeList::AT_SPtr: \
case AttributeList::AT_UPtr
+// Nullability qualifiers.
+#define NULLABILITY_TYPE_ATTRS_CASELIST \
+ case AttributeList::AT_TypeNonNull: \
+ case AttributeList::AT_TypeNullable: \
+ case AttributeList::AT_TypeNullUnspecified
+
namespace {
/// An object which stores processing state for the entire
/// GetTypeForDeclarator process.
@@ -307,8 +315,12 @@ static bool handleObjCPointerTypeAttr(TypeProcessingState &state,
///
/// \param i - a notional index which the search will start
/// immediately inside
+///
+/// \param onlyBlockPointers Whether we should only look into block
+/// pointer types (vs. all pointer types).
static DeclaratorChunk *maybeMovePastReturnType(Declarator &declarator,
- unsigned i) {
+ unsigned i,
+ bool onlyBlockPointers) {
assert(i <= declarator.getNumTypeObjects());
DeclaratorChunk *result = nullptr;
@@ -329,20 +341,26 @@ static DeclaratorChunk *maybeMovePastReturnType(Declarator &declarator,
return result;
// If we do find a function declarator, scan inwards from that,
- // looking for a block-pointer declarator.
+ // looking for a (block-)pointer declarator.
case DeclaratorChunk::Function:
for (--i; i != 0; --i) {
- DeclaratorChunk &blockChunk = declarator.getTypeObject(i-1);
- switch (blockChunk.Kind) {
+ DeclaratorChunk &ptrChunk = declarator.getTypeObject(i-1);
+ switch (ptrChunk.Kind) {
case DeclaratorChunk::Paren:
- case DeclaratorChunk::Pointer:
case DeclaratorChunk::Array:
case DeclaratorChunk::Function:
case DeclaratorChunk::Reference:
- case DeclaratorChunk::MemberPointer:
continue;
+
+ case DeclaratorChunk::MemberPointer:
+ case DeclaratorChunk::Pointer:
+ if (onlyBlockPointers)
+ continue;
+
+ // fallthrough
+
case DeclaratorChunk::BlockPointer:
- result = &blockChunk;
+ result = &ptrChunk;
goto continue_outer;
}
llvm_unreachable("bad declarator chunk kind");
@@ -382,7 +400,8 @@ static void distributeObjCPointerTypeAttr(TypeProcessingState &state,
DeclaratorChunk *destChunk = nullptr;
if (state.isProcessingDeclSpec() &&
attr.getKind() == AttributeList::AT_ObjCOwnership)
- destChunk = maybeMovePastReturnType(declarator, i - 1);
+ destChunk = maybeMovePastReturnType(declarator, i - 1,
+ /*onlyBlockPointers=*/true);
if (!destChunk) destChunk = &chunk;
moveAttrFromListToList(attr, state.getCurrentAttrListRef(),
@@ -398,7 +417,9 @@ static void distributeObjCPointerTypeAttr(TypeProcessingState &state,
case DeclaratorChunk::Function:
if (state.isProcessingDeclSpec() &&
attr.getKind() == AttributeList::AT_ObjCOwnership) {
- if (DeclaratorChunk *dest = maybeMovePastReturnType(declarator, i)) {
+ if (DeclaratorChunk *dest = maybeMovePastReturnType(
+ declarator, i,
+ /*onlyBlockPointers=*/true)) {
moveAttrFromListToList(attr, state.getCurrentAttrListRef(),
dest->getAttrListRef());
return;
@@ -620,6 +641,10 @@ static void distributeTypeAttrsFromDeclarator(TypeProcessingState &state,
// Microsoft type attributes cannot go after the declarator-id.
continue;
+ NULLABILITY_TYPE_ATTRS_CASELIST:
+ // Nullability specifiers cannot go after the declarator-id.
+ continue;
+
default:
break;
}
@@ -2529,6 +2554,285 @@ getCCForDeclaratorChunk(Sema &S, Declarator &D,
return CC;
}
+namespace {
+ /// A simple notion of pointer kinds, which matches up with the various
+ /// pointer declarators.
+ enum class SimplePointerKind {
+ Pointer,
+ BlockPointer,
+ MemberPointer,
+ };
+}
+
+IdentifierInfo *Sema::getNullabilityKeyword(NullabilityKind nullability) {
+ switch (nullability) {
+ case NullabilityKind::NonNull:
+ if (!Ident___nonnull)
+ Ident___nonnull = PP.getIdentifierInfo("__nonnull");
+ return Ident___nonnull;
+
+ case NullabilityKind::Nullable:
+ if (!Ident___nullable)
+ Ident___nullable = PP.getIdentifierInfo("__nullable");
+ return Ident___nullable;
+
+ case NullabilityKind::Unspecified:
+ if (!Ident___null_unspecified)
+ Ident___null_unspecified = PP.getIdentifierInfo("__null_unspecified");
+ return Ident___null_unspecified;
+ }
+ llvm_unreachable("Unknown nullability kind.");
+}
+
+/// Retrieve the identifier "NSError".
+IdentifierInfo *Sema::getNSErrorIdent() {
+ if (!Ident_NSError)
+ Ident_NSError = PP.getIdentifierInfo("NSError");
+
+ return Ident_NSError;
+}
+
+/// Check whether there is a nullability attribute of any kind in the given
+/// attribute list.
+static bool hasNullabilityAttr(const AttributeList *attrs) {
+ for (const AttributeList *attr = attrs; attr;
+ attr = attr->getNext()) {
+ if (attr->getKind() == AttributeList::AT_TypeNonNull ||
+ attr->getKind() == AttributeList::AT_TypeNullable ||
+ attr->getKind() == AttributeList::AT_TypeNullUnspecified)
+ return true;
+ }
+
+ return false;
+}
+
+namespace {
+ /// Describes the kind of a pointer a declarator describes.
+ enum class PointerDeclaratorKind {
+ // Not a pointer.
+ NonPointer,
+ // Single-level pointer.
+ SingleLevelPointer,
+ // Multi-level pointer (of any pointer kind).
+ MultiLevelPointer,
+ // CFFooRef*
+ MaybePointerToCFRef,
+ // CFErrorRef*
+ CFErrorRefPointer,
+ // NSError**
+ NSErrorPointerPointer,
+ };
+}
+
+/// Classify the given declarator, whose specified type is \c type, based on
+/// what kind of pointer it refers to.
+///
+/// This is used to determine the default nullability.
+static PointerDeclaratorKind classifyPointerDeclarator(Sema &S,
+ QualType type,
+ Declarator &declarator) {
+ unsigned numNormalPointers = 0;
+
+ // For any dependent type, we consider it a non-pointer.
+ if (type->isDependentType())
+ return PointerDeclaratorKind::NonPointer;
+
+ // Look through the declarator chunks to identify pointers.
+ for (unsigned i = 0, n = declarator.getNumTypeObjects(); i != n; ++i) {
+ DeclaratorChunk &chunk = declarator.getTypeObject(i);
+ switch (chunk.Kind) {
+ case DeclaratorChunk::Array:
+ case DeclaratorChunk::Function:
+ break;
+
+ case DeclaratorChunk::BlockPointer:
+ case DeclaratorChunk::MemberPointer:
+ return numNormalPointers > 0 ? PointerDeclaratorKind::MultiLevelPointer
+ : PointerDeclaratorKind::SingleLevelPointer;
+
+ case DeclaratorChunk::Paren:
+ case DeclaratorChunk::Reference:
+ continue;
+
+ case DeclaratorChunk::Pointer:
+ ++numNormalPointers;
+ if (numNormalPointers > 2)
+ return PointerDeclaratorKind::MultiLevelPointer;
+ continue;
+ }
+ }
+
+ // Then, dig into the type specifier itself.
+ unsigned numTypeSpecifierPointers = 0;
+ do {
+ // Decompose normal pointers.
+ if (auto ptrType = type->getAs<PointerType>()) {
+ ++numNormalPointers;
+
+ if (numNormalPointers > 2)
+ return PointerDeclaratorKind::MultiLevelPointer;
+
+ type = ptrType->getPointeeType();
+ ++numTypeSpecifierPointers;
+ continue;
+ }
+
+ // Decompose block pointers.
+ if (type->getAs<BlockPointerType>()) {
+ return numNormalPointers > 0 ? PointerDeclaratorKind::MultiLevelPointer
+ : PointerDeclaratorKind::SingleLevelPointer;
+ }
+
+ // Decompose member pointers.
+ if (type->getAs<MemberPointerType>()) {
+ return numNormalPointers > 0 ? PointerDeclaratorKind::MultiLevelPointer
+ : PointerDeclaratorKind::SingleLevelPointer;
+ }
+
+ // Look at Objective-C object pointers.
+ if (auto objcObjectPtr = type->getAs<ObjCObjectPointerType>()) {
+ ++numNormalPointers;
+ ++numTypeSpecifierPointers;
+
+ // If this is NSError**, report that.
+ if (auto objcClassDecl = objcObjectPtr->getInterfaceDecl()) {
+ if (objcClassDecl->getIdentifier() == S.getNSErrorIdent() &&
+ numNormalPointers == 2 && numTypeSpecifierPointers < 2) {
+ return PointerDeclaratorKind::NSErrorPointerPointer;
+ }
+ }
+
+ break;
+ }
+
+ // Look at Objective-C class types.
+ if (auto objcClass = type->getAs<ObjCInterfaceType>()) {
+ if (objcClass->getInterface()->getIdentifier() == S.getNSErrorIdent()) {
+ if (numNormalPointers == 2 && numTypeSpecifierPointers < 2)
+ return PointerDeclaratorKind::NSErrorPointerPointer;
+ }
+
+ break;
+ }
+
+ // If at this point we haven't seen a pointer, we won't see one.
+ if (numNormalPointers == 0)
+ return PointerDeclaratorKind::NonPointer;
+
+ if (auto recordType = type->getAs<RecordType>()) {
+ RecordDecl *recordDecl = recordType->getDecl();
+
+ bool isCFError = false;
+ if (S.CFError) {
+ // If we already know about CFError, test it directly.
+ isCFError = (S.CFError == recordDecl);
+ } else {
+ // Check whether this is CFError, which we identify based on its bridge
+ // to NSError.
+ if (recordDecl->getTagKind() == TTK_Struct && numNormalPointers > 0) {
+ if (auto bridgeAttr = recordDecl->getAttr<ObjCBridgeAttr>()) {
+ if (bridgeAttr->getBridgedType() == S.getNSErrorIdent()) {
+ S.CFError = recordDecl;
+ isCFError = true;
+ }
+ }
+ }
+ }
+
+ // If this is CFErrorRef*, report it as such.
+ if (isCFError && numNormalPointers == 2 && numTypeSpecifierPointers < 2) {
+ return PointerDeclaratorKind::CFErrorRefPointer;
+ }
+ break;
+ }
+
+ break;
+ } while (true);
+
+
+ switch (numNormalPointers) {
+ case 0:
+ return PointerDeclaratorKind::NonPointer;
+
+ case 1:
+ return PointerDeclaratorKind::SingleLevelPointer;
+
+ case 2:
+ return PointerDeclaratorKind::MaybePointerToCFRef;
+
+ default:
+ return PointerDeclaratorKind::MultiLevelPointer;
+ }
+}
+
+static FileID getNullabilityCompletenessCheckFileID(Sema &S,
+ SourceLocation loc) {
+ // If we're anywhere in a function, method, or closure context, don't perform
+ // completeness checks.
+ for (DeclContext *ctx = S.CurContext; ctx; ctx = ctx->getParent()) {
+ if (ctx->isFunctionOrMethod())
+ return FileID();
+
+ if (ctx->isFileContext())
+ break;
+ }
+
+ // We only care about the expansion location.
+ loc = S.SourceMgr.getExpansionLoc(loc);
+ FileID file = S.SourceMgr.getFileID(loc);
+ if (file.isInvalid())
+ return FileID();
+
+ // Retrieve file information.
+ bool invalid = false;
+ const SrcMgr::SLocEntry &sloc = S.SourceMgr.getSLocEntry(file, &invalid);
+ if (invalid || !sloc.isFile())
+ return FileID();
+
+ // We don't want to perform completeness checks on the main file or in
+ // system headers.
+ const SrcMgr::FileInfo &fileInfo = sloc.getFile();
+ if (fileInfo.getIncludeLoc().isInvalid())
+ return FileID();
+ if (fileInfo.getFileCharacteristic() != SrcMgr::C_User &&
+ S.Diags.getSuppressSystemWarnings()) {
+ return FileID();
+ }
+
+ return file;
+}
+
+/// Check for consistent use of nullability.
+static void checkNullabilityConsistency(TypeProcessingState &state,
+ SimplePointerKind pointerKind,
+ SourceLocation pointerLoc) {
+ Sema &S = state.getSema();
+
+ // Determine which file we're performing consistency checking for.
+ FileID file = getNullabilityCompletenessCheckFileID(S, pointerLoc);
+ if (file.isInvalid())
+ return;
+
+ // If we haven't seen any type nullability in this file, we won't warn now
+ // about anything.
+ FileNullability &fileNullability = S.NullabilityMap[file];
+ if (!fileNullability.SawTypeNullability) {
+ // If this is the first pointer declarator in the file, record it.
+ if (fileNullability.PointerLoc.isInvalid() &&
+ !S.Context.getDiagnostics().isIgnored(diag::warn_nullability_missing,
+ pointerLoc)) {
+ fileNullability.PointerLoc = pointerLoc;
+ fileNullability.PointerKind = static_cast<unsigned>(pointerKind);
+ }
+
+ return;
+ }
+
+ // Complain about missing nullability.
+ S.Diag(pointerLoc, diag::warn_nullability_missing)
+ << static_cast<unsigned>(pointerKind);
+}
+
static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
QualType declSpecType,
TypeSourceInfo *TInfo) {
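
The helpers above (classifyPointerDeclarator, getNullabilityCompletenessCheckFileID, checkNullabilityConsistency) feed the nullability inference added to GetFullTypeForDeclarator below: inside an assume-nonnull region, single-level pointers are inferred __nonnull, and once any nullability appears in a (non-main, non-system) file, pointers written without it are flagged by warn_nullability_missing. A header-style sketch using the keywords this patch introduces (illustrative; later Clang releases spell these _Nonnull/_Nullable):

    #pragma clang assume_nonnull begin

    // A single-level pointer declared here is inferred as __nonnull.
    extern int *counter;                  // behaves like: int * __nonnull counter;

    // Writing nullability explicitly opts a declaration out of the inference.
    extern int * __nullable maybe_counter;

    #pragma clang assume_nonnull end
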
@@ -2596,6 +2900,245 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
}
}
+ // Determine whether we should infer __nonnull on pointer types.
+ Optional<NullabilityKind> inferNullability;
+ bool inferNullabilityCS = false;
+ bool inferNullabilityInnerOnly = false;
+ bool inferNullabilityInnerOnlyComplete = false;
+
+ // Are we in an assume-nonnull region?
+ bool inAssumeNonNullRegion = false;
+ if (S.PP.getPragmaAssumeNonNullLoc().isValid() &&
+ !state.getDeclarator().isObjCWeakProperty() &&
+ !S.deduceWeakPropertyFromType(T)) {
+ inAssumeNonNullRegion = true;
+ // Determine which file we saw the assume-nonnull region in.
+ FileID file = getNullabilityCompletenessCheckFileID(
+ S, S.PP.getPragmaAssumeNonNullLoc());
+ if (!file.isInvalid()) {
+ FileNullability &fileNullability = S.NullabilityMap[file];
+
+ // If we haven't seen any type nullability before, now we have.
+ if (!fileNullability.SawTypeNullability) {
+ if (fileNullability.PointerLoc.isValid()) {
+ S.Diag(fileNullability.PointerLoc, diag::warn_nullability_missing)
+ << static_cast<unsigned>(fileNullability.PointerKind);
+ }
+
+ fileNullability.SawTypeNullability = true;
+ }
+ }
+ }
+
+ // Whether to complain about missing nullability specifiers or not.
+ enum {
+ /// Never complain.
+ CAMN_No,
+ /// Complain on the inner pointers (but not the outermost
+ /// pointer).
+ CAMN_InnerPointers,
+ /// Complain about any pointers that don't have nullability
+ /// specified or inferred.
+ CAMN_Yes
+ } complainAboutMissingNullability = CAMN_No;
+ unsigned NumPointersRemaining = 0;
+
+ if (IsTypedefName) {
+ // For typedefs, we do not infer any nullability (the default),
+ // and we only complain about missing nullability specifiers on
+ // inner pointers.
+ complainAboutMissingNullability = CAMN_InnerPointers;
+
+ if (T->canHaveNullability() && !T->getNullability(S.Context)) {
+ ++NumPointersRemaining;
+ }
+
+ for (unsigned i = 0, n = D.getNumTypeObjects(); i != n; ++i) {
+ DeclaratorChunk &chunk = D.getTypeObject(i);
+ switch (chunk.Kind) {
+ case DeclaratorChunk::Array:
+ case DeclaratorChunk::Function:
+ break;
+
+ case DeclaratorChunk::BlockPointer:
+ case DeclaratorChunk::MemberPointer:
+ ++NumPointersRemaining;
+ break;
+
+ case DeclaratorChunk::Paren:
+ case DeclaratorChunk::Reference:
+ continue;
+
+ case DeclaratorChunk::Pointer:
+ ++NumPointersRemaining;
+ continue;
+ }
+ }
+ } else {
+ bool isFunctionOrMethod = false;
+ switch (auto context = state.getDeclarator().getContext()) {
+ case Declarator::ObjCParameterContext:
+ case Declarator::ObjCResultContext:
+ case Declarator::PrototypeContext:
+ case Declarator::TrailingReturnContext:
+ isFunctionOrMethod = true;
+ // fallthrough
+
+ case Declarator::MemberContext:
+ if (state.getDeclarator().isObjCIvar() && !isFunctionOrMethod) {
+ complainAboutMissingNullability = CAMN_No;
+ break;
+ }
+ // fallthrough
+
+ case Declarator::FileContext:
+ case Declarator::KNRTypeListContext:
+ complainAboutMissingNullability = CAMN_Yes;
+
+ // Nullability inference depends on the type and declarator.
+ switch (classifyPointerDeclarator(S, T, D)) {
+ case PointerDeclaratorKind::NonPointer:
+ case PointerDeclaratorKind::MultiLevelPointer:
+ // Cannot infer nullability.
+ break;
+
+ case PointerDeclaratorKind::SingleLevelPointer:
+ // Infer __nonnull if we are in an assumes-nonnull region.
+ if (inAssumeNonNullRegion) {
+ inferNullability = NullabilityKind::NonNull;
+ inferNullabilityCS = (context == Declarator::ObjCParameterContext ||
+ context == Declarator::ObjCResultContext);
+ }
+ break;
+
+ case PointerDeclaratorKind::CFErrorRefPointer:
+ case PointerDeclaratorKind::NSErrorPointerPointer:
+ // Within a function or method signature, infer __nullable at both
+ // levels.
+ if (isFunctionOrMethod && inAssumeNonNullRegion)
+ inferNullability = NullabilityKind::Nullable;
+ break;
+
+ case PointerDeclaratorKind::MaybePointerToCFRef:
+ if (isFunctionOrMethod) {
+ // On pointer-to-pointer parameters marked cf_returns_retained or
+ // cf_returns_not_retained, if the outer pointer is explicit then
+ // infer the inner pointer as __nullable.
+ auto hasCFReturnsAttr = [](const AttributeList *NextAttr) -> bool {
+ while (NextAttr) {
+ if (NextAttr->getKind() == AttributeList::AT_CFReturnsRetained ||
+ NextAttr->getKind() == AttributeList::AT_CFReturnsNotRetained)
+ return true;
+ NextAttr = NextAttr->getNext();
+ }
+ return false;
+ };
+ if (const auto *InnermostChunk = D.getInnermostNonParenChunk()) {
+ if (hasCFReturnsAttr(D.getAttributes()) ||
+ hasCFReturnsAttr(InnermostChunk->getAttrs()) ||
+ hasCFReturnsAttr(D.getDeclSpec().getAttributes().getList())) {
+ inferNullability = NullabilityKind::Nullable;
+ inferNullabilityInnerOnly = true;
+ }
+ }
+ }
+ break;
+ }
+ break;
+
+ case Declarator::ConversionIdContext:
+ complainAboutMissingNullability = CAMN_Yes;
+ break;
+
+ case Declarator::AliasDeclContext:
+ case Declarator::AliasTemplateContext:
+ case Declarator::BlockContext:
+ case Declarator::BlockLiteralContext:
+ case Declarator::ConditionContext:
+ case Declarator::CXXCatchContext:
+ case Declarator::CXXNewContext:
+ case Declarator::ForContext:
+ case Declarator::LambdaExprContext:
+ case Declarator::LambdaExprParameterContext:
+ case Declarator::ObjCCatchContext:
+ case Declarator::TemplateParamContext:
+ case Declarator::TemplateTypeArgContext:
+ case Declarator::TypeNameContext:
+ // Don't infer in these contexts.
+ break;
+ }
+ }
+
+ // Local function that checks the nullability for a given pointer declarator.
+ // Returns the nullability attribute that was inferred and spliced into the
+ // attribute list, or null if nothing was inferred.
+ auto inferPointerNullability = [&](SimplePointerKind pointerKind,
+ SourceLocation pointerLoc,
+ AttributeList *&attrs) -> AttributeList * {
+ // We've seen a pointer.
+ if (NumPointersRemaining > 0)
+ --NumPointersRemaining;
+
+ // If a nullability attribute is present, there's nothing to do.
+ if (hasNullabilityAttr(attrs))
+ return nullptr;
+
+ // If we're supposed to infer nullability, do so now.
+ if (inferNullability && !inferNullabilityInnerOnlyComplete) {
+ AttributeList::Syntax syntax
+ = inferNullabilityCS ? AttributeList::AS_ContextSensitiveKeyword
+ : AttributeList::AS_Keyword;
+ AttributeList *nullabilityAttr = state.getDeclarator().getAttributePool()
+ .create(
+ S.getNullabilityKeyword(
+ *inferNullability),
+ SourceRange(pointerLoc),
+ nullptr, SourceLocation(),
+ nullptr, 0, syntax);
+
+ spliceAttrIntoList(*nullabilityAttr, attrs);
+
+ if (inferNullabilityInnerOnly)
+ inferNullabilityInnerOnlyComplete = true;
+ return nullabilityAttr;
+ }
+
+ // If we're supposed to complain about missing nullability, do so
+ // now if it's truly missing.
+ switch (complainAboutMissingNullability) {
+ case CAMN_No:
+ break;
+
+ case CAMN_InnerPointers:
+ if (NumPointersRemaining == 0)
+ break;
+ // Fallthrough.
+
+ case CAMN_Yes:
+ checkNullabilityConsistency(state, pointerKind, pointerLoc);
+ }
+
+ return nullptr;
+ };
+
+ // If the type itself could have nullability but does not, infer pointer
+ // nullability and perform consistency checking.
+ if (T->canHaveNullability() && S.ActiveTemplateInstantiations.empty() &&
+ !T->getNullability(S.Context)) {
+ SimplePointerKind pointerKind = SimplePointerKind::Pointer;
+ if (T->isBlockPointerType())
+ pointerKind = SimplePointerKind::BlockPointer;
+ else if (T->isMemberPointerType())
+ pointerKind = SimplePointerKind::MemberPointer;
+
+ if (auto *attr = inferPointerNullability(
+ pointerKind, D.getDeclSpec().getTypeSpecTypeLoc(),
+ D.getMutableDeclSpec().getAttributes().getListRef())) {
+ T = Context.getAttributedType(
+ AttributedType::getNullabilityAttrKind(*inferNullability), T, T);
+ attr->setUsedAsTypeAttr();
+ }
+ }
+
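A minimal sketch (not part of the patch) of how these inference rules behave on user declarations, assuming Clang's assume_nonnull pragma and CoreFoundation's CFErrorRef are available:

#include <CoreFoundation/CoreFoundation.h>

#pragma clang assume_nonnull begin
void setName(const char *name);   /* single-level pointer: inferred _Nonnull */
void getNames(char **out);        /* multi-level pointer: nothing inferred; may warn
                                     about missing nullability */
Boolean fetch(CFErrorRef *error); /* CFErrorRef* parameter: _Nullable inferred at
                                     both levels */
#pragma clang assume_nonnull end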
// Walk the DeclTypeInfo, building the recursive type as we go.
// DeclTypeInfos are ordered from the identifier out, which is
// opposite of what we want :).
@@ -2613,6 +3156,10 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
if (!LangOpts.Blocks)
S.Diag(DeclType.Loc, diag::err_blocks_disable);
+ // Handle pointer nullability.
+ inferPointerNullability(SimplePointerKind::BlockPointer,
+ DeclType.Loc, DeclType.getAttrListRef());
+
T = S.BuildBlockPointerType(T, D.getIdentifierLoc(), Name);
if (DeclType.Cls.TypeQuals)
T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Cls.TypeQuals);
@@ -2625,6 +3172,11 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
D.setInvalidType(true);
// Build the type anyway.
}
+
+ // Handle pointer nullability
+ inferPointerNullability(SimplePointerKind::Pointer, DeclType.Loc,
+ DeclType.getAttrListRef());
+
if (LangOpts.ObjC1 && T->getAs<ObjCObjectType>()) {
T = Context.getObjCObjectPointerType(T);
if (DeclType.Ptr.TypeQuals)
@@ -3066,6 +3618,11 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
// The scope spec must refer to a class, or be dependent.
CXXScopeSpec &SS = DeclType.Mem.Scope();
QualType ClsType;
+
+ // Handle pointer nullability.
+ inferPointerNullability(SimplePointerKind::MemberPointer,
+ DeclType.Loc, DeclType.getAttrListRef());
+
if (SS.isInvalid()) {
// Avoid emitting extra errors if we already errored on the scope.
D.setInvalidType(true);
@@ -3495,6 +4052,12 @@ static AttributeList::Kind getAttrListKind(AttributedType::Kind kind) {
return AttributeList::AT_SPtr;
case AttributedType::attr_uptr:
return AttributeList::AT_UPtr;
+ case AttributedType::attr_nonnull:
+ return AttributeList::AT_TypeNonNull;
+ case AttributedType::attr_nullable:
+ return AttributeList::AT_TypeNullable;
+ case AttributedType::attr_null_unspecified:
+ return AttributeList::AT_TypeNullUnspecified;
}
llvm_unreachable("unexpected attribute kind!");
}
@@ -4114,7 +4677,8 @@ static bool handleObjCOwnershipTypeAttr(TypeProcessingState &state,
// just be the return type of a block pointer.
if (state.isProcessingDeclSpec()) {
Declarator &D = state.getDeclarator();
- if (maybeMovePastReturnType(D, D.getNumTypeObjects()))
+ if (maybeMovePastReturnType(D, D.getNumTypeObjects(),
+ /*onlyBlockPointers=*/true))
return false;
}
}
@@ -4491,6 +5055,212 @@ static bool handleMSPointerTypeQualifierAttr(TypeProcessingState &State,
return false;
}
+bool Sema::checkNullabilityTypeSpecifier(QualType &type,
+ NullabilityKind nullability,
+ SourceLocation nullabilityLoc,
+ bool isContextSensitive) {
+ // We saw a nullability type specifier. If this is the first one for
+ // this file, note that.
+ FileID file = getNullabilityCompletenessCheckFileID(*this, nullabilityLoc);
+ if (!file.isInvalid()) {
+ FileNullability &fileNullability = NullabilityMap[file];
+ if (!fileNullability.SawTypeNullability) {
+ // If we have already seen a pointer declarator without a nullability
+ // annotation, complain about it.
+ if (fileNullability.PointerLoc.isValid()) {
+ Diag(fileNullability.PointerLoc, diag::warn_nullability_missing)
+ << static_cast<unsigned>(fileNullability.PointerKind);
+ }
+
+ fileNullability.SawTypeNullability = true;
+ }
+ }
+
+ // Check for existing nullability attributes on the type.
+ QualType desugared = type;
+ while (auto attributed = dyn_cast<AttributedType>(desugared.getTypePtr())) {
+ // Check whether this attributed type already has a nullability specifier.
+ if (auto existingNullability = attributed->getImmediateNullability()) {
+ // Duplicated nullability.
+ if (nullability == *existingNullability) {
+ Diag(nullabilityLoc, diag::warn_nullability_duplicate)
+ << static_cast<unsigned>(nullability)
+ << isContextSensitive
+ << FixItHint::CreateRemoval(nullabilityLoc);
+
+ break;
+ }
+
+ // Conflicting nullability.
+ Diag(nullabilityLoc, diag::err_nullability_conflicting)
+ << static_cast<unsigned>(nullability)
+ << isContextSensitive
+ << static_cast<unsigned>(*existingNullability)
+ << false;
+ return true;
+ }
+
+ desugared = attributed->getModifiedType();
+ }
+
+ // If there is already a different nullability specifier, complain.
+ // This (unlike the code above) looks through typedefs that might
+ // have nullability specifiers on them, which means we cannot
+ // provide a useful Fix-It.
+ if (auto existingNullability = desugared->getNullability(Context)) {
+ if (nullability != *existingNullability) {
+ Diag(nullabilityLoc, diag::err_nullability_conflicting)
+ << static_cast<unsigned>(nullability)
+ << isContextSensitive
+ << static_cast<unsigned>(*existingNullability)
+ << false;
+
+ // Try to find the typedef with the existing nullability specifier.
+ if (auto typedefType = desugared->getAs<TypedefType>()) {
+ TypedefNameDecl *typedefDecl = typedefType->getDecl();
+ QualType underlyingType = typedefDecl->getUnderlyingType();
+ if (auto typedefNullability
+ = AttributedType::stripOuterNullability(underlyingType)) {
+ if (*typedefNullability == *existingNullability) {
+ Diag(typedefDecl->getLocation(), diag::note_nullability_here)
+ << static_cast<unsigned>(*existingNullability);
+ }
+ }
+ }
+
+ return true;
+ }
+ }
+
+ // If this definitely isn't a pointer type, reject the specifier.
+ if (!desugared->canHaveNullability()) {
+ Diag(nullabilityLoc, diag::err_nullability_nonpointer)
+ << static_cast<unsigned>(nullability) << isContextSensitive << type;
+ return true;
+ }
+
+ // For the context-sensitive keywords/Objective-C property
+ // attributes, require that the type be a single-level pointer.
+ if (isContextSensitive) {
+ // Make sure that the pointee isn't itself a pointer type.
+ QualType pointeeType = desugared->getPointeeType();
+ if (pointeeType->isAnyPointerType() ||
+ pointeeType->isObjCObjectPointerType() ||
+ pointeeType->isMemberPointerType()) {
+ Diag(nullabilityLoc, diag::err_nullability_cs_multilevel)
+ << static_cast<unsigned>(nullability)
+ << type;
+ Diag(nullabilityLoc, diag::note_nullability_type_specifier)
+ << static_cast<unsigned>(nullability)
+ << type
+ << FixItHint::CreateReplacement(nullabilityLoc,
+ getNullabilitySpelling(nullability));
+ return true;
+ }
+ }
+
+ // Form the attributed type.
+ type = Context.getAttributedType(
+ AttributedType::getNullabilityAttrKind(nullability), type, type);
+ return false;
+}
+
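Hypothetical declarations exercising the three rejection paths above (duplicate, conflicting, and non-pointer uses of a nullability specifier); these are illustrations, not tests from the patch:

int * _Nonnull _Nonnull p;   /* duplicate: warning plus a Fix-It removing the repeat */
int * _Nonnull _Nullable q;  /* conflicting specifiers: error */
int _Nullable i;             /* not a pointer type: error */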
+/// Map a nullability attribute kind to a nullability kind.
+static NullabilityKind mapNullabilityAttrKind(AttributeList::Kind kind) {
+ switch (kind) {
+ case AttributeList::AT_TypeNonNull:
+ return NullabilityKind::NonNull;
+
+ case AttributeList::AT_TypeNullable:
+ return NullabilityKind::Nullable;
+
+ case AttributeList::AT_TypeNullUnspecified:
+ return NullabilityKind::Unspecified;
+
+ default:
+ llvm_unreachable("not a nullability attribute kind");
+ }
+}
+
+/// Distribute a nullability type attribute that cannot be applied to
+/// the type specifier to a pointer, block pointer, or member pointer
+/// declarator, complaining if necessary.
+///
+/// \returns true if the nullability annotation was distributed, false
+/// otherwise.
+static bool distributeNullabilityTypeAttr(TypeProcessingState &state,
+ QualType type,
+ AttributeList &attr) {
+ Declarator &declarator = state.getDeclarator();
+
+ /// Attempt to move the attribute to the specified chunk.
+ auto moveToChunk = [&](DeclaratorChunk &chunk, bool inFunction) -> bool {
+ // If there is already a nullability attribute there, don't add
+ // one.
+ if (hasNullabilityAttr(chunk.getAttrListRef()))
+ return false;
+
+ // Complain about the nullability qualifier being in the wrong
+ // place.
+ unsigned pointerKind
+ = chunk.Kind == DeclaratorChunk::Pointer ? (inFunction ? 3 : 0)
+ : chunk.Kind == DeclaratorChunk::BlockPointer ? 1
+ : inFunction? 4 : 2;
+
+ auto diag = state.getSema().Diag(attr.getLoc(),
+ diag::warn_nullability_declspec)
+ << static_cast<unsigned>(mapNullabilityAttrKind(attr.getKind()))
+ << type
+ << pointerKind;
+
+ // FIXME: MemberPointer chunks don't carry the location of the *.
+ if (chunk.Kind != DeclaratorChunk::MemberPointer) {
+ diag << FixItHint::CreateRemoval(attr.getLoc())
+ << FixItHint::CreateInsertion(
+ state.getSema().getPreprocessor()
+ .getLocForEndOfToken(chunk.Loc),
+ " " + attr.getName()->getName().str() + " ");
+ }
+
+ moveAttrFromListToList(attr, state.getCurrentAttrListRef(),
+ chunk.getAttrListRef());
+ return true;
+ };
+
+ // Move it to the outermost pointer, member pointer, or block
+ // pointer declarator.
+ for (unsigned i = state.getCurrentChunkIndex(); i != 0; --i) {
+ DeclaratorChunk &chunk = declarator.getTypeObject(i-1);
+ switch (chunk.Kind) {
+ case DeclaratorChunk::Pointer:
+ case DeclaratorChunk::BlockPointer:
+ case DeclaratorChunk::MemberPointer:
+ return moveToChunk(chunk, false);
+
+ case DeclaratorChunk::Paren:
+ case DeclaratorChunk::Array:
+ continue;
+
+ case DeclaratorChunk::Function:
+ // Try to move past the return type to a function/block/member
+ // function pointer.
+ if (DeclaratorChunk *dest = maybeMovePastReturnType(
+ declarator, i,
+ /*onlyBlockPointers=*/false)) {
+ return moveToChunk(*dest, true);
+ }
+
+ return false;
+
+ // Don't walk through these.
+ case DeclaratorChunk::Reference:
+ return false;
+ }
+ }
+
+ return false;
+}
+
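Roughly, this moves a nullability specifier written in the declaration-specifier position onto the pointer declarator. A before/after sketch of the warning's Fix-It, using a made-up variable name:

_Nullable int *ptr;   /* before: warns that the specifier belongs on the pointer declarator */
int * _Nullable ptr;  /* after: the rewrite the Fix-It suggests */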
static AttributedType::Kind getCCTypeAttrKind(AttributeList &Attr) {
assert(!Attr.isInvalid());
switch (Attr.getKind()) {
@@ -4997,6 +5767,24 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
attr.setUsedAsTypeAttr();
break;
+ NULLABILITY_TYPE_ATTRS_CASELIST:
+ // Either add nullability here or try to distribute it. We
+ // don't want to distribute the nullability specifier past any
+ // dependent type, because that complicates the user model.
+ if (type->canHaveNullability() || type->isDependentType() ||
+ !distributeNullabilityTypeAttr(state, type, attr)) {
+ if (state.getSema().checkNullabilityTypeSpecifier(
+ type,
+ mapNullabilityAttrKind(attr.getKind()),
+ attr.getLoc(),
+ attr.isContextSensitiveKeywordAttribute())) {
+ attr.setInvalid();
+ }
+
+ attr.setUsedAsTypeAttr();
+ }
+ break;
+
case AttributeList::AT_NSReturnsRetained:
if (!state.getSema().getLangOpts().ObjCAutoRefCount)
break;
@@ -5153,7 +5941,7 @@ bool Sema::RequireCompleteType(SourceLocation Loc, QualType T,
/// in order to provide a definition of this entity.
bool Sema::hasVisibleDefinition(NamedDecl *D, NamedDecl **Suggested) {
// Easy case: if we don't have modules, all declarations are visible.
- if (!getLangOpts().Modules)
+ if (!getLangOpts().Modules && !getLangOpts().ModulesLocalVisibility)
return true;
// If this definition was instantiated from a template, map back to the
@@ -5185,10 +5973,18 @@ bool Sema::hasVisibleDefinition(NamedDecl *D, NamedDecl **Suggested) {
}
assert(D && "missing definition for pattern of instantiated definition");
- // FIXME: If we merged any other decl into D, and that declaration is visible,
- // then we should consider a definition to be visible.
*Suggested = D;
- return LookupResult::isVisible(*this, D);
+ if (LookupResult::isVisible(*this, D))
+ return true;
+
+ // The external source may have additional definitions of this type that are
+ // visible, so complete the redeclaration chain now and ask again.
+ if (auto *Source = Context.getExternalSource()) {
+ Source->CompleteRedeclChain(D);
+ return LookupResult::isVisible(*this, D);
+ }
+
+ return false;
}
/// Locks in the inheritance model for the given class and all of its bases.
@@ -5239,20 +6035,8 @@ bool Sema::RequireCompleteTypeImpl(SourceLocation Loc, QualType T,
// If we know about the definition but it is not visible, complain.
NamedDecl *SuggestedDef = nullptr;
if (!Diagnoser.Suppressed && Def &&
- !hasVisibleDefinition(Def, &SuggestedDef)) {
- // Suppress this error outside of a SFINAE context if we've already
- // emitted the error once for this type. There's no usefulness in
- // repeating the diagnostic.
- // FIXME: Add a Fix-It that imports the corresponding module or includes
- // the header.
- Module *Owner = getOwningModule(SuggestedDef);
- Diag(Loc, diag::err_module_private_definition)
- << T << Owner->getFullModuleName();
- Diag(SuggestedDef->getLocation(), diag::note_previous_definition);
-
- // Try to recover by implicitly importing this module.
- createImplicitModuleImportForErrorRecovery(Loc, Owner);
- }
+ !hasVisibleDefinition(Def, &SuggestedDef))
+ diagnoseMissingImport(Loc, SuggestedDef, /*NeedDefinition*/true);
// We lock in the inheritance model once somebody has asked us to ensure
// that a pointer-to-member type is complete.
diff --git a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h
index f5249fd..73dde7c 100644
--- a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h
+++ b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h
@@ -5386,6 +5386,17 @@ QualType TreeTransform<Derived>::TransformAttributedType(
= getDerived().TransformType(oldType->getEquivalentType());
if (equivalentType.isNull())
return QualType();
+
+ // Check whether we can add nullability; it is only represented as
+ // type sugar, and therefore cannot be diagnosed in any other way.
+ if (auto nullability = oldType->getImmediateNullability()) {
+ if (!modifiedType->canHaveNullability()) {
+ SemaRef.Diag(TL.getAttrNameLoc(), diag::err_nullability_nonpointer)
+ << static_cast<unsigned>(*nullability) << false << modifiedType;
+ return QualType();
+ }
+ }
+
result = SemaRef.Context.getAttributedType(oldType->getAttrKind(),
modifiedType,
equivalentType);
@@ -6871,6 +6882,17 @@ TreeTransform<Derived>::TransformOMPTaskwaitDirective(OMPTaskwaitDirective *D) {
}
template <typename Derived>
+StmtResult TreeTransform<Derived>::TransformOMPTaskgroupDirective(
+ OMPTaskgroupDirective *D) {
+ DeclarationNameInfo DirName;
+ getDerived().getSema().StartOpenMPDSABlock(OMPD_taskgroup, DirName, nullptr,
+ D->getLocStart());
+ StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+ getDerived().getSema().EndOpenMPDSABlock(Res.get());
+ return Res;
+}
+
+template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPFlushDirective(OMPFlushDirective *D) {
DeclarationNameInfo DirName;
@@ -7950,6 +7972,25 @@ TreeTransform<Derived>::TransformDesignatedInitExpr(DesignatedInitExpr *E) {
E->usesGNUSyntax(), Init.get());
}
+// Because TransformInitListExpr() only works on the syntactic form of an
+// InitListExpr, a DesignatedInitUpdateExpr should never be encountered here.
+template<typename Derived>
+ExprResult
+TreeTransform<Derived>::TransformDesignatedInitUpdateExpr(
+ DesignatedInitUpdateExpr *E) {
+ llvm_unreachable("Unexpected DesignatedInitUpdateExpr in syntactic form of "
+ "initializer");
+ return ExprError();
+}
+
+template<typename Derived>
+ExprResult
+TreeTransform<Derived>::TransformNoInitExpr(
+ NoInitExpr *E) {
+ llvm_unreachable("Unexpected NoInitExpr in syntactic form of initializer");
+ return ExprError();
+}
+
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformImplicitValueInitExpr(
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp
index 609c25d..d75b5eb 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp
@@ -19,6 +19,7 @@
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
+#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeLocVisitor.h"
@@ -3590,6 +3591,7 @@ ASTReader::ReadASTCore(StringRef FileName,
ModuleFile &F = *M;
BitstreamCursor &Stream = F.Stream;
+ PCHContainerOps.ExtractPCH(F.Buffer->getMemBufferRef(), F.StreamFile);
Stream.init(&F.StreamFile);
F.SizeInBits = F.Buffer->getBufferSize() * 8;
@@ -3858,9 +3860,9 @@ static ASTFileSignature readASTFileSignature(llvm::BitstreamReader &StreamFile){
/// \brief Retrieve the name of the original source file
/// directly from the AST file, without actually loading the AST
/// file.
-std::string ASTReader::getOriginalSourceFile(const std::string &ASTFileName,
- FileManager &FileMgr,
- DiagnosticsEngine &Diags) {
+std::string ASTReader::getOriginalSourceFile(
+ const std::string &ASTFileName, FileManager &FileMgr,
+ const PCHContainerOperations &PCHContainerOps, DiagnosticsEngine &Diags) {
// Open the AST file.
auto Buffer = FileMgr.getBufferForFile(ASTFileName);
if (!Buffer) {
@@ -3871,8 +3873,7 @@ std::string ASTReader::getOriginalSourceFile(const std::string &ASTFileName,
// Initialize the stream
llvm::BitstreamReader StreamFile;
- StreamFile.init((const unsigned char *)(*Buffer)->getBufferStart(),
- (const unsigned char *)(*Buffer)->getBufferEnd());
+ PCHContainerOps.ExtractPCH((*Buffer)->getMemBufferRef(), StreamFile);
BitstreamCursor Stream(StreamFile);
// Sniff for the signature.
@@ -3954,9 +3955,10 @@ namespace {
};
}
-bool ASTReader::readASTFileControlBlock(StringRef Filename,
- FileManager &FileMgr,
- ASTReaderListener &Listener) {
+bool ASTReader::readASTFileControlBlock(
+ StringRef Filename, FileManager &FileMgr,
+ const PCHContainerOperations &PCHContainerOps,
+ ASTReaderListener &Listener) {
// Open the AST file.
// FIXME: This allows use of the VFS; we do not allow use of the
// VFS when actually loading a module.
@@ -4147,16 +4149,15 @@ bool ASTReader::readASTFileControlBlock(StringRef Filename,
}
}
-
-bool ASTReader::isAcceptableASTFile(StringRef Filename,
- FileManager &FileMgr,
- const LangOptions &LangOpts,
- const TargetOptions &TargetOpts,
- const PreprocessorOptions &PPOpts,
- std::string ExistingModuleCachePath) {
+bool ASTReader::isAcceptableASTFile(
+ StringRef Filename, FileManager &FileMgr,
+ const PCHContainerOperations &PCHContainerOps, const LangOptions &LangOpts,
+ const TargetOptions &TargetOpts, const PreprocessorOptions &PPOpts,
+ std::string ExistingModuleCachePath) {
SimplePCHValidator validator(LangOpts, TargetOpts, PPOpts,
ExistingModuleCachePath, FileMgr);
- return !readASTFileControlBlock(Filename, FileMgr, validator);
+ return !readASTFileControlBlock(Filename, FileMgr, PCHContainerOps,
+ validator);
}
ASTReader::ASTReadResult
@@ -8407,24 +8408,26 @@ void ASTReader::pushExternalDeclIntoScope(NamedDecl *D, DeclarationName Name) {
}
}
-ASTReader::ASTReader(Preprocessor &PP, ASTContext &Context, StringRef isysroot,
- bool DisableValidation, bool AllowASTWithCompilerErrors,
+ASTReader::ASTReader(Preprocessor &PP, ASTContext &Context,
+ const PCHContainerOperations &PCHContainerOps,
+ StringRef isysroot, bool DisableValidation,
+ bool AllowASTWithCompilerErrors,
bool AllowConfigurationMismatch, bool ValidateSystemInputs,
bool UseGlobalIndex)
: Listener(new PCHValidator(PP, *this)), DeserializationListener(nullptr),
OwnsDeserializationListener(false), SourceMgr(PP.getSourceManager()),
- FileMgr(PP.getFileManager()), Diags(PP.getDiagnostics()),
- SemaObj(nullptr), PP(PP), Context(Context), Consumer(nullptr),
- ModuleMgr(PP.getFileManager()), isysroot(isysroot),
- DisableValidation(DisableValidation),
+ FileMgr(PP.getFileManager()), PCHContainerOps(PCHContainerOps),
+ Diags(PP.getDiagnostics()), SemaObj(nullptr), PP(PP), Context(Context),
+ Consumer(nullptr), ModuleMgr(PP.getFileManager(), PCHContainerOps),
+ isysroot(isysroot), DisableValidation(DisableValidation),
AllowASTWithCompilerErrors(AllowASTWithCompilerErrors),
AllowConfigurationMismatch(AllowConfigurationMismatch),
ValidateSystemInputs(ValidateSystemInputs),
UseGlobalIndex(UseGlobalIndex), TriedLoadingGlobalIndex(false),
- CurrSwitchCaseStmts(&SwitchCaseStmts),
- NumSLocEntriesRead(0), TotalNumSLocEntries(0), NumStatementsRead(0),
- TotalNumStatements(0), NumMacrosRead(0), TotalNumMacros(0),
- NumIdentifierLookups(0), NumIdentifierLookupHits(0), NumSelectorsRead(0),
+ CurrSwitchCaseStmts(&SwitchCaseStmts), NumSLocEntriesRead(0),
+ TotalNumSLocEntries(0), NumStatementsRead(0), TotalNumStatements(0),
+ NumMacrosRead(0), TotalNumMacros(0), NumIdentifierLookups(0),
+ NumIdentifierLookupHits(0), NumSelectorsRead(0),
NumMethodPoolEntriesRead(0), NumMethodPoolLookups(0),
NumMethodPoolHits(0), NumMethodPoolTableLookups(0),
NumMethodPoolTableHits(0), TotalNumMethodPoolEntries(0),
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp
index 02273ed..00ebd3e 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -365,9 +365,72 @@ namespace clang {
void VisitObjCPropertyDecl(ObjCPropertyDecl *D);
void VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D);
void VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D);
+
+ /// We've merged the definition \p MergedDef into the existing definition
+ /// \p Def. Ensure that \p Def is made visible whenever \p MergedDef is made
+ /// visible.
+ void mergeDefinitionVisibility(NamedDecl *Def, NamedDecl *MergedDef) {
+ if (Def->isHidden()) {
+ // If MergedDef is visible or becomes visible, make the definition visible.
+ if (!MergedDef->isHidden())
+ Def->Hidden = false;
+ else if (Reader.getContext().getLangOpts().ModulesLocalVisibility) {
+ Reader.getContext().mergeDefinitionIntoModule(
+ Def, MergedDef->getImportedOwningModule(),
+ /*NotifyListeners*/ false);
+ Reader.PendingMergedDefinitionsToDeduplicate.insert(Def);
+ } else {
+ auto SubmoduleID = MergedDef->getOwningModuleID();
+ assert(SubmoduleID && "hidden definition in no module");
+ Reader.HiddenNamesMap[Reader.getSubmodule(SubmoduleID)].push_back(Def);
+ }
+ }
+ }
};
}
+namespace {
+/// Iterator over the redeclarations of a declaration that have already
+/// been merged into the same redeclaration chain.
+template<typename DeclT>
+class MergedRedeclIterator {
+ DeclT *Start, *Canonical, *Current;
+public:
+ MergedRedeclIterator() : Current(nullptr) {}
+ MergedRedeclIterator(DeclT *Start)
+ : Start(Start), Canonical(nullptr), Current(Start) {}
+
+ DeclT *operator*() { return Current; }
+
+ MergedRedeclIterator &operator++() {
+ if (Current->isFirstDecl()) {
+ Canonical = Current;
+ Current = Current->getMostRecentDecl();
+ } else
+ Current = Current->getPreviousDecl();
+
+ // If we started in the merged portion, we'll reach our start position
+ // eventually. Otherwise, we'll never reach it, but the second declaration
+ // we reached was the canonical declaration, so stop when we see that one
+ // again.
+ if (Current == Start || Current == Canonical)
+ Current = nullptr;
+ return *this;
+ }
+
+ friend bool operator!=(const MergedRedeclIterator &A,
+ const MergedRedeclIterator &B) {
+ return A.Current != B.Current;
+ }
+};
+}
+template<typename DeclT>
+llvm::iterator_range<MergedRedeclIterator<DeclT>> merged_redecls(DeclT *D) {
+ return llvm::iterator_range<MergedRedeclIterator<DeclT>>(
+ MergedRedeclIterator<DeclT>(D),
+ MergedRedeclIterator<DeclT>());
+}
+
uint64_t ASTDeclReader::GetCurrentCursorOffset() {
return F.DeclsCursor.GetCurrentBitNo() + F.GlobalBitOffset;
}
@@ -585,9 +648,21 @@ void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) {
if (ED->IsCompleteDefinition &&
Reader.getContext().getLangOpts().Modules &&
Reader.getContext().getLangOpts().CPlusPlus) {
- if (EnumDecl *&OldDef = Reader.EnumDefinitions[ED->getCanonicalDecl()]) {
+ EnumDecl *&OldDef = Reader.EnumDefinitions[ED->getCanonicalDecl()];
+ if (!OldDef) {
+ // This is the first time we've seen an imported definition. Look for a
+ // local definition before deciding that we are the first definition.
+ for (auto *D : merged_redecls(ED->getCanonicalDecl())) {
+ if (!D->isFromASTFile() && D->isCompleteDefinition()) {
+ OldDef = D;
+ break;
+ }
+ }
+ }
+ if (OldDef) {
Reader.MergedDeclContexts.insert(std::make_pair(ED, OldDef));
ED->IsCompleteDefinition = false;
+ mergeDefinitionVisibility(OldDef, ED);
} else {
OldDef = ED;
}
@@ -967,7 +1042,9 @@ void ASTDeclReader::VisitObjCPropertyDecl(ObjCPropertyDecl *D) {
VisitNamedDecl(D);
D->setAtLoc(ReadSourceLocation(Record, Idx));
D->setLParenLoc(ReadSourceLocation(Record, Idx));
- D->setType(GetTypeSourceInfo(Record, Idx));
+ QualType T = Reader.readType(F, Record, Idx);
+ TypeSourceInfo *TSI = GetTypeSourceInfo(Record, Idx);
+ D->setType(T, TSI);
// FIXME: stable encoding
D->setPropertyAttributes(
(ObjCPropertyDecl::PropertyAttributeKind)Record[Idx++]);
@@ -1391,32 +1468,22 @@ void ASTDeclReader::MergeDefinitionData(
"merging class definition into non-definition");
auto &DD = *D->DefinitionData.getNotUpdated();
- // If the new definition has new special members, let the name lookup
- // code know that it needs to look in the new definition too.
- //
- // FIXME: We only need to do this if the merged definition declares members
- // that this definition did not declare, or if it defines members that this
- // definition did not define.
if (DD.Definition != MergeDD.Definition) {
+ // If the new definition has new special members, let the name lookup
+ // code know that it needs to look in the new definition too.
+ //
+ // FIXME: We only need to do this if the merged definition declares members
+ // that this definition did not declare, or if it defines members that this
+ // definition did not define.
Reader.MergedLookups[DD.Definition].push_back(MergeDD.Definition);
DD.Definition->setHasExternalVisibleStorage();
- if (DD.Definition->isHidden()) {
- // If MergeDD is visible or becomes visible, make the definition visible.
- if (!MergeDD.Definition->isHidden())
- DD.Definition->Hidden = false;
- else if (Reader.getContext().getLangOpts().ModulesLocalVisibility) {
- Reader.getContext().mergeDefinitionIntoModule(
- DD.Definition, MergeDD.Definition->getImportedOwningModule(),
- /*NotifyListeners*/ false);
- Reader.PendingMergedDefinitionsToDeduplicate.insert(DD.Definition);
- } else {
- auto SubmoduleID = MergeDD.Definition->getOwningModuleID();
- assert(SubmoduleID && "hidden definition in no module");
- Reader.HiddenNamesMap[Reader.getSubmodule(SubmoduleID)].push_back(
- DD.Definition);
- }
- }
+ // Track that we merged the definitions.
+ Reader.MergedDeclContexts.insert(std::make_pair(MergeDD.Definition,
+ DD.Definition));
+ Reader.PendingDefinitions.erase(MergeDD.Definition);
+ MergeDD.Definition->IsCompleteDefinition = false;
+ mergeDefinitionVisibility(DD.Definition, MergeDD.Definition);
}
auto PFDI = Reader.PendingFakeDefinitionData.find(&DD);
@@ -1525,42 +1592,21 @@ void ASTDeclReader::ReadCXXRecordDefinition(CXXRecordDecl *D, bool Update) {
// because we're reading an update record, or because we've already done some
// merging. Either way, just merge into it.
CXXRecordDecl *Canon = D->getCanonicalDecl();
- if (auto *CanonDD = Canon->DefinitionData.getNotUpdated()) {
- if (CanonDD->Definition != DD->Definition)
- Reader.MergedDeclContexts.insert(
- std::make_pair(DD->Definition, CanonDD->Definition));
+ if (Canon->DefinitionData.getNotUpdated()) {
MergeDefinitionData(Canon, std::move(*DD));
D->DefinitionData = Canon->DefinitionData;
return;
}
- // Propagate the DefinitionData pointer to the canonical declaration, so
- // that all other deserialized declarations will see it.
- if (Canon == D) {
- D->DefinitionData = DD;
- D->IsCompleteDefinition = true;
-
- // If this is an update record, we can have redeclarations already. Make a
- // note that we need to propagate the DefinitionData pointer onto them.
- if (Update)
- Reader.PendingDefinitions.insert(D);
- } else if (auto *CanonDD = Canon->DefinitionData.getNotUpdated()) {
- // We have already deserialized a definition of this record. This
- // definition is no longer really a definition. Note that the pre-existing
- // definition is the *real* definition.
- Reader.MergedDeclContexts.insert(
- std::make_pair(D, CanonDD->Definition));
- D->DefinitionData = Canon->DefinitionData;
- D->IsCompleteDefinition = false;
- MergeDefinitionData(D, std::move(*DD));
- } else {
- Canon->DefinitionData = DD;
- D->DefinitionData = Canon->DefinitionData;
- D->IsCompleteDefinition = true;
+ // Mark this declaration as being a definition.
+ D->IsCompleteDefinition = true;
+ D->DefinitionData = DD;
- // Note that we have deserialized a definition. Any declarations
- // deserialized before this one will be be given the DefinitionData
- // pointer at the end.
+ // If this is not the first declaration or is an update record, we can have
+ // other redeclarations already. Make a note that we need to propagate the
+ // DefinitionData pointer onto them.
+ if (Update || Canon != D) {
+ Canon->DefinitionData = D->DefinitionData;
Reader.PendingDefinitions.insert(D);
}
}
@@ -1880,15 +1926,10 @@ ASTDeclReader::VisitClassTemplateSpecializationDeclImpl(
// This declaration might be a definition. Merge with any existing
// definition.
if (auto *DDD = D->DefinitionData.getNotUpdated()) {
- if (auto *CanonDD = CanonSpec->DefinitionData.getNotUpdated()) {
+ if (CanonSpec->DefinitionData.getNotUpdated())
MergeDefinitionData(CanonSpec, std::move(*DDD));
- Reader.PendingDefinitions.erase(D);
- Reader.MergedDeclContexts.insert(
- std::make_pair(D, CanonDD->Definition));
- D->IsCompleteDefinition = false;
- } else {
+ else
CanonSpec->DefinitionData = D->DefinitionData;
- }
}
D->DefinitionData = CanonSpec->DefinitionData;
}
@@ -2033,9 +2074,8 @@ void ASTDeclReader::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) {
D->setDeclaredWithTypename(Record[Idx++]);
- bool Inherited = Record[Idx++];
- TypeSourceInfo *DefArg = GetTypeSourceInfo(Record, Idx);
- D->setDefaultArgument(DefArg, Inherited);
+ if (Record[Idx++])
+ D->setDefaultArgument(GetTypeSourceInfo(Record, Idx));
}
void ASTDeclReader::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) {
@@ -2052,11 +2092,8 @@ void ASTDeclReader::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) {
} else {
// Rest of NonTypeTemplateParmDecl.
D->ParameterPack = Record[Idx++];
- if (Record[Idx++]) {
- Expr *DefArg = Reader.ReadExpr(F);
- bool Inherited = Record[Idx++];
- D->setDefaultArgument(DefArg, Inherited);
- }
+ if (Record[Idx++])
+ D->setDefaultArgument(Reader.ReadExpr(F));
}
}
@@ -2072,10 +2109,10 @@ void ASTDeclReader::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) {
Data[I] = Reader.ReadTemplateParameterList(F, Record, Idx);
} else {
// Rest of TemplateTemplateParmDecl.
- TemplateArgumentLoc Arg = Reader.ReadTemplateArgumentLoc(F, Record, Idx);
- bool IsInherited = Record[Idx++];
- D->setDefaultArgument(Arg, IsInherited);
D->ParameterPack = Record[Idx++];
+ if (Record[Idx++])
+ D->setDefaultArgument(Reader.getContext(),
+ Reader.ReadTemplateArgumentLoc(F, Record, Idx));
}
}
@@ -2189,14 +2226,12 @@ void ASTDeclReader::mergeTemplatePattern(RedeclarableTemplateDecl *D,
auto *ExistingClass =
cast<CXXRecordDecl>(ExistingPattern)->getCanonicalDecl();
if (auto *DDD = DClass->DefinitionData.getNotUpdated()) {
- if (auto *ExistingDD = ExistingClass->DefinitionData.getNotUpdated()) {
+ if (ExistingClass->DefinitionData.getNotUpdated()) {
MergeDefinitionData(ExistingClass, std::move(*DDD));
- Reader.PendingDefinitions.erase(DClass);
- Reader.MergedDeclContexts.insert(
- std::make_pair(DClass, ExistingDD->Definition));
- DClass->IsCompleteDefinition = false;
} else {
ExistingClass->DefinitionData = DClass->DefinitionData;
+ // We may have skipped this before because we thought that DClass
+ // was the canonical declaration.
Reader.PendingDefinitions.insert(DClass);
}
}
@@ -2904,6 +2939,43 @@ void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, ...) {
llvm_unreachable("attachPreviousDecl on non-redeclarable declaration");
}
+/// Inherit the default template argument from \p From to \p To. Returns
+/// \c false if there is no default template argument for \p From.
+template <typename ParmDecl>
+static bool inheritDefaultTemplateArgument(ASTContext &Context, ParmDecl *From,
+ Decl *ToD) {
+ auto *To = cast<ParmDecl>(ToD);
+ if (!From->hasDefaultArgument())
+ return false;
+ To->setInheritedDefaultArgument(Context, From);
+ return true;
+}
+
+static void inheritDefaultTemplateArguments(ASTContext &Context,
+ TemplateDecl *From,
+ TemplateDecl *To) {
+ auto *FromTP = From->getTemplateParameters();
+ auto *ToTP = To->getTemplateParameters();
+ assert(FromTP->size() == ToTP->size() && "merged mismatched templates?");
+
+ for (unsigned I = 0, N = FromTP->size(); I != N; ++I) {
+ NamedDecl *FromParam = FromTP->getParam(N - I - 1);
+ NamedDecl *ToParam = ToTP->getParam(N - I - 1);
+
+ if (auto *FTTP = dyn_cast<TemplateTypeParmDecl>(FromParam)) {
+ if (!inheritDefaultTemplateArgument(Context, FTTP, ToParam))
+ break;
+ } else if (auto *FNTTP = dyn_cast<NonTypeTemplateParmDecl>(FromParam)) {
+ if (!inheritDefaultTemplateArgument(Context, FNTTP, ToParam))
+ break;
+ } else {
+ if (!inheritDefaultTemplateArgument(
+ Context, cast<TemplateTemplateParmDecl>(FromParam), ToParam))
+ break;
+ }
+ }
+}
+
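An assumed two-module scenario showing why merged redeclarations need to inherit default template arguments (the module names and the Box template are placeholders):

// In module A:
template <typename T = int> struct Box;
// In module B:
template <typename T> struct Box { T value; };
// Once the reader attaches B's declaration to A's, B's parameter inherits the
// default from A, so a translation unit importing both can still write Box<>.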
void ASTDeclReader::attachPreviousDecl(ASTReader &Reader, Decl *D,
Decl *Previous, Decl *Canon) {
assert(D && Previous);
@@ -2930,6 +3002,12 @@ void ASTDeclReader::attachPreviousDecl(ASTReader &Reader, Decl *D,
// be too.
if (Previous->Used)
D->Used = true;
+
+ // If the declaration declares a template, it may inherit default arguments
+ // from the previous declaration.
+ if (TemplateDecl *TD = dyn_cast<TemplateDecl>(D))
+ inheritDefaultTemplateArguments(Reader.getContext(),
+ cast<TemplateDecl>(Previous), TD);
}
template<typename DeclT>
@@ -3579,48 +3657,6 @@ void ASTReader::loadObjCCategories(serialization::GlobalDeclID ID,
ModuleMgr.visit(ObjCCategoriesVisitor::visit, &Visitor);
}
-namespace {
-/// Iterator over the redeclarations of a declaration that have already
-/// been merged into the same redeclaration chain.
-template<typename DeclT>
-class MergedRedeclIterator {
- DeclT *Start, *Canonical, *Current;
-public:
- MergedRedeclIterator() : Current(nullptr) {}
- MergedRedeclIterator(DeclT *Start)
- : Start(Start), Canonical(nullptr), Current(Start) {}
-
- DeclT *operator*() { return Current; }
-
- MergedRedeclIterator &operator++() {
- if (Current->isFirstDecl()) {
- Canonical = Current;
- Current = Current->getMostRecentDecl();
- } else
- Current = Current->getPreviousDecl();
-
- // If we started in the merged portion, we'll reach our start position
- // eventually. Otherwise, we'll never reach it, but the second declaration
- // we reached was the canonical declaration, so stop when we see that one
- // again.
- if (Current == Start || Current == Canonical)
- Current = nullptr;
- return *this;
- }
-
- friend bool operator!=(const MergedRedeclIterator &A,
- const MergedRedeclIterator &B) {
- return A.Current != B.Current;
- }
-};
-}
-template<typename DeclT>
-llvm::iterator_range<MergedRedeclIterator<DeclT>> merged_redecls(DeclT *D) {
- return llvm::iterator_range<MergedRedeclIterator<DeclT>>(
- MergedRedeclIterator<DeclT>(D),
- MergedRedeclIterator<DeclT>());
-}
-
template<typename DeclT, typename Fn>
static void forAllLaterRedecls(DeclT *D, Fn F) {
F(D);
@@ -3854,6 +3890,9 @@ void ASTDeclReader::UpdateDecl(Decl *D, ModuleFile &ModuleFile,
case UPD_DECL_EXPORTED:
unsigned SubmoduleID = readSubmoduleID(Record, Idx);
+ auto *Exported = cast<NamedDecl>(D);
+ if (auto *TD = dyn_cast<TagDecl>(Exported))
+ Exported = TD->getDefinition();
Module *Owner = SubmoduleID ? Reader.getSubmodule(SubmoduleID) : nullptr;
if (Reader.getContext().getLangOpts().ModulesLocalVisibility) {
// FIXME: This doesn't send the right notifications if there are
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp
index d1ecd46..c15f6b0 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -801,6 +801,16 @@ void ASTStmtReader::VisitDesignatedInitExpr(DesignatedInitExpr *E) {
Designators.data(), Designators.size());
}
+void ASTStmtReader::VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E) {
+ VisitExpr(E);
+ E->setBase(Reader.ReadSubExpr());
+ E->setUpdater(Reader.ReadSubExpr());
+}
+
+void ASTStmtReader::VisitNoInitExpr(NoInitExpr *E) {
+ VisitExpr(E);
+}
+
void ASTStmtReader::VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) {
VisitExpr(E);
}
@@ -2063,9 +2073,7 @@ void ASTStmtReader::VisitOMPLoopDirective(OMPLoopDirective *D) {
D->setLastIteration(Reader.ReadSubExpr());
D->setCalcLastIteration(Reader.ReadSubExpr());
D->setPreCond(Reader.ReadSubExpr());
- auto Fst = Reader.ReadSubExpr();
- auto Snd = Reader.ReadSubExpr();
- D->setCond(Fst, Snd);
+ D->setCond(Reader.ReadSubExpr());
D->setInit(Reader.ReadSubExpr());
D->setInc(Reader.ReadSubExpr());
if (isOpenMPWorksharingDirective(D->getDirectiveKind())) {
@@ -2181,6 +2189,11 @@ void ASTStmtReader::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *D) {
VisitOMPExecutableDirective(D);
}
+void ASTStmtReader::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *D) {
+ VisitStmt(D);
+ VisitOMPExecutableDirective(D);
+}
+
void ASTStmtReader::VisitOMPFlushDirective(OMPFlushDirective *D) {
VisitStmt(D);
// The NumClauses field was read in ReadStmtFromStream.
@@ -2549,10 +2562,18 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
break;
+ case EXPR_DESIGNATED_INIT_UPDATE:
+ S = new (Context) DesignatedInitUpdateExpr(Empty);
+ break;
+
case EXPR_IMPLICIT_VALUE_INIT:
S = new (Context) ImplicitValueInitExpr(Empty);
break;
+ case EXPR_NO_INIT:
+ S = new (Context) NoInitExpr(Empty);
+ break;
+
case EXPR_VA_ARG:
S = new (Context) VAArgExpr(Empty);
break;
@@ -2787,6 +2808,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
S = OMPTaskwaitDirective::CreateEmpty(Context, Empty);
break;
+ case STMT_OMP_TASKGROUP_DIRECTIVE:
+ S = OMPTaskgroupDirective::CreateEmpty(Context, Empty);
+ break;
+
case STMT_OMP_FLUSH_DIRECTIVE:
S = OMPFlushDirective::CreateEmpty(
Context, Record[ASTStmtReader::NumStmtFields], Empty);
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp
index e689234..5bb0bec 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp
@@ -774,7 +774,9 @@ static void AddStmtsExprs(llvm::BitstreamWriter &Stream,
RECORD(EXPR_EXT_VECTOR_ELEMENT);
RECORD(EXPR_INIT_LIST);
RECORD(EXPR_DESIGNATED_INIT);
+ RECORD(EXPR_DESIGNATED_INIT_UPDATE);
RECORD(EXPR_IMPLICIT_VALUE_INIT);
+ RECORD(EXPR_NO_INIT);
RECORD(EXPR_VA_ARG);
RECORD(EXPR_ADDR_LABEL);
RECORD(EXPR_STMT);
@@ -5762,8 +5764,5 @@ void ASTWriter::DeclarationMarkedOpenMPThreadPrivate(const Decl *D) {
void ASTWriter::RedefinedHiddenDefinition(const NamedDecl *D, Module *M) {
assert(!WritingAST && "Already writing the AST!");
assert(D->isHidden() && "expected a hidden declaration");
- if (!D->isFromASTFile())
- return;
-
DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_EXPORTED, M));
}
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp
index 0fa4f93..6c5bc5b 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -190,8 +190,7 @@ namespace clang {
assert(D->isCanonicalDecl() && "non-canonical decl in set");
Writer.AddDeclRef(D, Record);
}
- for (DeclID ID : LazySpecializations)
- Record.push_back(ID);
+ Record.append(LazySpecializations.begin(), LazySpecializations.end());
}
};
}
@@ -543,7 +542,7 @@ void ASTDeclWriter::VisitObjCMethodDecl(ObjCMethodDecl *D) {
// FIXME: stable encoding for @required/@optional
Record.push_back(D->getImplementationControl());
- // FIXME: stable encoding for in/out/inout/bycopy/byref/oneway
+ // FIXME: stable encoding for in/out/inout/bycopy/byref/oneway/nullability
Record.push_back(D->getObjCDeclQualifier());
Record.push_back(D->hasRelatedResultType());
Writer.AddTypeRef(D->getReturnType(), Record);
@@ -679,6 +678,7 @@ void ASTDeclWriter::VisitObjCPropertyDecl(ObjCPropertyDecl *D) {
VisitNamedDecl(D);
Writer.AddSourceLocation(D->getAtLoc(), Record);
Writer.AddSourceLocation(D->getLParenLoc(), Record);
+ Writer.AddTypeRef(D->getType(), Record);
Writer.AddTypeSourceInfo(D->getTypeSourceInfo(), Record);
// FIXME: stable encoding
Record.push_back((unsigned)D->getPropertyAttributes());
@@ -1380,8 +1380,12 @@ void ASTDeclWriter::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) {
VisitTypeDecl(D);
Record.push_back(D->wasDeclaredWithTypename());
- Record.push_back(D->defaultArgumentWasInherited());
- Writer.AddTypeSourceInfo(D->getDefaultArgumentInfo(), Record);
+
+ bool OwnsDefaultArg = D->hasDefaultArgument() &&
+ !D->defaultArgumentWasInherited();
+ Record.push_back(OwnsDefaultArg);
+ if (OwnsDefaultArg)
+ Writer.AddTypeSourceInfo(D->getDefaultArgumentInfo(), Record);
Code = serialization::DECL_TEMPLATE_TYPE_PARM;
}
@@ -1408,11 +1412,11 @@ void ASTDeclWriter::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) {
} else {
// Rest of NonTypeTemplateParmDecl.
Record.push_back(D->isParameterPack());
- Record.push_back(D->getDefaultArgument() != nullptr);
- if (D->getDefaultArgument()) {
+ bool OwnsDefaultArg = D->hasDefaultArgument() &&
+ !D->defaultArgumentWasInherited();
+ Record.push_back(OwnsDefaultArg);
+ if (OwnsDefaultArg)
Writer.AddStmt(D->getDefaultArgument());
- Record.push_back(D->defaultArgumentWasInherited());
- }
Code = serialization::DECL_NON_TYPE_TEMPLATE_PARM;
}
}
@@ -1437,9 +1441,12 @@ void ASTDeclWriter::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) {
Code = serialization::DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK;
} else {
// Rest of TemplateTemplateParmDecl.
- Writer.AddTemplateArgumentLoc(D->getDefaultArgument(), Record);
- Record.push_back(D->defaultArgumentWasInherited());
Record.push_back(D->isParameterPack());
+ bool OwnsDefaultArg = D->hasDefaultArgument() &&
+ !D->defaultArgumentWasInherited();
+ Record.push_back(OwnsDefaultArg);
+ if (OwnsDefaultArg)
+ Writer.AddTemplateArgumentLoc(D->getDefaultArgument(), Record);
Code = serialization::DECL_TEMPLATE_TEMPLATE_PARM;
}
}
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp
index ec822f0..a461d3f 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -738,6 +738,18 @@ void ASTStmtWriter::VisitDesignatedInitExpr(DesignatedInitExpr *E) {
Code = serialization::EXPR_DESIGNATED_INIT;
}
+void ASTStmtWriter::VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E) {
+ VisitExpr(E);
+ Writer.AddStmt(E->getBase());
+ Writer.AddStmt(E->getUpdater());
+ Code = serialization::EXPR_DESIGNATED_INIT_UPDATE;
+}
+
+void ASTStmtWriter::VisitNoInitExpr(NoInitExpr *E) {
+ VisitExpr(E);
+ Code = serialization::EXPR_NO_INIT;
+}
+
void ASTStmtWriter::VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) {
VisitExpr(E);
Code = serialization::EXPR_IMPLICIT_VALUE_INIT;
@@ -1917,8 +1929,7 @@ void ASTStmtWriter::VisitOMPLoopDirective(OMPLoopDirective *D) {
Writer.AddStmt(D->getLastIteration());
Writer.AddStmt(D->getCalcLastIteration());
Writer.AddStmt(D->getPreCond());
- Writer.AddStmt(D->getCond(/* SeparateIter */ false));
- Writer.AddStmt(D->getCond(/* SeparateIter */ true));
+ Writer.AddStmt(D->getCond());
Writer.AddStmt(D->getInit());
Writer.AddStmt(D->getInc());
if (isOpenMPWorksharingDirective(D->getDirectiveKind())) {
@@ -2060,6 +2071,12 @@ void ASTStmtWriter::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *D) {
Code = serialization::STMT_OMP_TASKWAIT_DIRECTIVE;
}
+void ASTStmtWriter::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *D) {
+ VisitStmt(D);
+ VisitOMPExecutableDirective(D);
+ Code = serialization::STMT_OMP_TASKGROUP_DIRECTIVE;
+}
+
void ASTStmtWriter::VisitOMPFlushDirective(OMPFlushDirective *D) {
VisitStmt(D);
Record.push_back(D->getNumClauses());
diff --git a/contrib/llvm/tools/clang/lib/Serialization/GeneratePCH.cpp b/contrib/llvm/tools/clang/lib/Serialization/GeneratePCH.cpp
index b5031fd..c50b3f9 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/GeneratePCH.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/GeneratePCH.cpp
@@ -24,16 +24,14 @@
using namespace clang;
-PCHGenerator::PCHGenerator(const Preprocessor &PP,
- StringRef OutputFile,
- clang::Module *Module,
- StringRef isysroot,
- raw_ostream *OS, bool AllowASTWithErrors)
- : PP(PP), OutputFile(OutputFile), Module(Module),
- isysroot(isysroot.str()), Out(OS),
- SemaPtr(nullptr), Stream(Buffer), Writer(Stream),
- AllowASTWithErrors(AllowASTWithErrors),
- HasEmittedPCH(false) {
+PCHGenerator::PCHGenerator(const Preprocessor &PP, StringRef OutputFile,
+ clang::Module *Module, StringRef isysroot,
+ std::shared_ptr<PCHBuffer> Buffer,
+ bool AllowASTWithErrors)
+ : PP(PP), OutputFile(OutputFile), Module(Module), isysroot(isysroot.str()),
+ SemaPtr(nullptr), Buffer(Buffer), Stream(Buffer->Data), Writer(Stream),
+ AllowASTWithErrors(AllowASTWithErrors) {
+ Buffer->IsComplete = false;
}
PCHGenerator::~PCHGenerator() {
@@ -47,21 +45,12 @@ void PCHGenerator::HandleTranslationUnit(ASTContext &Ctx) {
bool hasErrors = PP.getDiagnostics().hasErrorOccurred();
if (hasErrors && !AllowASTWithErrors)
return;
-
- // Emit the PCH file
+
+ // Emit the PCH file to the Buffer.
assert(SemaPtr && "No Sema?");
Writer.WriteAST(*SemaPtr, OutputFile, Module, isysroot, hasErrors);
- // Write the generated bitstream to "Out".
- Out->write((char *)&Buffer.front(), Buffer.size());
-
- // Make sure it hits disk now.
- Out->flush();
-
- // Free up some memory, in case the process is kept alive.
- Buffer.clear();
-
- HasEmittedPCH = true;
+ Buffer->IsComplete = true;
}
ASTMutationListener *PCHGenerator::GetASTMutationListener() {
diff --git a/contrib/llvm/tools/clang/lib/Serialization/GlobalModuleIndex.cpp b/contrib/llvm/tools/clang/lib/Serialization/GlobalModuleIndex.cpp
index 1b52b44..2c7da3e 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/GlobalModuleIndex.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/GlobalModuleIndex.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "ASTReaderInternals.h"
+#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/Basic/FileManager.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Serialization/ASTBitCodes.h"
@@ -384,6 +385,7 @@ namespace {
/// \brief Builder that generates the global module index file.
class GlobalModuleIndexBuilder {
FileManager &FileMgr;
+ const PCHContainerOperations &PCHContainerOps;
/// \brief Mapping from files to module file information.
typedef llvm::MapVector<const FileEntry *, ModuleFileInfo> ModuleFilesMap;
@@ -416,7 +418,9 @@ namespace {
}
public:
- explicit GlobalModuleIndexBuilder(FileManager &FileMgr) : FileMgr(FileMgr){}
+ explicit GlobalModuleIndexBuilder(
+ FileManager &FileMgr, const PCHContainerOperations &PCHContainerOps)
+ : FileMgr(FileMgr), PCHContainerOps(PCHContainerOps) {}
/// \brief Load the contents of the given module file into the builder.
///
@@ -501,8 +505,7 @@ bool GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) {
// Initialize the input stream
llvm::BitstreamReader InStreamFile;
- InStreamFile.init((const unsigned char *)(*Buffer)->getBufferStart(),
- (const unsigned char *)(*Buffer)->getBufferEnd());
+ PCHContainerOps.ExtractPCH((*Buffer)->getMemBufferRef(), InStreamFile);
llvm::BitstreamCursor InStream(InStreamFile);
// Sniff for the signature.
@@ -764,7 +767,9 @@ void GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
}
GlobalModuleIndex::ErrorCode
-GlobalModuleIndex::writeIndex(FileManager &FileMgr, StringRef Path) {
+GlobalModuleIndex::writeIndex(FileManager &FileMgr,
+ const PCHContainerOperations &PCHContainerOps,
+ StringRef Path) {
llvm::SmallString<128> IndexPath;
IndexPath += Path;
llvm::sys::path::append(IndexPath, IndexFileName);
@@ -787,8 +792,8 @@ GlobalModuleIndex::writeIndex(FileManager &FileMgr, StringRef Path) {
}
// The module index builder.
- GlobalModuleIndexBuilder Builder(FileMgr);
-
+ GlobalModuleIndexBuilder Builder(FileMgr, PCHContainerOps);
+
// Load each of the module files.
std::error_code EC;
for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd;
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ModuleManager.cpp b/contrib/llvm/tools/clang/lib/Serialization/ModuleManager.cpp
index 30d9c89..03d8ed0 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ModuleManager.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/ModuleManager.cpp
@@ -11,6 +11,7 @@
// modules for the ASTReader.
//
//===----------------------------------------------------------------------===//
+#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Serialization/GlobalModuleIndex.h"
@@ -136,10 +137,9 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type,
New->Buffer = std::move(*Buf);
}
-
- // Initialize the stream
- New->StreamFile.init((const unsigned char *)New->Buffer->getBufferStart(),
- (const unsigned char *)New->Buffer->getBufferEnd());
+
+ // Initialize the stream.
+ PCHContainerOps.ExtractPCH(New->Buffer->getMemBufferRef(), New->StreamFile);
}
if (ExpectedSignature) {
@@ -289,8 +289,10 @@ void ModuleManager::moduleFileAccepted(ModuleFile *MF) {
ModulesInCommonWithGlobalIndex.push_back(MF);
}
-ModuleManager::ModuleManager(FileManager &FileMgr)
- : FileMgr(FileMgr), GlobalIndex(), FirstVisitState(nullptr) {}
+ModuleManager::ModuleManager(FileManager &FileMgr,
+ const PCHContainerOperations &PCHContainerOps)
+ : FileMgr(FileMgr), PCHContainerOps(PCHContainerOps), GlobalIndex(),
+ FirstVisitState(nullptr) {}
ModuleManager::~ModuleManager() {
for (unsigned i = 0, e = Chain.size(); i != e; ++i)
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
index d1938a0..4f0b7e5 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
@@ -29,7 +29,8 @@ using namespace ento;
namespace {
class ObjCContainersChecker : public Checker< check::PreStmt<CallExpr>,
- check::PostStmt<CallExpr> > {
+ check::PostStmt<CallExpr>,
+ check::PointerEscape> {
mutable std::unique_ptr<BugType> BT;
inline void initBugType() const {
if (!BT)
@@ -52,6 +53,10 @@ public:
void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
+ ProgramStateRef checkPointerEscape(ProgramStateRef State,
+ const InvalidatedSymbols &Escaped,
+ const CallEvent *Call,
+ PointerEscapeKind Kind) const;
};
} // end anonymous namespace
@@ -146,6 +151,24 @@ void ObjCContainersChecker::checkPreStmt(const CallExpr *CE,
}
}
+ProgramStateRef
+ObjCContainersChecker::checkPointerEscape(ProgramStateRef State,
+ const InvalidatedSymbols &Escaped,
+ const CallEvent *Call,
+ PointerEscapeKind Kind) const {
+ for (InvalidatedSymbols::const_iterator I = Escaped.begin(),
+ E = Escaped.end();
+ I != E; ++I) {
+ SymbolRef Sym = *I;
+ // When a symbol for a mutable array escapes, we can't reason precisely
+ // about its size any more -- so remove it from the map.
+ // Note that we aren't notified here when a CFMutableArrayRef escapes as a
+ // CFArrayRef. This is because CFArrayRef is typedef'd as a pointer to a
+ // const-qualified type.
+ State = State->remove<ArraySizeMap>(Sym);
+ }
+ return State;
+}
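A rough sketch of the pattern this escape handling addresses, assuming CoreFoundation; ThirdPartyMutate stands in for any call the analyzer cannot see into:

#include <CoreFoundation/CoreFoundation.h>

extern void ThirdPartyMutate(CFMutableArrayRef arr);   /* placeholder for an opaque call */

void example(void) {
  CFMutableArrayRef arr = CFArrayCreateMutable(NULL, 0, &kCFTypeArrayCallBacks);
  CFArrayAppendValue(arr, CFSTR("x"));     /* checker tracks a size of 1 */
  ThirdPartyMutate(arr);                   /* escape: the tracked size is dropped */
  (void)CFArrayGetValueAtIndex(arr, 5);    /* no stale out-of-bounds report afterwards */
}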
/// Register checker.
void ento::registerObjCContainersChecker(CheckerManager &mgr) {
mgr.registerChecker<ObjCContainersChecker>();
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
index 58c27d4..49eef23 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
@@ -906,6 +906,8 @@ static ArgEffect getStopTrackingHardEquivalent(ArgEffect E) {
case IncRef:
case IncRefMsg:
case MakeCollectable:
+ case UnretainedOutParameter:
+ case RetainedOutParameter:
case MayEscape:
case StopTracking:
case StopTrackingHard:
@@ -1335,7 +1337,18 @@ RetainSummaryManager::updateSummaryFromAnnotations(const RetainSummary *&Summ,
if (pd->hasAttr<NSConsumedAttr>())
Template->addArg(AF, parm_idx, DecRefMsg);
else if (pd->hasAttr<CFConsumedAttr>())
- Template->addArg(AF, parm_idx, DecRef);
+ Template->addArg(AF, parm_idx, DecRef);
+ else if (pd->hasAttr<CFReturnsRetainedAttr>()) {
+ QualType PointeeTy = pd->getType()->getPointeeType();
+ if (!PointeeTy.isNull())
+ if (coreFoundation::isCFObjectRef(PointeeTy))
+ Template->addArg(AF, parm_idx, RetainedOutParameter);
+ } else if (pd->hasAttr<CFReturnsNotRetainedAttr>()) {
+ QualType PointeeTy = pd->getType()->getPointeeType();
+ if (!PointeeTy.isNull())
+ if (coreFoundation::isCFObjectRef(PointeeTy))
+ Template->addArg(AF, parm_idx, UnretainedOutParameter);
+ }
}
QualType RetTy = FD->getReturnType();
@@ -1366,7 +1379,17 @@ RetainSummaryManager::updateSummaryFromAnnotations(const RetainSummary *&Summ,
Template->addArg(AF, parm_idx, DecRefMsg);
else if (pd->hasAttr<CFConsumedAttr>()) {
Template->addArg(AF, parm_idx, DecRef);
- }
+ } else if (pd->hasAttr<CFReturnsRetainedAttr>()) {
+ QualType PointeeTy = pd->getType()->getPointeeType();
+ if (!PointeeTy.isNull())
+ if (coreFoundation::isCFObjectRef(PointeeTy))
+ Template->addArg(AF, parm_idx, RetainedOutParameter);
+ } else if (pd->hasAttr<CFReturnsNotRetainedAttr>()) {
+ QualType PointeeTy = pd->getType()->getPointeeType();
+ if (!PointeeTy.isNull())
+ if (coreFoundation::isCFObjectRef(PointeeTy))
+ Template->addArg(AF, parm_idx, UnretainedOutParameter);
+ }
}
QualType RetTy = MD->getReturnType();
@@ -2746,7 +2769,6 @@ void RetainCountChecker::checkPostStmt(const CastExpr *CE,
if (hasErr) {
// FIXME: If we get an error during a bridge cast, should we report it?
- // Should we assert that there is no error?
return;
}
@@ -2951,6 +2973,40 @@ void RetainCountChecker::processSummaryOfInlined(const RetainSummary &Summ,
C.addTransition(state);
}
+static ProgramStateRef updateOutParameter(ProgramStateRef State,
+ SVal ArgVal,
+ ArgEffect Effect) {
+ auto *ArgRegion = dyn_cast_or_null<TypedValueRegion>(ArgVal.getAsRegion());
+ if (!ArgRegion)
+ return State;
+
+ QualType PointeeTy = ArgRegion->getValueType();
+ if (!coreFoundation::isCFObjectRef(PointeeTy))
+ return State;
+
+ SVal PointeeVal = State->getSVal(ArgRegion);
+ SymbolRef Pointee = PointeeVal.getAsLocSymbol();
+ if (!Pointee)
+ return State;
+
+ switch (Effect) {
+ case UnretainedOutParameter:
+ State = setRefBinding(State, Pointee,
+ RefVal::makeNotOwned(RetEffect::CF, PointeeTy));
+ break;
+ case RetainedOutParameter:
+ // Do nothing. Retained out parameters will either point to a +1 reference
+ // or NULL, but the way you check for failure differs depending on the API.
+ // Consequently, we don't have a good way to track them yet.
+ break;
+
+ default:
+ llvm_unreachable("only for out parameters");
+ }
+
+ return State;
+}
+
void RetainCountChecker::checkSummary(const RetainSummary &Summ,
const CallEvent &CallOrMsg,
CheckerContext &C) const {
@@ -2964,9 +3020,12 @@ void RetainCountChecker::checkSummary(const RetainSummary &Summ,
for (unsigned idx = 0, e = CallOrMsg.getNumArgs(); idx != e; ++idx) {
SVal V = CallOrMsg.getArgSVal(idx);
- if (SymbolRef Sym = V.getAsLocSymbol()) {
+ ArgEffect Effect = Summ.getArg(idx);
+ if (Effect == RetainedOutParameter || Effect == UnretainedOutParameter) {
+ state = updateOutParameter(state, V, Effect);
+ } else if (SymbolRef Sym = V.getAsLocSymbol()) {
if (const RefVal *T = getRefBinding(state, Sym)) {
- state = updateSymbol(state, Sym, *T, Summ.getArg(idx), hasErr, C);
+ state = updateSymbol(state, Sym, *T, Effect, hasErr, C);
if (hasErr) {
ErrorRange = CallOrMsg.getArgSourceRange(idx);
ErrorSym = Sym;
@@ -3115,6 +3174,11 @@ RetainCountChecker::updateSymbol(ProgramStateRef state, SymbolRef sym,
case DecRefMsgAndStopTrackingHard:
llvm_unreachable("DecRefMsg/IncRefMsg/MakeCollectable already converted");
+ case UnretainedOutParameter:
+ case RetainedOutParameter:
+ llvm_unreachable("Applies to pointer-to-pointer parameters, which should "
+ "not have ref state.");
+
case Dealloc:
// Any use of -dealloc in GC is *bad*.
if (C.isObjCGCEnabled()) {
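
As a hedged illustration of what the new argument effects are keyed on (the declarations below are hypothetical, not from the patch): an out parameter whose pointee is a CF object reference and which carries cf_returns_retained is summarized as RetainedOutParameter, and cf_returns_not_retained as UnretainedOutParameter.

typedef struct __MyThing *MyThingRef;   // a CF-style "Ref" typedef

// RetainedOutParameter: on success *outThing is a +1 reference, but the
// checker deliberately does not bind a ref state for it yet (see
// updateOutParameter above).
bool MyThingCreate(MyThingRef *outThing __attribute__((cf_returns_retained)));

// UnretainedOutParameter: *outThing is bound as a not-owned (+0) reference.
bool MyThingGetCurrent(MyThingRef *outThing
                           __attribute__((cf_returns_not_retained)));
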
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
index b906cc9..fa7884f 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
@@ -1529,7 +1529,7 @@ LikelyFalsePositiveSuppressionBRVisitor::getEndPath(BugReporterContext &BRC,
return nullptr;
} else {
- // If the the complete 'std' suppression is not enabled, suppress reports
+ // If the complete 'std' suppression is not enabled, suppress reports
// from the 'std' namespace that are known to produce false positives.
// The analyzer issues a false use-after-free when std::list::pop_front
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
index f673544..8542f7d 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
@@ -785,7 +785,7 @@ RuntimeDefinition ObjCMethodCall::getRuntimeDefinition() const {
if (E->isInstanceMessage()) {
- // Find the the receiver type.
+ // Find the receiver type.
const ObjCObjectPointerType *ReceiverT = nullptr;
bool CanBeSubClassed = false;
QualType SupersType = E->getSuperType();
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 8b7f18f..ef515fb 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -816,6 +816,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
case Stmt::OMPTaskyieldDirectiveClass:
case Stmt::OMPBarrierDirectiveClass:
case Stmt::OMPTaskwaitDirectiveClass:
+ case Stmt::OMPTaskgroupDirectiveClass:
case Stmt::OMPFlushDirectiveClass:
case Stmt::OMPOrderedDirectiveClass:
case Stmt::OMPAtomicDirectiveClass:
@@ -859,6 +860,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
// Cases not handled yet; but will handle some day.
case Stmt::DesignatedInitExprClass:
+ case Stmt::DesignatedInitUpdateExprClass:
case Stmt::ExtVectorElementExprClass:
case Stmt::ImaginaryLiteralClass:
case Stmt::ObjCAtCatchStmtClass:
@@ -891,6 +893,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
case Stmt::CXXBoolLiteralExprClass:
case Stmt::ObjCBoolLiteralExprClass:
case Stmt::FloatingLiteralClass:
+ case Stmt::NoInitExprClass:
case Stmt::SizeOfPackExprClass:
case Stmt::StringLiteralClass:
case Stmt::ObjCStringLiteralClass:
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
index 3c1a3b4..cfcf7c6 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
@@ -22,6 +22,7 @@
#include "clang/Rewrite/Core/Rewriter.h"
#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
@@ -306,7 +307,7 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D,
FD,
llvm::sys::fs::F_RW |
llvm::sys::fs::F_Excl);
- if (EC && EC != std::errc::file_exists) {
+ if (EC && EC != llvm::errc::file_exists) {
llvm::errs() << "warning: could not create file '" << Model
<< "': " << EC.message() << '\n';
return;
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/ModelInjector.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/ModelInjector.cpp
index 699549f..ee2c3f5 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/ModelInjector.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/ModelInjector.cpp
@@ -76,7 +76,7 @@ void ModelInjector::onBodySynthesis(const NamedDecl *D) {
// Modules are parsed by a separate CompilerInstance, so this code mimics that
// behavior for models
- CompilerInstance Instance;
+ CompilerInstance Instance(CI.getPCHContainerOperations());
Instance.setInvocation(&*Invocation);
Instance.createDiagnostics(
new ForwardingDiagnosticConsumer(CI.getDiagnosticClient()),
diff --git a/contrib/llvm/tools/clang/lib/Tooling/Core/Replacement.cpp b/contrib/llvm/tools/clang/lib/Tooling/Core/Replacement.cpp
index 32e8e5b..6d37a49 100644
--- a/contrib/llvm/tools/clang/lib/Tooling/Core/Replacement.cpp
+++ b/contrib/llvm/tools/clang/lib/Tooling/Core/Replacement.cpp
@@ -88,8 +88,13 @@ std::string Replacement::toString() const {
bool operator<(const Replacement &LHS, const Replacement &RHS) {
if (LHS.getOffset() != RHS.getOffset())
return LHS.getOffset() < RHS.getOffset();
+
+ // Apply longer replacements first, specifically so that deletions are
+ // executed before insertions. It is (hopefully) never the intention to
+ // delete parts of newly inserted code.
if (LHS.getLength() != RHS.getLength())
- return LHS.getLength() < RHS.getLength();
+ return LHS.getLength() > RHS.getLength();
+
if (LHS.getFilePath() != RHS.getFilePath())
return LHS.getFilePath() < RHS.getFilePath();
return LHS.getReplacementText() < RHS.getReplacementText();
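
The new ordering is easiest to see in isolation. The sketch below is a self-contained, simplified stand-in (not the actual clang::tooling::Replacement class) showing that, at the same offset, a longer replacement such as a deletion now sorts ahead of a zero-length insertion.

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

struct Repl {
  unsigned Offset;
  unsigned Length;    // 0 means a pure insertion
  std::string Text;
};

static bool lessThan(const Repl &LHS, const Repl &RHS) {
  if (LHS.Offset != RHS.Offset)
    return LHS.Offset < RHS.Offset;
  // Longer replacements first, so a deletion is applied before an insertion
  // at the same offset and never removes freshly inserted text.
  return LHS.Length > RHS.Length;
}

int main() {
  std::vector<Repl> Rs = {{10, 0, "inserted"}, {10, 4, ""}};
  std::sort(Rs.begin(), Rs.end(), lessThan);
  for (const Repl &R : Rs)
    std::printf("offset=%u length=%u text='%s'\n", R.Offset, R.Length,
                R.Text.c_str());
  return 0; // the 4-character deletion is printed before the insertion
}
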
diff --git a/contrib/llvm/tools/clang/lib/Tooling/Refactoring.cpp b/contrib/llvm/tools/clang/lib/Tooling/Refactoring.cpp
index c817306..d32452f 100644
--- a/contrib/llvm/tools/clang/lib/Tooling/Refactoring.cpp
+++ b/contrib/llvm/tools/clang/lib/Tooling/Refactoring.cpp
@@ -25,9 +25,10 @@
namespace clang {
namespace tooling {
-RefactoringTool::RefactoringTool(const CompilationDatabase &Compilations,
- ArrayRef<std::string> SourcePaths)
- : ClangTool(Compilations, SourcePaths) {}
+RefactoringTool::RefactoringTool(
+ const CompilationDatabase &Compilations, ArrayRef<std::string> SourcePaths,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps)
+ : ClangTool(Compilations, SourcePaths, PCHContainerOps) {}
Replacements &RefactoringTool::getReplacements() { return Replace; }
diff --git a/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp b/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp
index e100003..f9cb7c6 100644
--- a/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp
+++ b/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp
@@ -105,9 +105,10 @@ clang::CompilerInvocation *newInvocation(
}
bool runToolOnCode(clang::FrontendAction *ToolAction, const Twine &Code,
- const Twine &FileName) {
- return runToolOnCodeWithArgs(
- ToolAction, Code, std::vector<std::string>(), FileName);
+ const Twine &FileName,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps) {
+ return runToolOnCodeWithArgs(ToolAction, Code, std::vector<std::string>(),
+ FileName, PCHContainerOps);
}
static std::vector<std::string>
@@ -121,17 +122,18 @@ getSyntaxOnlyToolArgs(const std::vector<std::string> &ExtraArgs,
return Args;
}
-bool runToolOnCodeWithArgs(clang::FrontendAction *ToolAction, const Twine &Code,
- const std::vector<std::string> &Args,
- const Twine &FileName,
- const FileContentMappings &VirtualMappedFiles) {
+bool runToolOnCodeWithArgs(
+ clang::FrontendAction *ToolAction, const Twine &Code,
+ const std::vector<std::string> &Args, const Twine &FileName,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ const FileContentMappings &VirtualMappedFiles) {
SmallString<16> FileNameStorage;
StringRef FileNameRef = FileName.toNullTerminatedStringRef(FileNameStorage);
llvm::IntrusiveRefCntPtr<FileManager> Files(
new FileManager(FileSystemOptions()));
ToolInvocation Invocation(getSyntaxOnlyToolArgs(Args, FileNameRef),
- ToolAction, Files.get());
+ ToolAction, Files.get(), PCHContainerOps);
SmallString<1024> CodeStorage;
Invocation.mapVirtualFile(FileNameRef,
@@ -173,21 +175,18 @@ public:
}
-ToolInvocation::ToolInvocation(std::vector<std::string> CommandLine,
- ToolAction *Action, FileManager *Files)
- : CommandLine(std::move(CommandLine)),
- Action(Action),
- OwnsAction(false),
- Files(Files),
- DiagConsumer(nullptr) {}
+ToolInvocation::ToolInvocation(
+ std::vector<std::string> CommandLine, ToolAction *Action,
+ FileManager *Files, std::shared_ptr<PCHContainerOperations> PCHContainerOps)
+ : CommandLine(std::move(CommandLine)), Action(Action), OwnsAction(false),
+ Files(Files), PCHContainerOps(PCHContainerOps), DiagConsumer(nullptr) {}
-ToolInvocation::ToolInvocation(std::vector<std::string> CommandLine,
- FrontendAction *FAction, FileManager *Files)
+ToolInvocation::ToolInvocation(
+ std::vector<std::string> CommandLine, FrontendAction *FAction,
+ FileManager *Files, std::shared_ptr<PCHContainerOperations> PCHContainerOps)
: CommandLine(std::move(CommandLine)),
- Action(new SingleFrontendActionFactory(FAction)),
- OwnsAction(true),
- Files(Files),
- DiagConsumer(nullptr) {}
+ Action(new SingleFrontendActionFactory(FAction)), OwnsAction(true),
+ Files(Files), PCHContainerOps(PCHContainerOps), DiagConsumer(nullptr) {}
ToolInvocation::~ToolInvocation() {
if (OwnsAction)
@@ -232,13 +231,14 @@ bool ToolInvocation::run() {
Invocation->getPreprocessorOpts().addRemappedFile(It.getKey(),
Input.release());
}
- return runInvocation(BinaryName, Compilation.get(), Invocation.release());
+ return runInvocation(BinaryName, Compilation.get(), Invocation.release(),
+ PCHContainerOps);
}
bool ToolInvocation::runInvocation(
- const char *BinaryName,
- clang::driver::Compilation *Compilation,
- clang::CompilerInvocation *Invocation) {
+ const char *BinaryName, clang::driver::Compilation *Compilation,
+ clang::CompilerInvocation *Invocation,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps) {
// Show the invocation, with -v.
if (Invocation->getHeaderSearchOpts().Verbose) {
llvm::errs() << "clang Invocation:\n";
@@ -246,14 +246,16 @@ bool ToolInvocation::runInvocation(
llvm::errs() << "\n";
}
- return Action->runInvocation(Invocation, Files, DiagConsumer);
+ return Action->runInvocation(Invocation, Files, PCHContainerOps,
+ DiagConsumer);
}
-bool FrontendActionFactory::runInvocation(CompilerInvocation *Invocation,
- FileManager *Files,
- DiagnosticConsumer *DiagConsumer) {
+bool FrontendActionFactory::runInvocation(
+ CompilerInvocation *Invocation, FileManager *Files,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ DiagnosticConsumer *DiagConsumer) {
// Create a compiler instance to handle the actual work.
- clang::CompilerInstance Compiler;
+ clang::CompilerInstance Compiler(PCHContainerOps);
Compiler.setInvocation(Invocation);
Compiler.setFileManager(Files);
@@ -276,8 +278,10 @@ bool FrontendActionFactory::runInvocation(CompilerInvocation *Invocation,
}
ClangTool::ClangTool(const CompilationDatabase &Compilations,
- ArrayRef<std::string> SourcePaths)
+ ArrayRef<std::string> SourcePaths,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps)
: Compilations(Compilations), SourcePaths(SourcePaths),
+ PCHContainerOps(PCHContainerOps),
Files(new FileManager(FileSystemOptions())), DiagConsumer(nullptr) {
appendArgumentsAdjuster(getClangStripOutputAdjuster());
appendArgumentsAdjuster(getClangSyntaxOnlyAdjuster());
@@ -357,7 +361,8 @@ int ClangTool::run(ToolAction *Action) {
// FIXME: We need a callback mechanism for the tool writer to output a
// customized message for each file.
DEBUG({ llvm::dbgs() << "Processing: " << File << ".\n"; });
- ToolInvocation Invocation(std::move(CommandLine), Action, Files.get());
+ ToolInvocation Invocation(std::move(CommandLine), Action, Files.get(),
+ PCHContainerOps);
Invocation.setDiagnosticConsumer(DiagConsumer);
for (const auto &MappedFile : MappedFileContents)
Invocation.mapVirtualFile(MappedFile.first, MappedFile.second);
@@ -385,12 +390,14 @@ public:
ASTBuilderAction(std::vector<std::unique_ptr<ASTUnit>> &ASTs) : ASTs(ASTs) {}
bool runInvocation(CompilerInvocation *Invocation, FileManager *Files,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
DiagnosticConsumer *DiagConsumer) override {
// FIXME: This should use the provided FileManager.
std::unique_ptr<ASTUnit> AST = ASTUnit::LoadFromCompilerInvocation(
- Invocation, CompilerInstance::createDiagnostics(
- &Invocation->getDiagnosticOpts(), DiagConsumer,
- /*ShouldOwnClient=*/false));
+ Invocation, PCHContainerOps,
+ CompilerInstance::createDiagnostics(&Invocation->getDiagnosticOpts(),
+ DiagConsumer,
+ /*ShouldOwnClient=*/false));
if (!AST)
return false;
@@ -406,22 +413,24 @@ int ClangTool::buildASTs(std::vector<std::unique_ptr<ASTUnit>> &ASTs) {
return run(&Action);
}
-std::unique_ptr<ASTUnit> buildASTFromCode(const Twine &Code,
- const Twine &FileName) {
- return buildASTFromCodeWithArgs(Code, std::vector<std::string>(), FileName);
+std::unique_ptr<ASTUnit>
+buildASTFromCode(const Twine &Code, const Twine &FileName,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps) {
+ return buildASTFromCodeWithArgs(Code, std::vector<std::string>(), FileName,
+ PCHContainerOps);
}
-std::unique_ptr<ASTUnit>
-buildASTFromCodeWithArgs(const Twine &Code,
- const std::vector<std::string> &Args,
- const Twine &FileName) {
+std::unique_ptr<ASTUnit> buildASTFromCodeWithArgs(
+ const Twine &Code, const std::vector<std::string> &Args,
+ const Twine &FileName,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps) {
SmallString<16> FileNameStorage;
StringRef FileNameRef = FileName.toNullTerminatedStringRef(FileNameStorage);
std::vector<std::unique_ptr<ASTUnit>> ASTs;
ASTBuilderAction Action(ASTs);
ToolInvocation Invocation(getSyntaxOnlyToolArgs(Args, FileNameRef), &Action,
- nullptr);
+ nullptr, PCHContainerOps);
SmallString<1024> CodeStorage;
Invocation.mapVirtualFile(FileNameRef,
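
A minimal usage sketch of the re-threaded entry point, under the assumption that a caller supplies the container operations explicitly (RawPCHContainerOperations is the concrete implementation the cc1 driver below switches to); the test code and file name are placeholders.

#include "clang/Frontend/FrontendActions.h"
#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/Tooling/Tooling.h"
#include <memory>

int main() {
  auto PCHOps = std::make_shared<clang::RawPCHContainerOperations>();
  // runToolOnCode takes ownership of the action and now also accepts the
  // PCHContainerOperations that end up in the CompilerInstance.
  bool Ok = clang::tooling::runToolOnCode(new clang::SyntaxOnlyAction,
                                          "int f() { return 0; }", "input.cc",
                                          PCHOps);
  return Ok ? 0 : 1;
}
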
diff --git a/contrib/llvm/tools/clang/tools/driver/cc1_main.cpp b/contrib/llvm/tools/clang/tools/driver/cc1_main.cpp
index fa1a10e..972bf5b 100644
--- a/contrib/llvm/tools/clang/tools/driver/cc1_main.cpp
+++ b/contrib/llvm/tools/clang/tools/driver/cc1_main.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Option/Arg.h"
+#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
#include "clang/Frontend/CompilerInstance.h"
@@ -64,7 +65,8 @@ void initializePollyPasses(llvm::PassRegistry &Registry);
#endif
int cc1_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
- std::unique_ptr<CompilerInstance> Clang(new CompilerInstance());
+ std::unique_ptr<CompilerInstance> Clang(
+ new CompilerInstance(std::make_shared<RawPCHContainerOperations>()));
IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
// Initialize targets first, so that --version shows registered targets.
diff --git a/contrib/llvm/tools/clang/tools/driver/cc1as_main.cpp b/contrib/llvm/tools/clang/tools/driver/cc1as_main.cpp
index f73d07b..f7ac17f 100644
--- a/contrib/llvm/tools/clang/tools/driver/cc1as_main.cpp
+++ b/contrib/llvm/tools/clang/tools/driver/cc1as_main.cpp
@@ -323,8 +323,8 @@ static bool ExecuteAssembler(AssemblerInvocation &Opts,
MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &SrcMgr);
// FIXME: Assembler behavior can change with -static.
- MOFI->InitMCObjectFileInfo(Opts.Triple,
- Reloc::Default, CodeModel::Default, Ctx);
+ MOFI->InitMCObjectFileInfo(Triple(Opts.Triple), Reloc::Default,
+ CodeModel::Default, Ctx);
if (Opts.SaveTemporaryLabels)
Ctx.setAllowTemporaryLabels(false);
if (Opts.GenDwarfForAssembly)
diff --git a/contrib/llvm/tools/clang/tools/driver/driver.cpp b/contrib/llvm/tools/clang/tools/driver/driver.cpp
index 9ea960d..c0bcc36 100644
--- a/contrib/llvm/tools/clang/tools/driver/driver.cpp
+++ b/contrib/llvm/tools/clang/tools/driver/driver.cpp
@@ -43,6 +43,7 @@
#include "llvm/Support/Program.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/StringSaver.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/Timer.h"
@@ -292,18 +293,6 @@ static void ParseProgName(SmallVectorImpl<const char *> &ArgVector,
}
}
-namespace {
- class StringSetSaver : public llvm::cl::StringSaver {
- public:
- StringSetSaver(std::set<std::string> &Storage) : Storage(Storage) {}
- const char *SaveString(const char *Str) override {
- return GetStableCStr(Storage, Str);
- }
- private:
- std::set<std::string> &Storage;
- };
-}
-
static void SetBackdoorDriverOutputsFromEnvVars(Driver &TheDriver) {
// Handle CC_PRINT_OPTIONS and CC_PRINT_OPTIONS_FILE.
TheDriver.CCPrintOptions = !!::getenv("CC_PRINT_OPTIONS");
@@ -393,8 +382,8 @@ int main(int argc_, const char **argv_) {
return 1;
}
- std::set<std::string> SavedStrings;
- StringSetSaver Saver(SavedStrings);
+ llvm::BumpPtrAllocator A;
+ llvm::BumpPtrStringSaver Saver(A);
// Determines whether we want nullptr markers in argv to indicate response
// files end-of-lines. We only use this for the /LINK driver argument.
@@ -428,6 +417,7 @@ int main(int argc_, const char **argv_) {
}
}
+ std::set<std::string> SavedStrings;
// Handle CCC_OVERRIDE_OPTIONS, used for editing a command line behind the
// scenes.
if (const char *OverrideStr = ::getenv("CCC_OVERRIDE_OPTIONS")) {
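
For reference, a small sketch of the allocator-backed saver that replaces the local StringSetSaver, assuming the 3.7-era llvm/Support/StringSaver.h interface (the save call and its const char * return type are an assumption, not shown in this patch).

#include "llvm/Support/Allocator.h"
#include "llvm/Support/StringSaver.h"

void demo() {
  llvm::BumpPtrAllocator Alloc;
  llvm::BumpPtrStringSaver Saver(Alloc);
  // The copy stays valid for as long as Alloc lives; no std::set of
  // std::string copies is needed any more.
  const char *Copy = Saver.save("-fsyntax-only"); // assumed interface
  (void)Copy;
}
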
diff --git a/contrib/llvm/tools/clang/utils/TableGen/ClangAttrEmitter.cpp b/contrib/llvm/tools/clang/utils/TableGen/ClangAttrEmitter.cpp
index 11a766c..f79c4a5 100644
--- a/contrib/llvm/tools/clang/utils/TableGen/ClangAttrEmitter.cpp
+++ b/contrib/llvm/tools/clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -2721,7 +2721,8 @@ void EmitClangAttrParsedAttrKinds(RecordKeeper &Records, raw_ostream &OS) {
StringMatcher("Name", Declspec, OS).Emit();
OS << " } else if (AttributeList::AS_CXX11 == Syntax) {\n";
StringMatcher("Name", CXX11, OS).Emit();
- OS << " } else if (AttributeList::AS_Keyword == Syntax) {\n";
+ OS << " } else if (AttributeList::AS_Keyword == Syntax || ";
+ OS << "AttributeList::AS_ContextSensitiveKeyword == Syntax) {\n";
StringMatcher("Name", Keywords, OS).Emit();
OS << " } else if (AttributeList::AS_Pragma == Syntax) {\n";
StringMatcher("Name", Pragma, OS).Emit();
diff --git a/contrib/llvm/tools/llc/llc.cpp b/contrib/llvm/tools/llc/llc.cpp
index fadcfa9..88e7371 100644
--- a/contrib/llvm/tools/llc/llc.cpp
+++ b/contrib/llvm/tools/llc/llc.cpp
@@ -210,6 +210,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
// Load the module to be compiled...
SMDiagnostic Err;
std::unique_ptr<Module> M;
+ std::unique_ptr<MIRParser> MIR;
Triple TheTriple;
bool SkipModule = MCPU == "help" ||
@@ -217,9 +218,13 @@ static int compileModule(char **argv, LLVMContext &Context) {
// If user just wants to list available options, skip module loading
if (!SkipModule) {
- if (StringRef(InputFilename).endswith_lower(".mir"))
- M = parseMIRFile(InputFilename, Err, Context);
- else
+ if (StringRef(InputFilename).endswith_lower(".mir")) {
+ MIR = createMIRParserFromFile(InputFilename, Err, Context);
+ if (MIR) {
+ M = MIR->parseLLVMModule();
+ assert(M && "parseLLVMModule should exit on failure");
+ }
+ } else
M = parseIRFile(InputFilename, Err, Context);
if (!M) {
Err.print(argv[0], errs());
@@ -350,7 +355,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
// Ask the target to add backend passes as necessary.
if (Target->addPassesToEmitFile(PM, *OS, FileType, NoVerify, StartAfterID,
- StopAfterID)) {
+ StopAfterID, MIR.get())) {
errs() << argv[0] << ": target does not support generation of this"
<< " file type!\n";
return 1;
diff --git a/contrib/llvm/tools/lli/OrcLazyJIT.cpp b/contrib/llvm/tools/lli/OrcLazyJIT.cpp
index afccfa6..ae276e6 100644
--- a/contrib/llvm/tools/lli/OrcLazyJIT.cpp
+++ b/contrib/llvm/tools/lli/OrcLazyJIT.cpp
@@ -131,7 +131,7 @@ int llvm::runOrcLazyJIT(std::unique_ptr<Module> M, int ArgC, char* ArgV[]) {
// manager for this target. Bail out.
if (!CallbackMgrBuilder) {
errs() << "No callback manager available for target '"
- << TM->getTargetTriple() << "'.\n";
+ << TM->getTargetTriple().str() << "'.\n";
return 1;
}
diff --git a/contrib/llvm/tools/lli/OrcLazyJIT.h b/contrib/llvm/tools/lli/OrcLazyJIT.h
index c4a12b6..9257225 100644
--- a/contrib/llvm/tools/lli/OrcLazyJIT.h
+++ b/contrib/llvm/tools/lli/OrcLazyJIT.h
@@ -55,7 +55,7 @@ public:
CompileLayer(ObjectLayer, orc::SimpleCompiler(*this->TM)),
IRDumpLayer(CompileLayer, createDebugDumper()),
CCMgr(BuildCallbackMgr(IRDumpLayer, CCMgrMemMgr, Context)),
- CODLayer(IRDumpLayer, *CCMgr),
+ CODLayer(IRDumpLayer, *CCMgr, false),
CXXRuntimeOverrides([this](const std::string &S) { return mangle(S); }) {}
~OrcLazyJIT() {
@@ -88,22 +88,24 @@ public:
// 1) Search the JIT symbols.
// 2) Check for C++ runtime overrides.
// 3) Search the host process (LLI)'s symbol table.
- auto Resolver =
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver =
orc::createLambdaResolver(
[this](const std::string &Name) {
-
if (auto Sym = CODLayer.findSymbol(Name, true))
- return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags());
-
+ return RuntimeDyld::SymbolInfo(Sym.getAddress(),
+ Sym.getFlags());
if (auto Sym = CXXRuntimeOverrides.searchOverrides(Name))
return Sym;
- if (auto Addr = RTDyldMemoryManager::getSymbolAddressInProcess(Name))
+ if (auto Addr =
+ RTDyldMemoryManager::getSymbolAddressInProcess(Name))
return RuntimeDyld::SymbolInfo(Addr, JITSymbolFlags::Exported);
return RuntimeDyld::SymbolInfo(nullptr);
},
- [](const std::string &Name) { return RuntimeDyld::SymbolInfo(nullptr); }
+ [](const std::string &Name) {
+ return RuntimeDyld::SymbolInfo(nullptr);
+ }
);
// Add the module to the JIT.
diff --git a/contrib/llvm/tools/llvm-ar/llvm-ar.cpp b/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
index 1f55e8a..6782b9c 100644
--- a/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/LibDriver/LibDriver.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/ObjectFile.h"
@@ -716,6 +717,15 @@ int main(int argc, char **argv) {
PrettyStackTraceProgram X(argc, argv);
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+ llvm::InitializeAllTargetInfos();
+ llvm::InitializeAllTargetMCs();
+ llvm::InitializeAllAsmParsers();
+
+ StringRef Stem = sys::path::stem(ToolName);
+ if (Stem.find("ranlib") == StringRef::npos &&
+ Stem.find("lib") != StringRef::npos)
+ return libDriverMain(argc, const_cast<const char **>(argv));
+
// Have the command line options parsed and handle things
// like --help and --version.
cl::ParseCommandLineOptions(argc, argv,
@@ -723,14 +733,9 @@ int main(int argc, char **argv) {
" This program archives bitcode files into single libraries\n"
);
- llvm::InitializeAllTargetInfos();
- llvm::InitializeAllTargetMCs();
- llvm::InitializeAllAsmParsers();
-
- StringRef Stem = sys::path::stem(ToolName);
if (Stem.find("ar") != StringRef::npos)
return ar_main();
if (Stem.find("ranlib") != StringRef::npos)
return ranlib_main();
- fail("Not ranlib or ar!");
+ fail("Not ranlib, ar or lib!");
}
diff --git a/contrib/llvm/tools/llvm-dis/llvm-dis.cpp b/contrib/llvm/tools/llvm-dis/llvm-dis.cpp
index 26f14b9..4b7d94d 100644
--- a/contrib/llvm/tools/llvm-dis/llvm-dis.cpp
+++ b/contrib/llvm/tools/llvm-dis/llvm-dis.cpp
@@ -148,7 +148,8 @@ int main(int argc, char **argv) {
std::unique_ptr<Module> M;
// Use the bitcode streaming interface
- DataStreamer *Streamer = getDataFileStreamer(InputFilename, &ErrorMessage);
+ std::unique_ptr<DataStreamer> Streamer =
+ getDataFileStreamer(InputFilename, &ErrorMessage);
if (Streamer) {
std::string DisplayFilename;
if (InputFilename == "-")
@@ -156,7 +157,7 @@ int main(int argc, char **argv) {
else
DisplayFilename = InputFilename;
ErrorOr<std::unique_ptr<Module>> MOrErr =
- getStreamedBitcodeModule(DisplayFilename, Streamer, Context);
+ getStreamedBitcodeModule(DisplayFilename, std::move(Streamer), Context);
M = std::move(*MOrErr);
M->materializeAllPermanently();
} else {
diff --git a/contrib/llvm/tools/llvm-dwarfdump/LLVMBuild.txt b/contrib/llvm/tools/llvm-dwarfdump/LLVMBuild.txt
deleted file mode 100644
index a9dca3e..0000000
--- a/contrib/llvm/tools/llvm-dwarfdump/LLVMBuild.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-;===- ./tools/llvm-dwarfdump/LLVMBuild.txt ---------------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Tool
-name = llvm-dwarfdump
-parent = Tools
-required_libraries = DebugInfoDWARF Object
diff --git a/contrib/llvm/tools/llvm-dwarfdump/fuzzer/CMakeLists.txt b/contrib/llvm/tools/llvm-dwarfdump/fuzzer/CMakeLists.txt
deleted file mode 100644
index 1de35a3..0000000
--- a/contrib/llvm/tools/llvm-dwarfdump/fuzzer/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-set(LLVM_LINK_COMPONENTS
- DebugInfoDWARF
- Object
- Support
- )
-
-add_llvm_executable(llvm-dwarfdump-fuzzer
- EXCLUDE_FROM_ALL
- llvm-dwarfdump-fuzzer.cpp
- )
-
-target_link_libraries(llvm-dwarfdump-fuzzer
- LLVMFuzzer
- )
diff --git a/contrib/llvm/tools/llvm-dwarfdump/fuzzer/llvm-dwarfdump-fuzzer.cpp b/contrib/llvm/tools/llvm-dwarfdump/fuzzer/llvm-dwarfdump-fuzzer.cpp
deleted file mode 100644
index af0ac36..0000000
--- a/contrib/llvm/tools/llvm-dwarfdump/fuzzer/llvm-dwarfdump-fuzzer.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-//===-- llvm-dwarfdump-fuzzer.cpp - Fuzz the llvm-dwarfdump tool ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief This file implements a function that runs llvm-dwarfdump
-/// on a single input. This function is then linked into the Fuzzer library.
-///
-//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DIContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFContext.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/MemoryBuffer.h"
-
-using namespace llvm;
-using namespace object;
-
-extern "C" void LLVMFuzzerTestOneInput(uint8_t *data, size_t size) {
- std::unique_ptr<MemoryBuffer> Buff = MemoryBuffer::getMemBuffer(
- StringRef((const char *)data, size), "", false);
-
- ErrorOr<std::unique_ptr<ObjectFile>> ObjOrErr =
- ObjectFile::createObjectFile(Buff->getMemBufferRef());
- if (!ObjOrErr)
- return;
- ObjectFile &Obj = *ObjOrErr.get();
- std::unique_ptr<DIContext> DICtx(new DWARFContextInMemory(Obj));
- DICtx->dump(nulls(), DIDT_All);
-}
diff --git a/contrib/llvm/tools/llvm-mc/llvm-mc.cpp b/contrib/llvm/tools/llvm-mc/llvm-mc.cpp
index 9a9185c..6ecdb2e 100644
--- a/contrib/llvm/tools/llvm-mc/llvm-mc.cpp
+++ b/contrib/llvm/tools/llvm-mc/llvm-mc.cpp
@@ -383,7 +383,6 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n");
MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
TripleName = Triple::normalize(TripleName);
- Triple TheTriple(TripleName);
setDwarfDebugFlags(argc, argv);
setDwarfDebugProducer();
@@ -392,6 +391,9 @@ int main(int argc, char **argv) {
const Target *TheTarget = GetTarget(ProgName);
if (!TheTarget)
return 1;
+ // Now that GetTarget() has (potentially) replaced TripleName, it's safe to
+ // construct the Triple object.
+ Triple TheTriple(TripleName);
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferPtr =
MemoryBuffer::getFileOrSTDIN(InputFilename);
@@ -429,7 +431,7 @@ int main(int argc, char **argv) {
// MCObjectFileInfo needs a MCContext reference in order to initialize itself.
MCObjectFileInfo MOFI;
MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr);
- MOFI.InitMCObjectFileInfo(TripleName, RelocModel, CMModel, Ctx);
+ MOFI.InitMCObjectFileInfo(TheTriple, RelocModel, CMModel, Ctx);
if (SaveTempLabels)
Ctx.setAllowTemporaryLabels(false);
@@ -498,6 +500,9 @@ int main(int argc, char **argv) {
} else {
assert(FileType == OFT_ObjectFile && "Invalid file type!");
+ // Don't waste memory on names of temp labels.
+ Ctx.setUseNamesOnTempLabels(false);
+
if (!Out->os().supportsSeeking()) {
BOS = make_unique<buffer_ostream>(Out->os());
OS = BOS.get();
diff --git a/contrib/llvm/tools/llvm-objdump/MachODump.cpp b/contrib/llvm/tools/llvm-objdump/MachODump.cpp
index bf7451e..1730bf3 100644
--- a/contrib/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/MachODump.cpp
@@ -788,7 +788,7 @@ static void DumpLiteralPointerSection(MachOObjectFile *O,
// Set the size of the literal pointer.
uint32_t lp_size = O->is64Bit() ? 8 : 4;
- // Collect the external relocation symbols for the the literal pointers.
+ // Collect the external relocation symbols for the literal pointers.
std::vector<std::pair<uint64_t, SymbolRef>> Relocs;
for (const RelocationRef &Reloc : Section.relocations()) {
DataRefImpl Rel;
diff --git a/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp b/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 1bfdb18..6fb48d8 100644
--- a/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -37,7 +37,9 @@ static void exitWithError(const Twine &Message, StringRef Whence = "") {
::exit(1);
}
+namespace {
enum ProfileKinds { instr, sample };
+}
static void mergeInstrProfile(const cl::list<std::string> &Inputs,
StringRef OutputFilename) {
diff --git a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp
index 0931cb7..99969fd 100644
--- a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -58,6 +58,7 @@ public:
void printAttributes() override;
void printMipsPLTGOT() override;
void printMipsABIFlags() override;
+ void printMipsReginfo() override;
private:
typedef ELFFile<ELFT> ELFO;
@@ -147,12 +148,12 @@ getSectionNameIndex(const ELFO &Obj, typename ELFO::Elf_Sym_Iter Symbol,
SectionName = "Processor Specific";
else if (Symbol->isOSSpecific())
SectionName = "Operating System Specific";
- else if (Symbol->isReserved())
- SectionName = "Reserved";
else if (Symbol->isAbsolute())
SectionName = "Absolute";
else if (Symbol->isCommon())
SectionName = "Common";
+ else if (Symbol->isReserved() && SectionIndex != SHN_XINDEX)
+ SectionName = "Reserved";
else {
if (SectionIndex == SHN_XINDEX)
SectionIndex = Obj.getSymbolTableIndex(&*Symbol);
@@ -233,7 +234,7 @@ static const EnumEntry<unsigned> ElfMachineType[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EM_386 ),
LLVM_READOBJ_ENUM_ENT(ELF, EM_68K ),
LLVM_READOBJ_ENUM_ENT(ELF, EM_88K ),
- LLVM_READOBJ_ENUM_ENT(ELF, EM_486 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_IAMCU ),
LLVM_READOBJ_ENUM_ENT(ELF, EM_860 ),
LLVM_READOBJ_ENUM_ENT(ELF, EM_MIPS ),
LLVM_READOBJ_ENUM_ENT(ELF, EM_S370 ),
@@ -1424,3 +1425,30 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsABIFlags() {
W.printFlags("Flags 1", Flags->flags1, makeArrayRef(ElfMipsFlags1));
W.printHex("Flags 2", Flags->flags2);
}
+
+template <class ELFT> void ELFDumper<ELFT>::printMipsReginfo() {
+ const Elf_Shdr *Shdr = findSectionByName(*Obj, ".reginfo");
+ if (!Shdr) {
+ W.startLine() << "There is no .reginfo section in the file.\n";
+ return;
+ }
+ ErrorOr<ArrayRef<uint8_t>> Sec = Obj->getSectionContents(Shdr);
+ if (!Sec) {
+ W.startLine() << "The .reginfo section is empty.\n";
+ return;
+ }
+ if (Sec->size() != sizeof(Elf_Mips_RegInfo<ELFT>)) {
+ W.startLine() << "The .reginfo section has a wrong size.\n";
+ return;
+ }
+
+ auto *Reginfo = reinterpret_cast<const Elf_Mips_RegInfo<ELFT> *>(Sec->data());
+
+ DictScope GS(W, "MIPS RegInfo");
+ W.printHex("GP", Reginfo->ri_gp_value);
+ W.printHex("General Mask", Reginfo->ri_gprmask);
+ W.printHex("Co-Proc Mask0", Reginfo->ri_cprmask[0]);
+ W.printHex("Co-Proc Mask1", Reginfo->ri_cprmask[1]);
+ W.printHex("Co-Proc Mask2", Reginfo->ri_cprmask[2]);
+ W.printHex("Co-Proc Mask3", Reginfo->ri_cprmask[3]);
+}
diff --git a/contrib/llvm/tools/llvm-readobj/MachODumper.cpp b/contrib/llvm/tools/llvm-readobj/MachODumper.cpp
index 40691a2..aeb563a 100644
--- a/contrib/llvm/tools/llvm-readobj/MachODumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/MachODumper.cpp
@@ -469,35 +469,47 @@ void MachODumper::printRelocation(const MachOObjectFile *Obj,
DataRefImpl DR = Reloc.getRawDataRefImpl();
MachO::any_relocation_info RE = Obj->getRelocation(DR);
bool IsScattered = Obj->isRelocationScattered(RE);
- SmallString<32> SymbolNameOrOffset("0x");
- if (IsScattered) {
- // Scattered relocations don't really have an associated symbol
- // for some reason, even if one exists in the symtab at the correct address.
- SymbolNameOrOffset += utohexstr(Obj->getScatteredRelocationValue(RE));
- } else {
+ bool IsExtern = !IsScattered && Obj->getPlainRelocationExternal(RE);
+
+ StringRef TargetName;
+ if (IsExtern) {
symbol_iterator Symbol = Reloc.getSymbol();
if (Symbol != Obj->symbol_end()) {
- StringRef SymbolName;
- if (error(Symbol->getName(SymbolName)))
+ if (error(Symbol->getName(TargetName)))
return;
- SymbolNameOrOffset = SymbolName;
- } else
- SymbolNameOrOffset += utohexstr(Obj->getPlainRelocationSymbolNum(RE));
+ }
+ } else if (!IsScattered) {
+ section_iterator SecI = Obj->getRelocationSection(DR);
+ if (SecI != Obj->section_end()) {
+ if (error(SecI->getName(TargetName)))
+ return;
+ }
}
+ if (TargetName.empty())
+ TargetName = "-";
if (opts::ExpandRelocs) {
DictScope Group(W, "Relocation");
W.printHex("Offset", Offset);
W.printNumber("PCRel", Obj->getAnyRelocationPCRel(RE));
W.printNumber("Length", Obj->getAnyRelocationLength(RE));
- if (IsScattered)
- W.printString("Extern", StringRef("N/A"));
- else
- W.printNumber("Extern", Obj->getPlainRelocationExternal(RE));
W.printNumber("Type", RelocName, Obj->getAnyRelocationType(RE));
- W.printString("Symbol", SymbolNameOrOffset);
- W.printNumber("Scattered", IsScattered);
+ if (IsScattered) {
+ W.printHex("Value", Obj->getScatteredRelocationValue(RE));
+ } else {
+ const char *Kind = IsExtern ? "Symbol" : "Section";
+ W.printNumber(Kind, TargetName, Obj->getPlainRelocationSymbolNum(RE));
+ }
} else {
+ SmallString<32> SymbolNameOrOffset("0x");
+ if (IsScattered) {
+ // Scattered relocations don't really have an associated symbol for some
+ // reason, even if one exists in the symtab at the correct address.
+ SymbolNameOrOffset += utohexstr(Obj->getScatteredRelocationValue(RE));
+ } else {
+ SymbolNameOrOffset = TargetName;
+ }
+
raw_ostream& OS = W.startLine();
OS << W.hex(Offset)
<< " " << Obj->getAnyRelocationPCRel(RE)
diff --git a/contrib/llvm/tools/llvm-readobj/ObjDumper.h b/contrib/llvm/tools/llvm-readobj/ObjDumper.h
index 5750d6f..323f5e3 100644
--- a/contrib/llvm/tools/llvm-readobj/ObjDumper.h
+++ b/contrib/llvm/tools/llvm-readobj/ObjDumper.h
@@ -43,6 +43,7 @@ public:
// Only implemented for MIPS ELF at this time.
virtual void printMipsPLTGOT() { }
virtual void printMipsABIFlags() { }
+ virtual void printMipsReginfo() { }
// Only implemented for PE/COFF.
virtual void printCOFFImports() { }
diff --git a/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp b/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp
index be7bbe9..f960796 100644
--- a/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp
+++ b/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp
@@ -152,6 +152,10 @@ namespace opts {
cl::opt<bool> MipsABIFlags("mips-abi-flags",
cl::desc("Display the MIPS.abiflags section"));
+ // -mips-reginfo
+ cl::opt<bool> MipsReginfo("mips-reginfo",
+ cl::desc("Display the MIPS .reginfo section"));
+
// -coff-imports
cl::opt<bool>
COFFImports("coff-imports", cl::desc("Display the PE/COFF import table"));
@@ -296,6 +300,8 @@ static void dumpObject(const ObjectFile *Obj) {
Dumper->printMipsPLTGOT();
if (opts::MipsABIFlags)
Dumper->printMipsABIFlags();
+ if (opts::MipsReginfo)
+ Dumper->printMipsReginfo();
}
if (opts::COFFImports)
Dumper->printCOFFImports();
diff --git a/contrib/llvm/utils/TableGen/CodeGenTarget.cpp b/contrib/llvm/utils/TableGen/CodeGenTarget.cpp
index e79a809..47d68fc 100644
--- a/contrib/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -297,7 +297,7 @@ void CodeGenTarget::ComputeInstrsByEnum() const {
"IMPLICIT_DEF", "SUBREG_TO_REG", "COPY_TO_REGCLASS", "DBG_VALUE",
"REG_SEQUENCE", "COPY", "BUNDLE", "LIFETIME_START",
"LIFETIME_END", "STACKMAP", "PATCHPOINT", "LOAD_STACK_GUARD",
- "STATEPOINT", "FRAME_ALLOC",
+ "STATEPOINT", "FRAME_ALLOC", "FAULTING_LOAD_OP",
nullptr};
const auto &Insts = getInstructions();
for (const char *const *p = FixedInstrs; *p; ++p) {
diff --git a/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp b/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
index de9c7a6..e5d7577 100644
--- a/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -1437,7 +1437,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
// MCInstrInfo initialization routine.
OS << "static inline void Init" << Target
<< "MCSubtargetInfo(MCSubtargetInfo *II, "
- << "StringRef TT, StringRef CPU, StringRef FS) {\n";
+ << "const Triple &TT, StringRef CPU, StringRef FS) {\n";
OS << " II->InitMCSubtargetInfo(TT, CPU, FS, ";
if (NumFeatures)
OS << Target << "FeatureKV, ";
@@ -1482,10 +1482,11 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "namespace llvm {\n";
OS << "class DFAPacketizer;\n";
OS << "struct " << ClassName << " : public TargetSubtargetInfo {\n"
- << " explicit " << ClassName << "(StringRef TT, StringRef CPU, "
+ << " explicit " << ClassName << "(const Triple &TT, StringRef CPU, "
<< "StringRef FS);\n"
<< "public:\n"
- << " unsigned resolveSchedClass(unsigned SchedClass, const MachineInstr *DefMI,"
+ << " unsigned resolveSchedClass(unsigned SchedClass, "
+ << " const MachineInstr *DefMI,"
<< " const TargetSchedModel *SchedModel) const override;\n"
<< " DFAPacketizer *createDFAPacketizer(const InstrItineraryData *IID)"
<< " const;\n"
@@ -1515,7 +1516,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "extern const unsigned " << Target << "ForwardingPaths[];\n";
}
- OS << ClassName << "::" << ClassName << "(StringRef TT, StringRef CPU, "
+ OS << ClassName << "::" << ClassName << "(const Triple &TT, StringRef CPU, "
<< "StringRef FS)\n"
<< " : TargetSubtargetInfo() {\n"
<< " InitMCSubtargetInfo(TT, CPU, FS, ";