63 files changed, 2011 insertions, 481 deletions
diff --git a/test/Analysis/Inputs/qt-simulator.h b/test/Analysis/Inputs/qt-simulator.h
new file mode 100644
index 0000000..d1d6c03
--- /dev/null
+++ b/test/Analysis/Inputs/qt-simulator.h
@@ -0,0 +1,16 @@
+#pragma clang system_header
+
+struct QObject {
+};
+
+struct QEvent {
+  enum Type { None };
+  QEvent(Type) {}
+};
+
+struct QCoreApplication : public QObject {
+  static void postEvent(QObject *receiver, QEvent *event);
+  static QCoreApplication *instance();
+};
+
+struct QApplication : public QCoreApplication {};
diff --git a/test/Analysis/Inputs/system-header-simulator-cxx.h b/test/Analysis/Inputs/system-header-simulator-cxx.h
index 3586921..f9049c3 100644
--- a/test/Analysis/Inputs/system-header-simulator-cxx.h
+++ b/test/Analysis/Inputs/system-header-simulator-cxx.h
@@ -198,6 +198,25 @@ namespace std {
       storage.assignExternal(new _CharT[4]);
     }
   };
+
+template<class _Engine, class _UIntType>
+class __independent_bits_engine {
+public:
+  // constructors and seeding functions
+  __independent_bits_engine(_Engine& __e, size_t __w);
+};
+
+template<class _Engine, class _UIntType>
+__independent_bits_engine<_Engine, _UIntType>
+    ::__independent_bits_engine(_Engine& __e, size_t __w)
+{
+  // Fake error trigger.
+  // No warning is expected as we are suppressing warning coming
+  // out of std::basic_string.
+  int z = 0;
+  z = 5/z;
+}
+
 }
 
 void* operator new(std::size_t, const std::nothrow_t&) throw();
diff --git a/test/Analysis/inlining/stl.cpp b/test/Analysis/inlining/stl.cpp
index 711c30f..2a8520f 100644
--- a/test/Analysis/inlining/stl.cpp
+++ b/test/Analysis/inlining/stl.cpp
@@ -47,3 +47,8 @@ void testBasicStringSuppression_assign(std::basic_string<char32_t> &v,
                                        const std::basic_string<char32_t> &v2) {
   v = v2;
 }
+
+class MyEngine;
+void testSupprerssion_independent_bits_engine(MyEngine& e) {
+  std::__independent_bits_engine<MyEngine, unsigned int> x(e, 64); // no-warning
+}
diff --git a/test/Analysis/null-deref-ps.c b/test/Analysis/null-deref-ps.c
index 240e8ed..79b3b3a 100644
--- a/test/Analysis/null-deref-ps.c
+++ b/test/Analysis/null-deref-ps.c
@@ -311,3 +311,21 @@ int foo10595327(int b) {
       return *p; // no-warning
   return 0;
 }
+
+#define AS_ATTRIBUTE volatile __attribute__((address_space(256)))
+#define _get_base() ((void * AS_ATTRIBUTE *)0)
+void* test_address_space_array(unsigned long slot) {
+  return _get_base()[slot]; // no-warning
+}
+void test_address_space_condition(int AS_ATTRIBUTE *cpu_data) {
+   if (cpu_data == 0) {
+    *cpu_data = 3; // no-warning
+  }
+}
+struct X { int member; };
+int test_address_space_member() {
+  struct X AS_ATTRIBUTE *data = (struct X AS_ATTRIBUTE *)0UL;
+  int ret;
+  ret = data->member; // no-warning
+  return ret;
+}
diff --git a/test/Analysis/nullptr.cpp b/test/Analysis/nullptr.cpp
index 17320f3..acc525e 100644
--- a/test/Analysis/nullptr.cpp
+++ b/test/Analysis/nullptr.cpp
@@ -126,3 +126,22 @@ decltype(nullptr) returnsNullPtrType();
 void fromReturnType() {
   ((X *)returnsNullPtrType())->f(); // expected-warning{{Called C++ object pointer is null}}
 }
+
+#define AS_ATTRIBUTE __attribute__((address_space(256)))
+class AS1 {
+public:
+  int x;
+  ~AS1() {
+    int AS_ATTRIBUTE *x = 0;
+    *x = 3; // no-warning
+  }
+};
+void test_address_space_field_access() {
+  AS1 AS_ATTRIBUTE *pa = 0;
+  pa->x = 0; // no-warning
+}
+void test_address_space_bind() {
+  AS1 AS_ATTRIBUTE *pa = 0;
+  AS1 AS_ATTRIBUTE &r = *pa;
+  r.x = 0; // no-warning
+}
diff --git a/test/Analysis/qt_malloc.cpp b/test/Analysis/qt_malloc.cpp
new file mode 100644
index 0000000..d29835f
--- /dev/null
+++ b/test/Analysis/qt_malloc.cpp
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -analyze -analyzer-checker=core,alpha.deadcode.UnreachableCode,alpha.core.CastSize,unix.Malloc,cplusplus -analyzer-store=region -verify %s
+// expected-no-diagnostics
+#include "Inputs/qt-simulator.h"
+
+void send(QObject *obj)
+{
+  QEvent *e1 = new QEvent(QEvent::None);
+  static_cast<QApplication *>(QCoreApplication::instance())->postEvent(obj, e1);
+  QEvent *e2 = new QEvent(QEvent::None);
+  QCoreApplication::instance()->postEvent(obj, e2);
+  QEvent *e3 = new QEvent(QEvent::None);
+  QCoreApplication::postEvent(obj, e3);
+  QEvent *e4 = new QEvent(QEvent::None);
+  QApplication::postEvent(obj, e4);
+}
diff --git a/test/CXX/drs/dr5xx.cpp b/test/CXX/drs/dr5xx.cpp
index 17b525d..96d3494 100644
--- a/test/CXX/drs/dr5xx.cpp
+++ b/test/CXX/drs/dr5xx.cpp
@@ -148,8 +148,7 @@ namespace dr522 { // dr522: yes
   template<typename T> void b2(volatile T * const *);
   template<typename T> void b2(volatile T * const S::*);
   template<typename T> void b2(volatile T * const S::* const *);
-  // FIXME: This diagnostic isn't very good. The problem is not substitution failure.
-  template<typename T> void b2a(volatile T *S::* const *); // expected-note {{substitution failure}}
+  template<typename T> void b2a(volatile T *S::* const *); // expected-note {{candidate template ignored: deduced type 'volatile int *dr522::S::*const *' of 1st parameter does not match adjusted type 'int *dr522::S::**' of argument}}
 
   template<typename T> struct Base {};
   struct Derived : Base<int> {};
diff --git a/test/CXX/temp/temp.fct.spec/temp.deduct/temp.deduct.call/p3.cpp b/test/CXX/temp/temp.fct.spec/temp.deduct/temp.deduct.call/p3.cpp
index 295f080..f46ea2e 100644
--- a/test/CXX/temp/temp.fct.spec/temp.deduct/temp.deduct.call/p3.cpp
+++ b/test/CXX/temp/temp.fct.spec/temp.deduct/temp.deduct.call/p3.cpp
@@ -139,7 +139,7 @@ namespace N {
 }
 
 namespace PR9233 {
-  template<typename T> void f(const T **q); // expected-note{{candidate template ignored: substitution failure [with T = int]}}
+  template<typename T> void f(const T **q); // expected-note{{candidate template ignored: deduced type 'const int **' of 1st parameter does not match adjusted type 'int **' of argument [with T = int]}}
 
   void g(int **p) {
     f(p); // expected-error{{no matching function for call to 'f'}}
diff --git a/test/CXX/temp/temp.fct.spec/temp.deduct/temp.deduct.type/p9-0x.cpp b/test/CXX/temp/temp.fct.spec/temp.deduct/temp.deduct.type/p9-0x.cpp
index 782057d..b807a0f 100644
--- a/test/CXX/temp/temp.fct.spec/temp.deduct/temp.deduct.type/p9-0x.cpp
+++ b/test/CXX/temp/temp.fct.spec/temp.deduct/temp.deduct.type/p9-0x.cpp
@@ -55,7 +55,7 @@ namespace DeduceNonTypeTemplateArgsInArray {
 }
 
 namespace DeduceWithDefaultArgs {
-  template<template<typename...> class Container> void f(Container<int>); // expected-note {{substitution failure [with Container = X]}}
+  template<template<typename...> class Container> void f(Container<int>); // expected-note {{deduced type 'X<[...], (default) int>' of 1st parameter does not match adjusted type 'X<[...], double>' of argument [with Container = X]}}
   template<typename, typename = int> struct X {};
   void g() {
     // OK, use default argument for the second template parameter.
diff --git a/test/CodeGen/2007-04-14-FNoBuiltin.c b/test/CodeGen/2007-04-14-FNoBuiltin.c
index 4d194b1..b95f41c 100644
--- a/test/CodeGen/2007-04-14-FNoBuiltin.c
+++ b/test/CodeGen/2007-04-14-FNoBuiltin.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -emit-llvm %s -O2 -fno-builtin -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -O2 -fno-builtin-printf -o - | FileCheck %s
 // Check that -fno-builtin is honored.
 
 extern int printf(const char*, ...);
diff --git a/test/CodeGen/aarch64-v8.1a-neon-intrinsics.c b/test/CodeGen/aarch64-v8.1a-neon-intrinsics.c
index 078b454..ad5d5dd 100644
--- a/test/CodeGen/aarch64-v8.1a-neon-intrinsics.c
+++ b/test/CodeGen/aarch64-v8.1a-neon-intrinsics.c
@@ -1,128 +1,198 @@
 // REQUIRES: aarch64-registered-target
 
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
-// RUN:  -target-feature +v8.1a -O3 -S -o - %s \
-// RUN:  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64
+// RUN:  -target-feature +v8.1a -S -emit-llvm -o - %s | FileCheck %s
 
  #include <arm_neon.h>
 
-// CHECK-AARCH64-LABEL: test_vqrdmlah_laneq_s16
+// CHECK-LABEL: test_vqrdmlah_laneq_s16
 int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
+// CHECK: call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
   return vqrdmlah_laneq_s16(a, b, v, 7);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlah_laneq_s32
+// CHECK-LABEL: test_vqrdmlah_laneq_s32
 int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <2 x i32> <i32 3, i32 3>
+// CHECK: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+// CHECK: call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
   return vqrdmlah_laneq_s32(a, b, v, 3);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahq_laneq_s16
+// CHECK-LABEL: test_vqrdmlahq_laneq_s16
 int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+// CHECK: call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
   return vqrdmlahq_laneq_s16(a, b, v, 7);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahq_laneq_s32
+// CHECK-LABEL: test_vqrdmlahq_laneq_s32
 int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
+// CHECK: call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
   return vqrdmlahq_laneq_s32(a, b, v, 3);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahh_s16
+// CHECK-LABEL: test_vqrdmlahh_s16
 int16_t test_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) {
-// CHECK-AARCH64: sqrdmlah {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
+// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
+// CHECK: extractelement <4 x i16> [[mul]], i64 0
+// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
+// CHECK: extractelement <4 x i16> [[add]], i64 0
   return vqrdmlahh_s16(a, b, c);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahs_s32
+// CHECK-LABEL: test_vqrdmlahs_s32
 int32_t test_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) {
-// CHECK-AARCH64: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
+// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
   return vqrdmlahs_s32(a, b, c);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahh_lane_s16
+// CHECK-LABEL: test_vqrdmlahh_lane_s16
 int16_t test_vqrdmlahh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
-// CHECK-AARCH64: sqrdmlah {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
+// CHECK: extractelement <4 x i16> {{%.*}}, i32 3
+// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
+// CHECK: extractelement <4 x i16> [[mul]], i64 0
+// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
+// CHECK: extractelement <4 x i16> [[add]], i64 0
   return vqrdmlahh_lane_s16(a, b, c, 3);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahs_lane_s32
+// CHECK-LABEL: test_vqrdmlahs_lane_s32
 int32_t test_vqrdmlahs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
-// CHECK-AARCH64: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+// CHECK: extractelement <2 x i32> {{%.*}}, i32 1
+// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
+// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
   return vqrdmlahs_lane_s32(a, b, c, 1);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahh_laneq_s16
+// CHECK-LABEL: test_vqrdmlahh_laneq_s16
 int16_t test_vqrdmlahh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
-// CHECK-AARCH64: sqrdmlah {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
+// CHECK: extractelement <8 x i16> {{%.*}}, i32 7
+// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
+// CHECK: extractelement <4 x i16> [[mul]], i64 0
+// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
+// CHECK: extractelement <4 x i16> [[add]], i64 0
   return vqrdmlahh_laneq_s16(a, b, c, 7);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahs_laneq_s32
+// CHECK-LABEL: test_vqrdmlahs_laneq_s32
 int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
-// CHECK-AARCH64: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+// CHECK: extractelement <4 x i32> {{%.*}}, i32 3
+// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
+// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
   return vqrdmlahs_laneq_s32(a, b, c, 3);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlsh_laneq_s16
+// CHECK-LABEL: test_vqrdmlsh_laneq_s16
 int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
+// CHECK: call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
   return vqrdmlsh_laneq_s16(a, b, v, 7);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlsh_laneq_s32
+// CHECK-LABEL: test_vqrdmlsh_laneq_s32
 int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <2 x i32> <i32 3, i32 3>
+// CHECK: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+// CHECK: call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
   return vqrdmlsh_laneq_s32(a, b, v, 3);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshq_laneq_s16
+// CHECK-LABEL: test_vqrdmlshq_laneq_s16
 int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+// CHECK: call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
   return vqrdmlshq_laneq_s16(a, b, v, 7);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshq_laneq_s32
+// CHECK-LABEL: test_vqrdmlshq_laneq_s32
 int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
+// CHECK: call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
   return vqrdmlshq_laneq_s32(a, b, v, 3);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshh_s16
+// CHECK-LABEL: test_vqrdmlshh_s16
 int16_t test_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) {
-// CHECK-AARCH64: sqrdmlsh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
+// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
+// CHECK: extractelement <4 x i16> [[mul]], i64 0
+// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
+// CHECK: extractelement <4 x i16> [[sub]], i64 0
   return vqrdmlshh_s16(a, b, c);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshs_s32
+// CHECK-LABEL: test_vqrdmlshs_s32
 int32_t test_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) {
-// CHECK-AARCH64: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
+// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
   return vqrdmlshs_s32(a, b, c);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshh_lane_s16
+// CHECK-LABEL: test_vqrdmlshh_lane_s16
 int16_t test_vqrdmlshh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
-// CHECK-AARCH64: sqrdmlsh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
+// CHECK: extractelement <4 x i16> {{%.*}}, i32 3
+// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
+// CHECK: extractelement <4 x i16> [[mul]], i64 0
+// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
+// CHECK: extractelement <4 x i16> [[sub]], i64 0
   return vqrdmlshh_lane_s16(a, b, c, 3);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshs_lane_s32
+// CHECK-LABEL: test_vqrdmlshs_lane_s32
 int32_t test_vqrdmlshs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
-// CHECK-AARCH64: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+// CHECK: extractelement <2 x i32> {{%.*}}, i32 1
+// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
+// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
   return vqrdmlshs_lane_s32(a, b, c, 1);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshh_laneq_s16
+// CHECK-LABEL: test_vqrdmlshh_laneq_s16
 int16_t test_vqrdmlshh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
-// CHECK-AARCH64: sqrdmlsh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
+// CHECK: extractelement <8 x i16> {{%.*}}, i32 7
+// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
+// CHECK: extractelement <4 x i16> [[mul]], i64 0
+// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
+// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
+// CHECK: extractelement <4 x i16> [[sub]], i64 0
   return vqrdmlshh_laneq_s16(a, b, c, 7);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshs_laneq_s32
+// CHECK-LABEL: test_vqrdmlshs_laneq_s32
 int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
-// CHECK-AARCH64: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+// CHECK: extractelement <4 x i32> {{%.*}}, i32 3
+// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
+// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
   return vqrdmlshs_laneq_s32(a, b, c, 3);
 }
-
diff --git a/test/CodeGen/arm-target-features.c b/test/CodeGen/arm-target-features.c
index 35c0e04..7829edf 100644
--- a/test/CodeGen/arm-target-features.c
+++ b/test/CodeGen/arm-target-features.c
@@ -26,6 +26,7 @@
 // RUN: %clang_cc1 -triple armv8-linux-gnueabi -target-cpu cortex-a53 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a57 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a72 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
+// RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
 // CHECK-BASIC-V8: "target-features"="+crc,+crypto,+dsp,+fp-armv8,+hwdiv,+hwdiv-arm,+neon"
 
 
diff --git a/test/CodeGen/arm-v8.1a-neon-intrinsics.c b/test/CodeGen/arm-v8.1a-neon-intrinsics.c
index 5fe299a..7888831 100644
--- a/test/CodeGen/arm-v8.1a-neon-intrinsics.c
+++ b/test/CodeGen/arm-v8.1a-neon-intrinsics.c
@@ -1,122 +1,187 @@
 // RUN: %clang_cc1 -triple armv8.1a-linux-gnu -target-feature +neon \
-// RUN:  -O3 -S -o - %s \
+// RUN:  -S -emit-llvm -o - %s \
 // RUN:  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM
+
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
-// RUN:  -target-feature +v8.1a -O3 -S -o - %s \
+// RUN:  -target-feature +v8.1a -S -emit-llvm -o - %s \
 // RUN:  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64
+
 // REQUIRES: arm-registered-target,aarch64-registered-target
 
 #include <arm_neon.h>
 
 // CHECK-LABEL: test_vqrdmlah_s16
 int16x4_t test_vqrdmlah_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
-// CHECK-ARM: vqrdmlah.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
+// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
+
+// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
+// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
   return vqrdmlah_s16(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlah_s32
 int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
-// CHECK-ARM: vqrdmlah.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+
+// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
   return vqrdmlah_s32(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlahq_s16
 int16x8_t test_vqrdmlahq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
-// CHECK-ARM: vqrdmlah.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+
+// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
   return vqrdmlahq_s16(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlahq_s32
 int32x4_t test_vqrdmlahq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
-// CHECK-ARM: vqrdmlah.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
+// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
+
+// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
+// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
   return vqrdmlahq_s32(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlah_lane_s16
 int16x4_t test_vqrdmlah_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
-// CHECK-ARM: vqrdmlah.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[3]
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+// CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
+// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
+
+// CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
+// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
   return vqrdmlah_lane_s16(a, b, c, 3);
 }
 
 // CHECK-LABEL: test_vqrdmlah_lane_s32
 int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
-// CHECK-ARM: vqrdmlah.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[1]
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+// CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 1>
+// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+
+// CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 1>
+// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
   return vqrdmlah_lane_s32(a, b, c, 1);
 }
 
 // CHECK-LABEL: test_vqrdmlahq_lane_s16
 int16x8_t test_vqrdmlahq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
-// CHECK-ARM: vqrdmlah.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[3]
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+// CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+
+// CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
   return vqrdmlahq_lane_s16(a, b, c, 3);
 }
 
 // CHECK-LABEL: test_vqrdmlahq_lane_s32
 int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
-// CHECK-ARM: vqrdmlah.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[1]
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+// CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
+// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
+
+// CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
+// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
   return vqrdmlahq_lane_s32(a, b, c, 1);
 }
 
 // CHECK-LABEL: test_vqrdmlsh_s16
 int16x4_t test_vqrdmlsh_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
-// CHECK-ARM: vqrdmlsh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
+// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
+
+// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
+// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
   return vqrdmlsh_s16(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlsh_s32
 int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
-// CHECK-ARM: vqrdmlsh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+
+// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
   return vqrdmlsh_s32(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlshq_s16
 int16x8_t test_vqrdmlshq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
-// CHECK-ARM: vqrdmlsh.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+
+// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
   return vqrdmlshq_s16(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlshq_s32
 int32x4_t test_vqrdmlshq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
-// CHECK-ARM: vqrdmlsh.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
+// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
+
+// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
+// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
   return vqrdmlshq_s32(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlsh_lane_s16
 int16x4_t test_vqrdmlsh_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
-// CHECK-ARM: vqrdmlsh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[3]
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+// CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
+// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
+
+// CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
+// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
   return vqrdmlsh_lane_s16(a, b, c, 3);
 }
 
 // CHECK-LABEL: test_vqrdmlsh_lane_s32
 int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
-// CHECK-ARM: vqrdmlsh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[1]
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+// CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 1>
+// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+
+// CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 1>
+// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
   return vqrdmlsh_lane_s32(a, b, c, 1);
 }
 
 // CHECK-LABEL: test_vqrdmlshq_lane_s16
 int16x8_t test_vqrdmlshq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
-// CHECK-ARM: vqrdmlsh.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[3]
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+// CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+
+// CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
   return vqrdmlshq_lane_s16(a, b, c, 3);
 }
 
 // CHECK-LABEL: test_vqrdmlshq_lane_s32
 int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
-// CHECK-ARM: vqrdmlsh.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[1]
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+// CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
+// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
+
+// CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
+// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
   return vqrdmlshq_lane_s32(a, b, c, 1);
 }
-
diff --git a/test/CodeGen/libcalls-complex.c b/test/CodeGen/libcalls-complex.c
index 22c97b6..43b7055 100644
--- a/test/CodeGen/libcalls-complex.c
+++ b/test/CodeGen/libcalls-complex.c
@@ -1,4 +1,7 @@
 // RUN: %clang_cc1 -fno-builtin -emit-llvm -o - %s -triple i386-unknown-unknown | FileCheck -check-prefix CHECK-YES %s
+// RUN: %clang_cc1 -fno-builtin-crealf -fno-builtin-creal -fno-builtin-creall \
+// RUN:  -fno-builtin-cimagf  -fno-builtin-cimag -fno-builtin-cimagl -emit-llvm \
+// RUN:  -o - %s -triple i386-unknown-unknown | FileCheck -check-prefix CHECK-YES %s
 // RUN: %clang_cc1 -emit-llvm -o - %s -triple i386-unknown-unknown | FileCheck -check-prefix CHECK-NO %s
 
 extern float crealf(float _Complex);
diff --git a/test/CodeGen/libcalls-fno-builtin.c b/test/CodeGen/libcalls-fno-builtin.c
index e7f3ef7..bc6a430 100644
--- a/test/CodeGen/libcalls-fno-builtin.c
+++ b/test/CodeGen/libcalls-fno-builtin.c
@@ -1,4 +1,11 @@
 // RUN: %clang_cc1 -S -O3 -fno-builtin -o - %s | FileCheck %s
+// RUN: %clang_cc1 -S -O3 -fno-builtin-ceil -fno-builtin-copysign -fno-builtin-cos \
+// RUN:  -fno-builtin-fabs -fno-builtin-floor -fno-builtin-strcat -fno-builtin-strncat \
+// RUN:  -fno-builtin-strchr -fno-builtin-strrchr -fno-builtin-strcmp -fno-builtin-strncmp \
+// RUN:  -fno-builtin-strcpy -fno-builtin-stpcpy -fno-builtin-strncpy -fno-builtin-strlen \
+// RUN:  -fno-builtin-strpbrk -fno-builtin-strspn -fno-builtin-strtod -fno-builtin-strtof \
+// RUN:  -fno-builtin-strtold -fno-builtin-strtol -fno-builtin-strtoll -fno-builtin-strtoul \
+// RUN:  -fno-builtin-strtoull -o - %s | FileCheck %s
 // rdar://10551066
 
 typedef __SIZE_TYPE__ size_t;
diff --git a/test/CodeGen/nobuiltin.c b/test/CodeGen/nobuiltin.c
index 0a8e8bb..7cc8164 100644
--- a/test/CodeGen/nobuiltin.c
+++ b/test/CodeGen/nobuiltin.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -fno-builtin -O1 -S -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fno-builtin-memset -O1 -S -o - %s | FileCheck -check-prefix=MEMSET %s
 
 void PR13497() {
   char content[2];
@@ -6,3 +7,11 @@ void PR13497() {
   // CHECK: __strcpy_chk
   __builtin___strcpy_chk(content, "", 1);
 }
+
+void PR4941(char *s) {
+  // Make sure we don't optimize this loop to a memset().
+  // MEMSET-LABEL: PR4941:
+  // MEMSET-NOT: memset
+  for (unsigned i = 0; i < 8192; ++i)
+    s[i] = 0;
+}
diff --git a/test/CodeGen/pku.c b/test/CodeGen/pku.c
new file mode 100644
index 0000000..30565a8
--- /dev/null
+++ b/test/CodeGen/pku.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +pku -emit-llvm -o - -Werror | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
+
+#include <immintrin.h>
+
+unsigned int test_rdpkru_u32() {
+  // CHECK-LABEL: @test_rdpkru_u32
+  // CHECK: @llvm.x86.rdpkru
+  return _rdpkru_u32(); 
+}
+void test_wrpkru(unsigned int __A) {
+  // CHECK-LABEL: @test_wrpkru
+  // CHECK: @llvm.x86.wrpkru
+  _wrpkru(__A);
+  return ;
+}
diff --git a/test/CodeGenCXX/ms-inline-asm-fields.cpp b/test/CodeGenCXX/ms-inline-asm-fields.cpp
index a78d511..6f32933 100644
--- a/test/CodeGenCXX/ms-inline-asm-fields.cpp
+++ b/test/CodeGenCXX/ms-inline-asm-fields.cpp
@@ -29,3 +29,28 @@ extern "C" int test_namespace_global() {
   __asm mov eax, asdf::a_global.a3.b2
 }
 
+template <bool Signed>
+struct make_storage_type {
+  struct type {
+    struct B {
+      int a;
+      int x;
+    } b;
+  };
+};
+
+template <bool Signed>
+struct msvc_dcas_x86 {
+  typedef typename make_storage_type<Signed>::type storage_type;
+  void store() __asm("PR26001") {
+    storage_type p;
+    __asm mov edx, p.b.x;
+  }
+};
+
+template void msvc_dcas_x86<false>::store();
+// CHECK: define weak_odr void @"\01PR26001"(
+// CHECK: %[[P:.*]] = alloca %"struct.make_storage_type<false>::type", align 4
+// CHECK: %[[B:.*]] = getelementptr inbounds %"struct.make_storage_type<false>::type", %"struct.make_storage_type<false>::type"* %[[P]], i32 0, i32 0
+// CHECK: %[[X:.*]] = getelementptr inbounds %"struct.make_storage_type<false>::type::B", %"struct.make_storage_type<false>::type::B"* %[[B]], i32 0, i32 1
+// CHECK: call void asm sideeffect inteldialect "mov edx, dword ptr $0", "*m,~{edx},~{dirflag},~{fpsr},~{flags}"(i32* %[[X]])
diff --git a/test/CodeGenCXX/optnone-and-attributes.cpp b/test/CodeGenCXX/optnone-and-attributes.cpp
new file mode 100644
index 0000000..56173b5
--- /dev/null
+++ b/test/CodeGenCXX/optnone-and-attributes.cpp
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 < %s -triple i386-mingw32 -fms-extensions -emit-llvm -x c++ | FileCheck %s
+
+// optnone wins over inlinehint.
+// Test that both func1 and func2 are marked optnone and noinline.
+
+// Definition with both optnone and inlinehint.
+__attribute__((optnone))
+inline int func1(int a) {
+  return a + a + a + a;
+}
+// CHECK: @_Z5func1i({{.*}}) [[OPTNONE:#[0-9]+]]
+
+// optnone declaration, inlinehint definition.
+__attribute__((optnone))
+int func2(int a);
+
+inline int func2(int a) {
+  return a + a + a + a;
+}
+// CHECK: @_Z5func2i({{.*}}) [[OPTNONE]]
+
+// Keep alive the definitions of func1 and func2.
+int foo() {
+  int val = func1(1);
+  return val + func2(2);
+}
+
+// optnone wins over minsize.
+__attribute__((optnone))
+int func3(int a);
+
+__attribute__((minsize))
+int func3(int a) {
+  return a + a + a + a;
+}
+// Same attribute set as everything else, therefore no 'minsize'.
+// CHECK: @_Z5func3i({{.*}}) [[OPTNONE]]
+
+
+// Verify that noreturn is compatible with optnone.
+__attribute__((noreturn))
+extern void exit_from_function();
+
+__attribute__((noreturn)) __attribute((optnone))
+extern void noreturn_function(int a) { exit_from_function(); }
+// CHECK: @_Z17noreturn_functioni({{.*}}) [[NORETURN:#[0-9]+]]
+
+
+// Verify that __declspec(noinline) is compatible with optnone.
+__declspec(noinline) __attribute__((optnone))
+void func4() { return; }
+// CHECK: @_Z5func4v() [[OPTNONE]]
+
+__declspec(noinline)
+extern void func5();
+
+__attribute__((optnone))
+void func5() { return; }
+// CHECK: @_Z5func5v() [[OPTNONE]]
+
+
+// Verify also that optnone can be used on dllexport functions.
+// Adding attribute optnone on a dllimport function has no effect.
+
+__attribute__((dllimport))
+__attribute__((optnone))
+int imported_optnone_func(int a);
+
+__attribute__((dllexport))
+__attribute__((optnone))
+int exported_optnone_func(int a) {
+  return imported_optnone_func(a); // use of imported func
+}
+// CHECK: @_Z21exported_optnone_funci({{.*}}) [[OPTNONE]]
+// CHECK: declare dllimport {{.*}} @_Z21imported_optnone_funci({{.*}}) [[DLLIMPORT:#[0-9]+]]
+
+
+// CHECK: attributes [[OPTNONE]] = { noinline {{.*}} optnone
+// CHECK: attributes [[NORETURN]] = { noinline noreturn {{.*}} optnone
+
+// CHECK: attributes [[DLLIMPORT]] =
+// CHECK-SAME-NOT: optnone
diff --git a/test/CodeGenCXX/optnone-class-members.cpp b/test/CodeGenCXX/optnone-class-members.cpp
new file mode 100644
index 0000000..147b821
--- /dev/null
+++ b/test/CodeGenCXX/optnone-class-members.cpp
@@ -0,0 +1,164 @@
+// RUN: %clang_cc1 < %s -triple %itanium_abi_triple -fms-extensions -emit-llvm -x c++ | FileCheck %s
+
+// Test attribute 'optnone' on methods:
+//  -- member functions;
+//  -- static member functions.
+
+// Verify that all methods of struct A are associated to the same attribute set.
+// The attribute set shall contain attributes 'noinline' and 'optnone'.
+
+struct A {
+  // Definition of an optnone static method.
+  __attribute__((optnone))
+  static int static_optnone_method(int a) {
+    return a + a;
+  }
+  // CHECK: @_ZN1A21static_optnone_methodEi({{.*}}) [[OPTNONE:#[0-9]+]]
+
+  // Definition of an optnone normal method.
+  __attribute__((optnone))
+  int optnone_method(int a) {
+    return a + a + a + a;
+  }
+  // CHECK: @_ZN1A14optnone_methodEi({{.*}}) [[OPTNONE]]
+
+  // Declaration of an optnone method with out-of-line definition
+  // that doesn't say optnone.
+  __attribute__((optnone))
+  int optnone_decl_method(int a);
+
+  // Methods declared without attribute optnone; the definitions will
+  // have attribute optnone, and we verify optnone wins.
+  __forceinline static int static_forceinline_method(int a);
+  __attribute__((always_inline)) int alwaysinline_method(int a);
+  __attribute__((noinline)) int noinline_method(int a);
+  __attribute__((minsize)) int minsize_method(int a);
+};
+
+void foo() {
+  A a;
+  A::static_optnone_method(4);
+  a.optnone_method(14);
+  a.optnone_decl_method(12);
+  A::static_forceinline_method(5);
+  a.alwaysinline_method(5);
+  a.noinline_method(6);
+  a.minsize_method(7);
+}
+
+// No attribute here, should still be on the definition.
+int A::optnone_decl_method(int a) {
+  return a;
+}
+// CHECK: @_ZN1A19optnone_decl_methodEi({{.*}}) [[OPTNONE]]
+
+// optnone implies noinline; therefore attribute noinline is added to
+// the set of function attributes.
+// forceinline is instead translated as 'always_inline'.
+// However 'noinline' wins over 'always_inline' and therefore
+// the resulting attributes for this method are: noinline + optnone
+__attribute__((optnone))
+int A::static_forceinline_method(int a) {
+  return a + a + a + a;
+}
+// CHECK: @_ZN1A25static_forceinline_methodEi({{.*}}) [[OPTNONE]]
+
+__attribute__((optnone))
+int A::alwaysinline_method(int a) {
+  return a + a + a + a;
+}
+// CHECK: @_ZN1A19alwaysinline_methodEi({{.*}}) [[OPTNONE]]
+
+// 'noinline' + 'noinline and optnone' = 'noinline and optnone'
+__attribute__((optnone))
+int A::noinline_method(int a) {
+  return a + a + a + a;
+}
+// CHECK: @_ZN1A15noinline_methodEi({{.*}}) [[OPTNONE]]
+
+// 'optnone' wins over 'minsize'
+__attribute__((optnone))
+int A::minsize_method(int a) {
+  return a + a + a + a;
+}
+// CHECK: @_ZN1A14minsize_methodEi({{.*}}) [[OPTNONE]]
+
+
+// Test attribute 'optnone' on methods:
+//  -- pure virtual functions
+//  -- base virtual and derived virtual
+//  -- base virtual but not derived virtual
+//  -- optnone methods redefined in override
+
+// A method defined in override doesn't inherit the function attributes of the
+// superclass method.
+
+struct B {
+  virtual int pure_virtual(int a) = 0;
+  __attribute__((optnone))
+  virtual int pure_virtual_with_optnone(int a) = 0;
+
+  virtual int base(int a) {
+    return a + a + a + a;
+  }
+
+  __attribute__((optnone))
+  virtual int optnone_base(int a) {
+    return a + a + a + a;
+  }
+
+  __attribute__((optnone))
+  virtual int only_base_virtual(int a) {
+    return a + a;
+  }
+};
+
+struct C : public B {
+  __attribute__((optnone))
+  virtual int pure_virtual(int a) {
+    return a + a + a + a;
+  }
+
+  virtual int pure_virtual_with_optnone(int a) {
+    return a + a + a + a;
+  }
+
+  __attribute__((optnone))
+  virtual int base(int a) {
+    return a + a;
+  }
+
+  virtual int optnone_base(int a) {
+    return a + a;
+  }
+
+  int only_base_virtual(int a) {
+    return a + a + a + a;
+  }
+};
+
+int bar() {
+  C c;
+  int result;
+  result = c.pure_virtual(3);
+  result += c.pure_virtual_with_optnone(2);
+  result += c.base(5);
+  result += c.optnone_base(7);
+  result += c.only_base_virtual(9);
+  return result;
+}
+
+// CHECK: @_ZN1C12pure_virtualEi({{.*}}) {{.*}} [[OPTNONE]]
+// CHECK: @_ZN1C25pure_virtual_with_optnoneEi({{.*}}) {{.*}} [[NORMAL:#[0-9]+]]
+// CHECK: @_ZN1C4baseEi({{.*}}) {{.*}} [[OPTNONE]]
+// CHECK: @_ZN1C12optnone_baseEi({{.*}}) {{.*}} [[NORMAL]]
+// CHECK: @_ZN1C17only_base_virtualEi({{.*}}) {{.*}} [[NORMAL]]
+// CHECK: @_ZN1B4baseEi({{.*}}) {{.*}} [[NORMAL]]
+// CHECK: @_ZN1B12optnone_baseEi({{.*}}) {{.*}} [[OPTNONE]]
+// CHECK: @_ZN1B17only_base_virtualEi({{.*}}) {{.*}} [[OPTNONE]]
+
+
+// CHECK: attributes [[NORMAL]] =
+// CHECK-SAME-NOT: noinline
+// CHECK-SAME-NOT: optnone
+// CHECK: attributes [[OPTNONE]] = {{.*}} noinline {{.*}} optnone
diff --git a/test/CodeGenCXX/optnone-def-decl.cpp b/test/CodeGenCXX/optnone-def-decl.cpp
index ab6eb3f..cb3a677 100644
--- a/test/CodeGenCXX/optnone-def-decl.cpp
+++ b/test/CodeGenCXX/optnone-def-decl.cpp
@@ -90,5 +90,6 @@ int user_of_forceinline_optnone_function() {
 // CHECK: @_Z28forceinline_optnone_functionii({{.*}}) [[OPTNONE]]
 
 // CHECK: attributes [[OPTNONE]] = { noinline nounwind optnone {{.*}} }
-// CHECK: attributes [[NORMAL]] = { nounwind {{.*}} }
-
+// CHECK: attributes [[NORMAL]] =
+// CHECK-SAME-NOT: noinline
+// CHECK-SAME-NOT: optnone
diff --git a/test/CodeGenCXX/optnone-templates.cpp b/test/CodeGenCXX/optnone-templates.cpp
new file mode 100644
index 0000000..45a72b3
--- /dev/null
+++ b/test/CodeGenCXX/optnone-templates.cpp
@@ -0,0 +1,104 @@
+// RUN: %clang_cc1 %s -triple %itanium_abi_triple -std=c++11 -emit-llvm -o - | FileCheck %s
+
+// Test optnone on template instantiations.
+
+//-- Effect of optnone on generic add template function.
+
+template <typename T> T template_normal(T a)
+{
+  return a + a;
+}
+
+template <typename T> __attribute__((optnone)) T template_optnone(T a)
+{
+  return a + a + a;
+}
+
+// This function should cause instantiations of each template, one marked
+// with the 'optnone' attribute.
+int container(int i)
+{
+  return template_normal<int>(i) + template_optnone<int>(i);
+}
+
+// CHECK: @_Z15template_normalIiET_S0_({{.*}}) [[NORMAL:#[0-9]+]]
+// CHECK: @_Z16template_optnoneIiET_S0_({{.*}}) [[OPTNONE:#[0-9]+]]
+
+
+//-- Effect of optnone on a partial specialization.
+//   FIRST TEST: a method becomes marked with optnone in the specialization.
+
+template <typename T, typename U> class template_normal_base {
+public:
+  T method(T t, U u) 
+  {
+    return t + static_cast<T>(u);
+  }
+};
+
+template <typename U> class template_normal_base<int, U>
+{
+public:
+  __attribute__((optnone)) int method (int t, U u)
+  {
+    return t - static_cast<int>(u);
+  }
+};
+
+// This function should cause an instantiation of the full template (whose
+// method is not marked optnone) and an instantiation of the partially
+// specialized template (whose method is marked optnone).
+void container2() 
+{
+  int y = 2;
+  float z = 3.0;
+  template_normal_base<float, int> class_normal;
+  template_normal_base<int, float> class_optnone;
+  float r1 = class_normal.method(z, y);
+  float r2 = class_optnone.method(y, z);
+}
+
+// CHECK: @_ZN20template_normal_baseIfiE6methodEfi({{.*}}) [[NORMAL]]
+// CHECK: @_ZN20template_normal_baseIifE6methodEif({{.*}}) [[OPTNONE]]
+
+
+//-- Effect of optnone on a partial specialization.
+//   SECOND TEST: a method loses optnone in the specialization.
+
+template <typename T, typename U> class template_optnone_base {
+public:
+  __attribute__((optnone)) T method(T t, U u) 
+  {
+    return t + static_cast<T>(u);
+  }
+};
+
+template <typename U> class template_optnone_base<int, U>
+{
+public:
+  int method (int t, U u)
+  {
+    return t - static_cast<int>(u);
+  }
+};
+
+// This function should cause an instantiation of the full template (whose
+// method is marked optnone) and an instantiation of the partially
+// specialized template (whose method is not marked optnone).
+void container3() 
+{
+  int y = 2;
+  float z = 3.0;
+  template_optnone_base<float, int> class_optnone;
+  template_optnone_base<int, float> class_normal;
+  float r1 = class_optnone.method(z, y);
+  float r2 = class_normal.method(y, z);
+}
+
+// CHECK: @_ZN21template_optnone_baseIfiE6methodEfi({{.*}}) [[OPTNONE]]
+// CHECK: @_ZN21template_optnone_baseIifE6methodEif({{.*}}) [[NORMAL]]
+
+
+// CHECK: attributes [[NORMAL]] =
+// CHECK-SAME-NOT: optnone
+// CHECK: attributes [[OPTNONE]] = {{.*}} optnone
diff --git a/test/CoverageMapping/ir.c b/test/CoverageMapping/ir.c
index 4dbfb48..f94d34c 100644
--- a/test/CoverageMapping/ir.c
+++ b/test/CoverageMapping/ir.c
@@ -9,4 +9,4 @@ int main(void) {
   return 0;
 }
 
-// CHECK: @__llvm_coverage_mapping = internal constant { i32, i32, i32, i32, [2 x <{ i8*, i32, i32, i64 }>], [{{[0-9]+}} x i8] } { i32 2, i32 {{[0-9]+}}, i32 {{[0-9]+}}, i32 0, [2 x <{ i8*, i32, i32, i64 }>] [<{ i8*, i32, i32, i64 }> <{ i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i32 3, i32 9, i64 {{[0-9]+}} }>, <{ i8*, i32, i32, i64 }> <{ i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_main, i32 0, i32 0), i32 4, i32 9, i64 {{[0-9]+}} }>]
+// CHECK: @__llvm_coverage_mapping = internal constant { { i32, i32, i32, i32 }, [2 x <{ i8*, i32, i32, i64 }>], [{{[0-9]+}} x i8] } { { i32, i32, i32, i32 } { i32 2, i32 {{[0-9]+}}, i32 {{[0-9]+}}, i32 0 }, [2 x <{ i8*, i32, i32, i64 }>] [<{ i8*, i32, i32, i64 }> <{ i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i32 3, i32 9, i64 {{[0-9]+}} }>, <{ i8*, i32, i32, i64 }> <{ i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_main, i32 0, i32 0), i32 4, i32 9, i64 {{[0-9]+}} }>]
diff --git a/test/Driver/aarch64-cpus.c b/test/Driver/aarch64-cpus.c
index 6355c24..7b0fac4 100644
--- a/test/Driver/aarch64-cpus.c
+++ b/test/Driver/aarch64-cpus.c
@@ -74,6 +74,20 @@
 // RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA72 %s
 // ARM64-CA72: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "cortex-a72"
 
+// RUN: %clang -target aarch64 -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1 %s
+// RUN: %clang -target aarch64 -mlittle-endian -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1 %s
+// RUN: %clang -target aarch64_be -mlittle-endian -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1 %s
+// RUN: %clang -target aarch64 -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1 %s
+// RUN: %clang -target aarch64 -mlittle-endian -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1 %s
+// RUN: %clang -target aarch64_be -mlittle-endian -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1 %s
+// M1: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "exynos-m1"
+
+// RUN: %clang -target arm64 -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M1 %s
+// RUN: %clang -target arm64 -mlittle-endian -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M1 %s
+// RUN: %clang -target arm64 -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M1 %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M1 %s
+// ARM64-M1: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "exynos-m1"
+
 // RUN: %clang -target aarch64_be -### -c %s 2>&1 | FileCheck -check-prefix=GENERIC-BE %s
 // RUN: %clang -target aarch64 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=GENERIC-BE %s
 // RUN: %clang -target aarch64_be -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=GENERIC-BE %s
@@ -111,6 +125,14 @@
 // RUN: %clang -target aarch64_be -mbig-endian -mtune=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CA72-BE %s
 // CA72-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "cortex-a72"
 
+// RUN: %clang -target aarch64_be -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-BE %s
+// RUN: %clang -target aarch64 -mbig-endian -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-BE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-BE %s
+// RUN: %clang -target aarch64_be -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-BE %s
+// RUN: %clang -target aarch64 -mbig-endian -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-BE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=M1-BE %s
+// M1-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "exynos-m1"
+
 // RUN: %clang -target aarch64 -mcpu=cortex-a57 -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE %s
 // RUN: %clang -target aarch64 -mtune=cortex-a53 -mcpu=cortex-a57  -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE %s
 // RUN: %clang -target aarch64 -mcpu=cortex-a72 -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE %s
diff --git a/test/Driver/arm-cortex-cpus.c b/test/Driver/arm-cortex-cpus.c
index c0492ef..6a4d2d63 100644
--- a/test/Driver/arm-cortex-cpus.c
+++ b/test/Driver/arm-cortex-cpus.c
@@ -398,40 +398,48 @@
 // RUN: %clang -target arm -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A %s
 // RUN: %clang -target arm -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A %s
 // RUN: %clang -target arm -mcpu=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A %s
+// RUN: %clang -target arm -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A %s
 // RUN: %clang -target arm -mcpu=cortex-a35 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A %s
 // RUN: %clang -target arm -mcpu=cortex-a53 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A %s
 // RUN: %clang -target arm -mcpu=cortex-a57 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A %s
 // RUN: %clang -target arm -mcpu=cortex-a72 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A %s
+// RUN: %clang -target arm -mcpu=exynos-m1 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A %s
 // CHECK-CPUV8A: "-cc1"{{.*}} "-triple" "armv8-{{.*}}
 
 // RUN: %clang -target armeb -mcpu=cortex-a35 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A %s
 // RUN: %clang -target armeb -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A %s
 // RUN: %clang -target armeb -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A %s
 // RUN: %clang -target armeb -mcpu=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A %s
+// RUN: %clang -target armeb -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A %s
 // RUN: %clang -target arm -mcpu=cortex-a35 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A %s
 // RUN: %clang -target arm -mcpu=cortex-a53 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A %s
 // RUN: %clang -target arm -mcpu=cortex-a57 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A %s
 // RUN: %clang -target arm -mcpu=cortex-a72 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A %s
+// RUN: %clang -target arm -mcpu=exynos-m1 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A %s
 // CHECK-BE-CPUV8A: "-cc1"{{.*}} "-triple" "armebv8-{{.*}}
 
 // RUN: %clang -target arm -mcpu=cortex-a35 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A-THUMB %s
 // RUN: %clang -target arm -mcpu=cortex-a53 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A-THUMB %s
 // RUN: %clang -target arm -mcpu=cortex-a57 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A-THUMB %s
 // RUN: %clang -target arm -mcpu=cortex-a72 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A-THUMB %s
+// RUN: %clang -target arm -mcpu=exynos-m1 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A-THUMB %s
 // RUN: %clang -target arm -mcpu=cortex-a35 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A-THUMB %s
 // RUN: %clang -target arm -mcpu=cortex-a53 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A-THUMB %s
 // RUN: %clang -target arm -mcpu=cortex-a57 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A-THUMB %s
 // RUN: %clang -target arm -mcpu=cortex-a72 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A-THUMB %s
+// RUN: %clang -target arm -mcpu=exynos-m1 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8A-THUMB %s
 // CHECK-CPUV8A-THUMB: "-cc1"{{.*}} "-triple" "thumbv8-{{.*}}
 
 // RUN: %clang -target armeb -mcpu=cortex-a35 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A-THUMB %s
 // RUN: %clang -target armeb -mcpu=cortex-a53 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A-THUMB %s
 // RUN: %clang -target armeb -mcpu=cortex-a57 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A-THUMB %s
 // RUN: %clang -target armeb -mcpu=cortex-a72 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A-THUMB %s
+// RUN: %clang -target armeb -mcpu=exynos-m1 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A-THUMB %s
 // RUN: %clang -target arm -mcpu=cortex-a35 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A-THUMB %s
 // RUN: %clang -target arm -mcpu=cortex-a53 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A-THUMB %s
 // RUN: %clang -target arm -mcpu=cortex-a57 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A-THUMB %s
 // RUN: %clang -target arm -mcpu=cortex-a72 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A-THUMB %s
+// RUN: %clang -target arm -mcpu=exynos-m1 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A-THUMB %s
 // CHECK-BE-CPUV8A-THUMB: "-cc1"{{.*}} "-triple" "thumbebv8-{{.*}}
 
 // ================== Check whether -mcpu accepts mixed-case values.
diff --git a/test/Driver/arm-no-movt.c b/test/Driver/arm-no-movt.c
index 9684d0e..6908593 100644
--- a/test/Driver/arm-no-movt.c
+++ b/test/Driver/arm-no-movt.c
@@ -4,6 +4,11 @@
 // RUN: %clang -target armv7-apple-darwin -mkernel -### %s 2>&1 \
 // RUN:    | FileCheck %s -check-prefix CHECK-KERNEL
 
+// RUN: %clang -target armv7-none-gnueabi -mno-movt -### %s 2>&1 \
+// RUN:    | FileCheck %s -check-prefix CHECK-NO-MOVT
+
 // CHECK-DEFAULT-NOT: "-target-feature" "+no-movt"
 
 // CHECK-KERNEL: "-target-feature" "+no-movt"
+
+// CHECK-NO-MOVT: "-target-feature" "+no-movt"
diff --git a/test/Driver/debug-options.c b/test/Driver/debug-options.c
index e160b7f..72d0136 100644
--- a/test/Driver/debug-options.c
+++ b/test/Driver/debug-options.c
@@ -169,3 +169,8 @@
 // NOCI-NOT: "-dwarf-column-info"
 //
 // GEXTREFS: "-dwarf-ext-refs" "-fmodule-format=obj" "-debug-info-kind={{standalone|limited}}"
+
+// RUN: not %clang -cc1 -debug-info-kind=watkind 2>&1 | FileCheck -check-prefix=BADSTRING1 %s
+// BADSTRING1: error: invalid value 'watkind' in '-debug-info-kind=watkind'
+// RUN: not %clang -cc1 -debugger-tuning=gmodal 2>&1 | FileCheck -check-prefix=BADSTRING2 %s
+// BADSTRING2: error: invalid value 'gmodal' in '-debugger-tuning=gmodal'
diff --git a/test/Driver/fortran.f95 b/test/Driver/fortran.f95
index 9334cbe..982f4eb 100644
--- a/test/Driver/fortran.f95
+++ b/test/Driver/fortran.f95
@@ -1,9 +1,15 @@
 // Check that the clang driver can invoke gcc to compile Fortran.
 
 // RUN: %clang -target x86_64-unknown-linux-gnu -integrated-as -c %s -### 2>&1 \
-// RUN:   | FileCheck %s
-// CHECK: gcc
-// CHECK: "-S"
-// CHECK: "-x" "f95"
-// CHECK: clang
-// CHECK: "-cc1as"
+// RUN:   | FileCheck --check-prefix=CHECK-OBJECT %s
+// CHECK-OBJECT: gcc
+// CHECK-OBJECT: "-c"
+// CHECK-OBJECT: "-x" "f95"
+// CHECK-OBJECT-NOT: cc1as
+
+// RUN: %clang -target x86_64-unknown-linux-gnu -integrated-as -S %s -### 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-ASM %s
+// CHECK-ASM: gcc
+// CHECK-ASM: "-S"
+// CHECK-ASM: "-x" "f95"
+// CHECK-ASM-NOT: cc1
diff --git a/test/Driver/instrprof-ld.c b/test/Driver/instrprof-ld.c
index b3ba12e..05f65d6 100644
--- a/test/Driver/instrprof-ld.c
+++ b/test/Driver/instrprof-ld.c
@@ -105,3 +105,19 @@
 //
 // CHECK-WATCHOS-ARMV7: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
 // CHECK-WATCHOS-ARMV7: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}darwin{{/|\\\\}}libclang_rt.profile_watchos.a"
+//
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     -target i386-pc-win32 -fprofile-instr-generate \
+// RUN:     -resource-dir=%S/Inputs/resource_dir \
+// RUN:   | FileCheck --check-prefix=CHECK-WINDOWS-I386 %s
+//
+// CHECK-WINDOWS-I386: "{{.*}}link{{(.exe)?}}"
+// CHECK-WINDOWS-I386: "{{.*}}clang_rt.profile-i386.lib"
+//
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     -target x86_64-pc-win32 -fprofile-instr-generate \
+// RUN:     -resource-dir=%S/Inputs/resource_dir \
+// RUN:   | FileCheck --check-prefix=CHECK-WINDOWS-X86-64 %s
+//
+// CHECK-WINDOWS-X86-64: "{{.*}}link{{(.exe)?}}"
+// CHECK-WINDOWS-X86-64: "{{.*}}clang_rt.profile-x86_64.lib"
diff --git a/test/Driver/ps4-analyzer-defaults.cpp b/test/Driver/ps4-analyzer-defaults.cpp
new file mode 100644
index 0000000..e1649a3
--- /dev/null
+++ b/test/Driver/ps4-analyzer-defaults.cpp
@@ -0,0 +1,33 @@
+// Check that the default analyzer checkers for PS4 are:
+//   core
+//   cplusplus
+//   deadcode
+//   nullability
+//   unix
+// Excluding:
+//   unix.API
+//   unix.Vfork
+
+// Check for expected checkers
+// RUN: %clang -target x86_64-scei-ps4 --analyze %s -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PS4-POS-CHECKERS
+//
+// Negative check for unexpected checkers
+// RUN: %clang -target x86_64-scei-ps4 --analyze %s -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PS4-NEG-CHECKERS
+//
+// Check for all unix checkers except API and Vfork
+// RUN: %clang -target x86_64-scei-ps4 --analyze %s -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PS4-UNIX-CHECKERS
+
+// CHECK-PS4-POS-CHECKERS-DAG: analyzer-checker=core
+// CHECK-PS4-POS-CHECKERS-DAG: analyzer-checker=cplusplus
+// CHECK-PS4-POS-CHECKERS-DAG: analyzer-checker=deadcode
+// CHECK-PS4-POS-CHECKERS-DAG: analyzer-checker=nullability
+//
+// CHECK-PS4-NEG-CHECKERS-NOT: analyzer-checker={{osx|security}}
+//
+// CHECK-PS4-UNIX-CHECKERS: analyzer-checker=unix
+// CHECK-PS4-UNIX-CHECKERS-DAG: analyzer-disable-checker=unix.API
+// CHECK-PS4-UNIX-CHECKERS-DAG: analyzer-disable-checker=unix.Vfork
+// CHECK-PS4-UNIX-CHECKERS-NOT: analyzer-checker=unix.{{API|Vfork}}
diff --git a/test/Index/cindex-test-inclusions.c b/test/Index/cindex-test-inclusions.c
index 9c7de2e..b85cd24 100644
--- a/test/Index/cindex-test-inclusions.c
+++ b/test/Index/cindex-test-inclusions.c
@@ -11,3 +11,14 @@
 // CHECK: included by:
 // CHECK: include_test.h:1:10
 // CHECK: cindex-test-inclusions.c:3:10
+
+// RUN: env CINDEXTEST_EDITING=1 c-index-test -test-inclusion-stack-source %s 2>&1 | FileCheck -check-prefix=REPARSE %s
+// REPARSE: include_test_2.h
+// REPARSE: included by:
+// REPARSE: include_test.h:1:10
+// REPARSE: cindex-test-inclusions.c:3:10
+// REPARSE: include_test.h
+// REPARSE: included by:
+// REPARSE: cindex-test-inclusions.c:3:10
+// REPARSE: cindex-test-inclusions.c
+// REPARSE: included by:
diff --git a/test/Modules/ModuleDebugInfo.cpp b/test/Modules/ModuleDebugInfo.cpp
index 81192cb..82500f0 100644
--- a/test/Modules/ModuleDebugInfo.cpp
+++ b/test/Modules/ModuleDebugInfo.cpp
@@ -13,7 +13,7 @@
 // PCH:
 // RUN: %clang_cc1 -triple %itanium_abi_triple -x c++ -std=c++11 -emit-pch -fmodule-format=obj -I %S/Inputs -o %t.pch %S/Inputs/DebugCXX.h -mllvm -debug-only=pchcontainer &>%t-pch.ll
 // RUN: cat %t-pch.ll | FileCheck %s
-// RUN: cat %t-mod.ll | FileCheck --check-prefix=CHECK-NEG %s
+// RUN: cat %t-pch.ll | FileCheck --check-prefix=CHECK-NEG %s
 
 #ifdef MODULES
 @import DebugCXX;
diff --git a/test/Modules/tag-injection.cpp b/test/Modules/tag-injection.cpp
new file mode 100644
index 0000000..75c8b5f
--- /dev/null
+++ b/test/Modules/tag-injection.cpp
@@ -0,0 +1,22 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: touch %t/a.h
+// RUN: echo 'struct X {};' > %t/b.h
+// RUN: echo 'module X { module a { header "a.h" } module b { header "b.h" } }' > %t/x.modulemap
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t -x c++ -fmodule-map-file=%t/x.modulemap %s -I%t -verify -fmodules-local-submodule-visibility -std=c++11
+
+#include "a.h"
+
+struct A {
+  // This use of 'struct X' makes the declaration (but not definition) of X visible.
+  virtual void f(struct X *p);
+};
+
+namespace N {
+  struct B : A {
+    void f(struct X *q) override;
+  };
+}
+
+X x; // expected-error {{definition of 'X' must be imported from module 'X.b' before it is required}}
+// expected-note@b.h:1 {{here}}
diff --git a/test/OpenMP/for_simd_ast_print.cpp b/test/OpenMP/for_simd_ast_print.cpp
index 725c727..d4b13ba 100644
--- a/test/OpenMP/for_simd_ast_print.cpp
+++ b/test/OpenMP/for_simd_ast_print.cpp
@@ -14,8 +14,8 @@ template<class T, class N> T reduct(T* arr, N num) {
   N myind;
   T sum = (T)0;
 // CHECK: T sum = (T)0;
-#pragma omp for simd private(myind, g_ind), linear(ind), aligned(arr)
-// CHECK-NEXT: #pragma omp for simd private(myind,g_ind) linear(ind) aligned(arr)
+#pragma omp for simd private(myind, g_ind), linear(ind), aligned(arr) ordered
+// CHECK-NEXT: #pragma omp for simd private(myind,g_ind) linear(ind) aligned(arr) ordered
   for (i = 0; i < num; ++i) {
     myind = ind;
     T cur = arr[myind];
@@ -92,8 +92,8 @@ int main (int argc, char **argv) {
   int k1=0,k2=0;
   static int *a;
 // CHECK: static int *a;
-#pragma omp for simd
-// CHECK-NEXT: #pragma omp for simd
+#pragma omp for simd ordered
+// CHECK-NEXT: #pragma omp for simd ordered
   for (int i=0; i < 2; ++i)*a=2;
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
 // CHECK-NEXT: *a = 2;
diff --git a/test/OpenMP/for_simd_codegen.cpp b/test/OpenMP/for_simd_codegen.cpp
index cc8193d..e1aa892 100644
--- a/test/OpenMP/for_simd_codegen.cpp
+++ b/test/OpenMP/for_simd_codegen.cpp
@@ -28,24 +28,24 @@ void simple(float *a, float *b, float *c, float *d) {
 // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]],
 // CHECK: store i32 [[LB_VAL]], i32* [[OMP_IV:%[^,]+]],
 
-// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID:[0-9]+]]
-// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]
+// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]
 // CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP]], label %[[SIMPLE_LOOP1_BODY:.+]], label %[[SIMPLE_LOOP1_END:[^,]+]]
   for (int i = 3; i < 32; i += 5) {
 // CHECK: [[SIMPLE_LOOP1_BODY]]
 // Start of body: calculate i from IV:
-// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
 // CHECK: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 5
 // CHECK-NEXT: [[CALC_I_2:%.+]] = add nsw i32 3, [[CALC_I_1]]
-// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
 // ... loop body ...
 // End of body: store into a[i]:
-// CHECK: store float [[RESULT:%.+]], float* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// CHECK: store float [[RESULT:%.+]], float*
     a[i] = b[i] * c[i] * d[i];
-// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]
 // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
-// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
 // br label %{{.+}}, !llvm.loop !{{.+}}
   }
 // CHECK: [[SIMPLE_LOOP1_END]]
@@ -132,36 +132,36 @@ void simple(float *a, float *b, float *c, float *d) {
 // CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
 // CHECK: store i64 [[LB_VAL]], i64* [[OMP_IV3:%[^,]+]],
 
-// CHECK: [[IV3:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID:[0-9]+]]
-// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK: [[IV3:%.+]] = load i64, i64* [[OMP_IV3]]
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]]
 // CHECK-NEXT: [[CMP3:%.+]] = icmp ule i64 [[IV3]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP3]], label %[[SIMPLE_LOOP3_BODY:.+]], label %[[SIMPLE_LOOP3_END:[^,]+]]
   for (unsigned long long it = 2000; it >= 600; it-=400) {
 // CHECK: [[SIMPLE_LOOP3_BODY]]
 // Start of body: calculate it from IV:
-// CHECK: [[IV3_0:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK: [[IV3_0:%.+]] = load i64, i64* [[OMP_IV3]]
 // CHECK-NEXT: [[LC_IT_1:%.+]] = mul i64 [[IV3_0]], 400
 // CHECK-NEXT: [[LC_IT_2:%.+]] = sub i64 2000, [[LC_IT_1]]
-// CHECK-NEXT: store i64 [[LC_IT_2]], i64* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK-NEXT: store i64 [[LC_IT_2]], i64*
 //
 // Linear start and step are used to calculate current value of the linear variable.
-// CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK-NOT: store i32 {{.+}}, i32* [[LIN_VAR]],{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK-NEXT: [[IV3_1:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]]
+// CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]]
+// CHECK-NOT: store i32 {{.+}}, i32* [[LIN_VAR]],
+// CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]
+// CHECK-NEXT: [[IV3_1:%.+]] = load i64, i64* [[OMP_IV3]]
 // CHECK-NEXT: [[MUL:%.+]] = mul i64 [[IV3_1]], 1
 // CHECK: [[GEP:%.+]] = getelementptr{{.*}}[[GLINSTART]]
-// CHECK-NEXT: store double* [[GEP]], double** [[G_PTR_CUR:%[^,]+]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK-NEXT: store double* [[GEP]], double** [[G_PTR_CUR:%[^,]+]]
     *g_ptr++ = 0.0;
-// CHECK: [[GEP_VAL:%.+]] = load double{{.*}}[[G_PTR_CUR]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK: store double{{.*}}[[GEP_VAL]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK: [[GEP_VAL:%.+]] = load double{{.*}}[[G_PTR_CUR]]
+// CHECK: store double{{.*}}[[GEP_VAL]]
     a[it + lin]++;
 // CHECK: [[FLT_INC:%.+]] = fadd float
-// CHECK-NEXT: store float [[FLT_INC]],{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK: [[IV3_2:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK-NEXT: store float [[FLT_INC]],
+// CHECK: [[IV3_2:%.+]] = load i64, i64* [[OMP_IV3]]
 // CHECK-NEXT: [[ADD3_2:%.+]] = add i64 [[IV3_2]], 1
-// CHECK-NEXT: store i64 [[ADD3_2]], i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK-NEXT: store i64 [[ADD3_2]], i64* [[OMP_IV3]]
   }
 // CHECK: [[SIMPLE_LOOP3_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
@@ -190,22 +190,22 @@ void simple(float *a, float *b, float *c, float *d) {
 // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]],
 // CHECK: store i32 [[LB_VAL]], i32* [[OMP_IV4:%[^,]+]],
 
-// CHECK: [[IV4:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID:[0-9]+]]
-// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
+// CHECK: [[IV4:%.+]] = load i32, i32* [[OMP_IV4]]
+// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]
 // CHECK-NEXT: [[CMP4:%.+]] = icmp sle i32 [[IV4]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP4]], label %[[SIMPLE_LOOP4_BODY:.+]], label %[[SIMPLE_LOOP4_END:[^,]+]]
   for (short it = 6; it <= 20; it-=-4) {
 // CHECK: [[SIMPLE_LOOP4_BODY]]
 // Start of body: calculate it from IV:
-// CHECK: [[IV4_0:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
+// CHECK: [[IV4_0:%.+]] = load i32, i32* [[OMP_IV4]]
 // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i32 [[IV4_0]], 4
 // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i32 6, [[LC_IT_1]]
 // CHECK-NEXT: [[LC_IT_3:%.+]] = trunc i32 [[LC_IT_2]] to i16
-// CHECK-NEXT: store i16 [[LC_IT_3]], i16* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
+// CHECK-NEXT: store i16 [[LC_IT_3]], i16*
 
-// CHECK: [[IV4_2:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
+// CHECK: [[IV4_2:%.+]] = load i32, i32* [[OMP_IV4]]
 // CHECK-NEXT: [[ADD4_2:%.+]] = add nsw i32 [[IV4_2]], 1
-// CHECK-NEXT: store i32 [[ADD4_2]], i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
+// CHECK-NEXT: store i32 [[ADD4_2]], i32* [[OMP_IV4]]
   }
 // CHECK: [[SIMPLE_LOOP4_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
@@ -227,22 +227,22 @@ void simple(float *a, float *b, float *c, float *d) {
 // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]],
 // CHECK: store i32 [[LB_VAL]], i32* [[OMP_IV5:%[^,]+]],
 
-// CHECK: [[IV5:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID:[0-9]+]]
-// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
+// CHECK: [[IV5:%.+]] = load i32, i32* [[OMP_IV5]]
+// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]
 // CHECK-NEXT: [[CMP5:%.+]] = icmp sle i32 [[IV5]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP5]], label %[[SIMPLE_LOOP5_BODY:.+]], label %[[SIMPLE_LOOP5_END:[^,]+]]
   for (unsigned char it = 'z'; it >= 'a'; it+=-1) {
 // CHECK: [[SIMPLE_LOOP5_BODY]]
 // Start of body: calculate it from IV:
-// CHECK: [[IV5_0:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
+// CHECK: [[IV5_0:%.+]] = load i32, i32* [[OMP_IV5]]
 // CHECK-NEXT: [[IV5_1:%.+]] = mul nsw i32 [[IV5_0]], 1
 // CHECK-NEXT: [[LC_IT_1:%.+]] = sub nsw i32 122, [[IV5_1]]
 // CHECK-NEXT: [[LC_IT_2:%.+]] = trunc i32 [[LC_IT_1]] to i8
-// CHECK-NEXT: store i8 [[LC_IT_2]], i8* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
+// CHECK-NEXT: store i8 [[LC_IT_2]], i8*
 
-// CHECK: [[IV5_2:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
+// CHECK: [[IV5_2:%.+]] = load i32, i32* [[OMP_IV5]]
 // CHECK-NEXT: [[ADD5_2:%.+]] = add nsw i32 [[IV5_2]], 1
-// CHECK-NEXT: store i32 [[ADD5_2]], i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
+// CHECK-NEXT: store i32 [[ADD5_2]], i32* [[OMP_IV5]]
   }
 // CHECK: [[SIMPLE_LOOP5_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
@@ -276,24 +276,24 @@ void simple(float *a, float *b, float *c, float *d) {
 
 // CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]]
 // CHECK: [[SIMD_LOOP7_COND]]
-// CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID:[0-9]+]]
-// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]
+// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]
 // CHECK-NEXT: [[CMP7:%.+]] = icmp sle i64 [[IV7]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP7]], label %[[SIMPLE_LOOP7_BODY:.+]], label %[[SIMPLE_LOOP7_END:[^,]+]]
   for (long long i = -10; i < 10; i += 3) {
 // CHECK: [[SIMPLE_LOOP7_BODY]]
 // Start of body: calculate i from IV:
-// CHECK: [[IV7_0:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK: [[IV7_0:%.+]] = load i64, i64* [[OMP_IV7]]
 // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV7_0]], 3
 // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]]
-// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
-// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],
+// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]
 // CHECK-NEXT: [[CONV:%.+]] = trunc i64 [[LC_VAL]] to i32
-// CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV:%[^,]+]],
     A = i;
-// CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]]
 // CHECK-NEXT: [[ADD7_2:%.+]] = add nsw i64 [[IV7_2]], 1
-// CHECK-NEXT: store i64 [[ADD7_2]], i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK-NEXT: store i64 [[ADD7_2]], i64* [[OMP_IV7]]
   }
 // CHECK: [[SIMPLE_LOOP7_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
@@ -329,23 +329,23 @@ void simple(float *a, float *b, float *c, float *d) {
 
 // CHECK: br label %[[SIMD_LOOP8_COND:[^,]+]]
 // CHECK: [[SIMD_LOOP8_COND]]
-// CHECK-NEXT: [[IV8:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID:[0-9]+]]
-// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+// CHECK-NEXT: [[IV8:%.+]] = load i64, i64* [[OMP_IV8]]
+// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]
 // CHECK-NEXT: [[CMP8:%.+]] = icmp sle i64 [[IV8]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP8]], label %[[SIMPLE_LOOP8_BODY:.+]], label %[[SIMPLE_LOOP8_END:[^,]+]]
   for (long long i = -10; i < 10; i += 3) {
 // CHECK: [[SIMPLE_LOOP8_BODY]]
 // Start of body: calculate i from IV:
-// CHECK: [[IV8_0:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+// CHECK: [[IV8_0:%.+]] = load i64, i64* [[OMP_IV8]]
 // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV8_0]], 3
 // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]]
-// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
-// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
-// CHECK: store i32 %{{.+}}, i32* [[R_PRIV]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],
+// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]
+// CHECK: store i32 %{{.+}}, i32* [[R_PRIV]],
     R *= i;
-// CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+// CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]]
 // CHECK-NEXT: [[ADD8_2:%.+]] = add nsw i64 [[IV8_2]], 1
-// CHECK-NEXT: store i64 [[ADD8_2]], i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+// CHECK-NEXT: store i64 [[ADD8_2]], i64* [[OMP_IV8]]
   }
 // CHECK: [[SIMPLE_LOOP8_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
@@ -389,27 +389,27 @@ int templ1(T a, T *z) {
 // CHECK: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]],
 
 // ...
-// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID:[0-9]+]]
-// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]]
+// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]
 // CHECK-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]]
 // CHECK: [[T1_BODY]]
 // Loop counters i and j updates:
-// CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]]
 // CHECK-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4
 // CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1
 // CHECK-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]]
 // CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32
-// CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
-// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK-NEXT: store i32 [[I_2]], i32*
+// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]
 // CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4
 // CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2
 // CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]]
-// CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK-NEXT: store i64 [[J_2_ADD0]], i64*
 // simd.for.inc:
-// CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]]
 // CHECK-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1
-// CHECK-NEXT: store i64 [[INC]], i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK-NEXT: store i64 [[INC]], i64* [[T1_OMP_IV]]
 // CHECK-NEXT: br label {{%.+}}
 // CHECK: [[T1_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
@@ -480,14 +480,14 @@ void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) {
 // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]],
 // CHECK: store i32 [[LB_VAL]], i32* [[IT_OMP_IV:%[^,]+]],
 
-// CHECK: [[IV:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}} !llvm.mem.parallel_loop_access ![[ITER_LOOP_ID:[0-9]+]]
-// CHECK-NEXT: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
+// CHECK: [[IV:%.+]] = load i32, i32* [[IT_OMP_IV]]
+// CHECK-NEXT: [[UB_VAL:%.+]] = load i32, i32* [[UB]]
 // CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP]], label %[[IT_BODY:[^,]+]], label %[[IT_END:[^,]+]]
   for (IterDouble i = ia; i < ib; ++i) {
 // CHECK: [[IT_BODY]]
 // Start of body: calculate i from index:
-// CHECK: [[IV1:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
+// CHECK: [[IV1:%.+]] = load i32, i32* [[IT_OMP_IV]]
 // Call of operator+ (i, IV).
 // CHECK: {{%.+}} = invoke {{.+}} @{{.*}}IterDouble{{.*}}
 // ... loop body ...
@@ -495,12 +495,12 @@ void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) {
 // Float multiply and save result.
 // CHECK: [[MULR:%.+]] = fmul double {{%.+}}, 5.000000e-01
 // CHECK-NEXT: invoke {{.+}} @{{.*}}IterDouble{{.*}}
-// CHECK: store double [[MULR:%.+]], double* [[RESULT_ADDR:%.+]], !llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
+// CHECK: store double [[MULR:%.+]], double* [[RESULT_ADDR:%.+]]
    ++ic;
 //
-// CHECK: [[IV2:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
+// CHECK: [[IV2:%.+]] = load i32, i32* [[IT_OMP_IV]]
 // CHECK-NEXT: [[ADD2:%.+]] = add nsw i32 [[IV2]], 1
-// CHECK-NEXT: store i32 [[ADD2]], i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
+// CHECK-NEXT: store i32 [[ADD2]], i32* [[IT_OMP_IV]]
 // br label %{{.*}}, !llvm.loop ![[ITER_LOOP_ID]]
   }
 // CHECK: [[IT_END]]
@@ -533,8 +533,8 @@ void collapsed(float *a, float *b, float *c, float *d) {
 //
   #pragma omp for simd collapse(4)
 
-// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID:[0-9]+]]
-// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]
+// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]
 // CHECK-NEXT: [[CMP:%.+]] = icmp ule i32 [[IV]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP]], label %[[COLL1_BODY:[^,]+]], label %[[COLL1_END:[^,]+]]
   for (i = 1; i < 3; i++) // 2 iterations
@@ -544,25 +544,25 @@ void collapsed(float *a, float *b, float *c, float *d) {
         {
 // CHECK: [[COLL1_BODY]]
 // Start of body: calculate i from index:
-// CHECK: [[IV1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK: [[IV1:%.+]] = load i32, i32* [[OMP_IV]]
 // Calculation of the loop counters values.
 // CHECK: [[CALC_I_1:%.+]] = udiv i32 [[IV1]], 60
 // CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1
 // CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]]
 // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
-// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]
 // CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20
 // CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3
 // CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1
 // CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]]
 // CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]]
-// CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]
 // CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5
 // CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4
 // CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1
 // CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]]
 // CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]]
-// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]
 // CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5
 // CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1
 // CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]]
@@ -570,12 +570,12 @@ void collapsed(float *a, float *b, float *c, float *d) {
 // CHECK-NEXT: store i16 [[CALC_L_3]], i16* [[LC_L:.+]]
 // ... loop body ...
 // End of body: store into a[i]:
-// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]
     float res = b[j] * c[k];
     a[i] = res * d[l];
-// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV]]
 // CHECK-NEXT: [[ADD2:%.+]] = add i32 [[IV2]], 1
-// CHECK-NEXT: store i32 [[ADD2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK-NEXT: store i32 [[ADD2]], i32* [[OMP_IV]]
 // br label %{{[^,]+}}, !llvm.loop ![[COLL1_LOOP_ID]]
 // CHECK: [[COLL1_END]]
   }
@@ -621,8 +621,8 @@ void widened(float *a, float *b, float *c, float *d) {
 //
   #pragma omp for simd collapse(2) private(globalfloat, localint)
 
-// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID:[0-9]+]]
-// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]]
 // CHECK-NEXT: [[CMP:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP]], label %[[WIDE1_BODY:[^,]+]], label %[[WIDE1_END:[^,]+]]
   for (i = 1; i < 3; i++) // 2 iterations
@@ -630,10 +630,10 @@ void widened(float *a, float *b, float *c, float *d) {
   {
 // CHECK: [[WIDE1_BODY]]
 // Start of body: calculate i from index:
-// CHECK: [[IV1:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK: [[IV1:%.+]] = load i64, i64* [[OMP_IV]]
 // Calculation of the loop counters values...
 // CHECK: store i32 {{[^,]+}}, i32* [[LC_I:.+]]
-// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]
 // CHECK: store i16 {{[^,]+}}, i16* [[LC_J:.+]]
 // ... loop body ...
 //
@@ -642,14 +642,14 @@ void widened(float *a, float *b, float *c, float *d) {
     globalfloat = (float)j/i;
     float res = b[j] * c[j];
 // Store into a[i]:
-// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]
     a[i] = res * d[i];
 // Then there's a store into private var localint:
-// CHECK: store i32 {{.+}}, i32* [[LOCALINT:%[^,]+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK: store i32 {{.+}}, i32* [[LOCALINT:%[^,]+]]
     localint = (int)j;
-// CHECK: [[IV2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK: [[IV2:%.+]] = load i64, i64* [[OMP_IV]]
 // CHECK-NEXT: [[ADD2:%.+]] = add nsw i64 [[IV2]], 1
-// CHECK-NEXT: store i64 [[ADD2]], i64* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK-NEXT: store i64 [[ADD2]], i64* [[OMP_IV]]
 //
 // br label %{{[^,]+}}, !llvm.loop ![[WIDE1_LOOP_ID]]
 // CHECK: [[WIDE1_END]]
diff --git a/test/OpenMP/for_simd_loop_messages.cpp b/test/OpenMP/for_simd_loop_messages.cpp
index 958c7f6..afd7b0b 100644
--- a/test/OpenMP/for_simd_loop_messages.cpp
+++ b/test/OpenMP/for_simd_loop_messages.cpp
@@ -719,10 +719,18 @@ void test_loop_firstprivate_lastprivate() {
 
 void test_ordered() {
 #pragma omp parallel
-// expected-error@+1 2 {{unexpected OpenMP clause 'ordered' in directive '#pragma omp for simd'}}
 #pragma omp for simd ordered ordered // expected-error {{directive '#pragma omp for simd' cannot contain more than one 'ordered' clause}}
   for (int i = 0; i < 16; ++i)
     ;
+#pragma omp parallel
+#pragma omp for simd ordered
+  for (int i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp for simd' directive}}
+#pragma omp for simd ordered(1)
+  for (int i = 0; i < 16; ++i)
+    ;
 }
 
 void test_nowait() {
diff --git a/test/OpenMP/nesting_of_regions.cpp b/test/OpenMP/nesting_of_regions.cpp
index 6445c6b..b2b87db 100644
--- a/test/OpenMP/nesting_of_regions.cpp
+++ b/test/OpenMP/nesting_of_regions.cpp
@@ -2060,7 +2060,7 @@ void foo() {
   }
 #pragma omp ordered
   {
-#pragma omp parallel for simd ordered //expected-error {{unexpected OpenMP clause 'ordered' in directive '#pragma omp parallel for simd'}}
+#pragma omp parallel for simd ordered
     for (int j = 0; j < 10; ++j) {
 #pragma omp ordered // expected-error {{OpenMP constructs may not be nested inside a simd region}}
       {
@@ -2070,6 +2070,16 @@ void foo() {
   }
 #pragma omp ordered
   {
+#pragma omp parallel for simd ordered
+    for (int j = 0; j < 10; ++j) {
+#pragma omp ordered simd
+      {
+        bar();
+      }
+    }
+  }
+#pragma omp ordered
+  {
 #pragma omp parallel for
     for (int i = 0; i < 10; ++i)
       ;
diff --git a/test/OpenMP/ordered_codegen.cpp b/test/OpenMP/ordered_codegen.cpp
index e77c1be..daa6211 100644
--- a/test/OpenMP/ordered_codegen.cpp
+++ b/test/OpenMP/ordered_codegen.cpp
@@ -224,6 +224,14 @@ void foo_simd(int low, int up) {
 #pragma omp ordered simd
     f[i] = 1.0;
   }
+  // CHECK: store float 0.000000e+00, float* %{{.+}}, align {{[0-9]+}}
+  // CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}) #{{[0-9]+}}
+#pragma omp for simd ordered
+  for (int i = low; i < up; ++i) {
+    f[i] = 0.0;
+#pragma omp ordered simd
+    f[i] = 1.0;
+  }
 }
 
 // CHECK: define internal void [[CAP_FUNC]](i32* dereferenceable({{[0-9]+}}) %{{.+}}) #
diff --git a/test/OpenMP/parallel_for_simd_ast_print.cpp b/test/OpenMP/parallel_for_simd_ast_print.cpp
index e23c2cb..1b9415d 100644
--- a/test/OpenMP/parallel_for_simd_ast_print.cpp
+++ b/test/OpenMP/parallel_for_simd_ast_print.cpp
@@ -44,8 +44,8 @@ template<class T> struct S {
     }
     const T clen = 3;
 // CHECK: T clen = 3;
-    #pragma omp parallel for simd safelen(clen-1) simdlen(clen-1)
-// CHECK-NEXT: #pragma omp parallel for simd safelen(clen - 1) simdlen(clen - 1)
+    #pragma omp parallel for simd safelen(clen-1) simdlen(clen-1) ordered
+// CHECK-NEXT: #pragma omp parallel for simd safelen(clen - 1) simdlen(clen - 1) ordered
     for(T i = clen+2; i < 20; ++i) {
 // CHECK-NEXT: for (T i = clen + 2; i < 20; ++i) {
       v[i] = v[v-clen] + 1;
@@ -92,8 +92,8 @@ int main (int argc, char **argv) {
   int k1=0,k2=0;
   static int *a;
 // CHECK: static int *a;
-#pragma omp parallel for simd if(parallel :b)
-// CHECK-NEXT: #pragma omp parallel for simd if(parallel: b)
+#pragma omp parallel for simd if(parallel :b) ordered
+// CHECK-NEXT: #pragma omp parallel for simd if(parallel: b) ordered
   for (int i=0; i < 2; ++i)*a=2;
 // CHECK-NEXT: for (int i = 0; i < 2; ++i)
 // CHECK-NEXT: *a = 2;
diff --git a/test/OpenMP/parallel_for_simd_codegen.cpp b/test/OpenMP/parallel_for_simd_codegen.cpp
index 53f18d0..9a6ad2f 100644
--- a/test/OpenMP/parallel_for_simd_codegen.cpp
+++ b/test/OpenMP/parallel_for_simd_codegen.cpp
@@ -41,24 +41,24 @@ void simple(float *a, float *b, float *c, float *d) {
 // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]],
 // CHECK: store i32 [[LB_VAL]], i32* [[OMP_IV:%[^,]+]],
 
-// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID:[0-9]+]]
-// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]
+// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]
 // CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP]], label %[[SIMPLE_LOOP1_BODY:.+]], label %[[SIMPLE_LOOP1_END:[^,]+]]
   for (int i = 3; i < 32; i += 5) {
 // CHECK: [[SIMPLE_LOOP1_BODY]]
 // Start of body: calculate i from IV:
-// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]
 // CHECK: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 5
 // CHECK-NEXT: [[CALC_I_2:%.+]] = add nsw i32 3, [[CALC_I_1]]
-// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
 // ... loop body ...
 // End of body: store into a[i]:
-// CHECK: store float [[RESULT:%.+]], float* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// CHECK: store float [[RESULT:%.+]], float*
     a[i] = b[i] * c[i] * d[i];
-// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]
 // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
-// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
+// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
 // br label %{{.+}}, !llvm.loop !{{.+}}
   }
 // CHECK: [[SIMPLE_LOOP1_END]]
@@ -141,36 +141,36 @@ void simple(float *a, float *b, float *c, float *d) {
 // CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
 // CHECK: store i64 [[LB_VAL]], i64* [[OMP_IV3:%[^,]+]],
 
-// CHECK: [[IV3:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID:[0-9]+]]
-// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK: [[IV3:%.+]] = load i64, i64* [[OMP_IV3]]
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]]
 // CHECK-NEXT: [[CMP3:%.+]] = icmp ule i64 [[IV3]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP3]], label %[[SIMPLE_LOOP3_BODY:.+]], label %[[SIMPLE_LOOP3_END:[^,]+]]
   for (unsigned long long it = 2000; it >= 600; it-=400) {
 // CHECK: [[SIMPLE_LOOP3_BODY]]
 // Start of body: calculate it from IV:
-// CHECK: [[IV3_0:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK: [[IV3_0:%.+]] = load i64, i64* [[OMP_IV3]]
 // CHECK-NEXT: [[LC_IT_1:%.+]] = mul i64 [[IV3_0]], 400
 // CHECK-NEXT: [[LC_IT_2:%.+]] = sub i64 2000, [[LC_IT_1]]
-// CHECK-NEXT: store i64 [[LC_IT_2]], i64* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK-NEXT: store i64 [[LC_IT_2]], i64* {{.+}}
 //
 // Linear start and step are used to calculate current value of the linear variable.
-// CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK-NOT: store i32 {{.+}}, i32* [[LIN_VAR]],{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK-NEXT: [[IV3_1:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]]
+// CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]]
+// CHECK-NOT: store i32 {{.+}}, i32* [[LIN_VAR]]
+// CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]
+// CHECK-NEXT: [[IV3_1:%.+]] = load i64, i64* [[OMP_IV3]]
 // CHECK-NEXT: [[MUL:%.+]] = mul i64 [[IV3_1]], 1
 // CHECK: [[GEP:%.+]] = getelementptr{{.*}}[[GLINSTART]]
-// CHECK-NEXT: store double* [[GEP]], double** [[G_PTR_CUR:%[^,]+]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK-NEXT: store double* [[GEP]], double** [[G_PTR_CUR:%[^,]+]]
     *g_ptr++ = 0.0;
-// CHECK: [[GEP_VAL:%.+]] = load double{{.*}}[[G_PTR_CUR]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK: store double{{.*}}[[GEP_VAL]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK: [[GEP_VAL:%.+]] = load double{{.*}}[[G_PTR_CUR]]
+// CHECK: store double{{.*}}[[GEP_VAL]]
     a[it + lin]++;
 // CHECK: [[FLT_INC:%.+]] = fadd float
-// CHECK-NEXT: store float [[FLT_INC]],{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
-// CHECK: [[IV3_2:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK-NEXT: store float [[FLT_INC]],
+// CHECK: [[IV3_2:%.+]] = load i64, i64* [[OMP_IV3]]
 // CHECK-NEXT: [[ADD3_2:%.+]] = add i64 [[IV3_2]], 1
-// CHECK-NEXT: store i64 [[ADD3_2]], i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
+// CHECK-NEXT: store i64 [[ADD3_2]], i64* [[OMP_IV3]]
   }
 // CHECK: [[SIMPLE_LOOP3_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
@@ -198,22 +198,22 @@ void simple(float *a, float *b, float *c, float *d) {
 // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]],
 // CHECK: store i32 [[LB_VAL]], i32* [[OMP_IV4:%[^,]+]],
 
-// CHECK: [[IV4:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID:[0-9]+]]
-// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
+// CHECK: [[IV4:%.+]] = load i32, i32* [[OMP_IV4]]
+// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]
 // CHECK-NEXT: [[CMP4:%.+]] = icmp sle i32 [[IV4]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP4]], label %[[SIMPLE_LOOP4_BODY:.+]], label %[[SIMPLE_LOOP4_END:[^,]+]]
   for (short it = 6; it <= 20; it-=-4) {
 // CHECK: [[SIMPLE_LOOP4_BODY]]
 // Start of body: calculate it from IV:
-// CHECK: [[IV4_0:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
+// CHECK: [[IV4_0:%.+]] = load i32, i32* [[OMP_IV4]]
 // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i32 [[IV4_0]], 4
 // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i32 6, [[LC_IT_1]]
 // CHECK-NEXT: [[LC_IT_3:%.+]] = trunc i32 [[LC_IT_2]] to i16
-// CHECK-NEXT: store i16 [[LC_IT_3]], i16* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
+// CHECK-NEXT: store i16 [[LC_IT_3]], i16*
 
-// CHECK: [[IV4_2:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
+// CHECK: [[IV4_2:%.+]] = load i32, i32* [[OMP_IV4]]
 // CHECK-NEXT: [[ADD4_2:%.+]] = add nsw i32 [[IV4_2]], 1
-// CHECK-NEXT: store i32 [[ADD4_2]], i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
+// CHECK-NEXT: store i32 [[ADD4_2]], i32* [[OMP_IV4]]
   }
 // CHECK: [[SIMPLE_LOOP4_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
@@ -234,22 +234,22 @@ void simple(float *a, float *b, float *c, float *d) {
 // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]],
 // CHECK: store i32 [[LB_VAL]], i32* [[OMP_IV5:%[^,]+]],
 
-// CHECK: [[IV5:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID:[0-9]+]]
-// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
+// CHECK: [[IV5:%.+]] = load i32, i32* [[OMP_IV5]]
+// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]
 // CHECK-NEXT: [[CMP5:%.+]] = icmp sle i32 [[IV5]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP5]], label %[[SIMPLE_LOOP5_BODY:.+]], label %[[SIMPLE_LOOP5_END:[^,]+]]
   for (unsigned char it = 'z'; it >= 'a'; it+=-1) {
 // CHECK: [[SIMPLE_LOOP5_BODY]]
 // Start of body: calculate it from IV:
-// CHECK: [[IV5_0:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
+// CHECK: [[IV5_0:%.+]] = load i32, i32* [[OMP_IV5]]
 // CHECK-NEXT: [[IV5_1:%.+]] = mul nsw i32 [[IV5_0]], 1
 // CHECK-NEXT: [[LC_IT_1:%.+]] = sub nsw i32 122, [[IV5_1]]
 // CHECK-NEXT: [[LC_IT_2:%.+]] = trunc i32 [[LC_IT_1]] to i8
-// CHECK-NEXT: store i8 [[LC_IT_2]], i8* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
+// CHECK-NEXT: store i8 [[LC_IT_2]], i8* {{.+}},
 
-// CHECK: [[IV5_2:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
+// CHECK: [[IV5_2:%.+]] = load i32, i32* [[OMP_IV5]]
 // CHECK-NEXT: [[ADD5_2:%.+]] = add nsw i32 [[IV5_2]], 1
-// CHECK-NEXT: store i32 [[ADD5_2]], i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
+// CHECK-NEXT: store i32 [[ADD5_2]], i32* [[OMP_IV5]]
   }
 // CHECK: [[SIMPLE_LOOP5_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
@@ -280,24 +280,24 @@ void simple(float *a, float *b, float *c, float *d) {
 
 // CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]]
 // CHECK: [[SIMD_LOOP7_COND]]
-// CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID:[0-9]+]]
-// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]
+// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]
 // CHECK-NEXT: [[CMP7:%.+]] = icmp sle i64 [[IV7]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP7]], label %[[SIMPLE_LOOP7_BODY:.+]], label %[[SIMPLE_LOOP7_END:[^,]+]]
   for (long long i = -10; i < 10; i += 3) {
 // CHECK: [[SIMPLE_LOOP7_BODY]]
 // Start of body: calculate i from IV:
-// CHECK: [[IV7_0:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK: [[IV7_0:%.+]] = load i64, i64* [[OMP_IV7]]
 // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV7_0]], 3
 // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]]
-// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
-// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],
+// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]
 // CHECK-NEXT: [[CONV:%.+]] = trunc i64 [[LC_VAL]] to i32
-// CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV:%[^,]+]],
     A = i;
-// CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]]
 // CHECK-NEXT: [[ADD7_2:%.+]] = add nsw i64 [[IV7_2]], 1
-// CHECK-NEXT: store i64 [[ADD7_2]], i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
+// CHECK-NEXT: store i64 [[ADD7_2]], i64* [[OMP_IV7]]
   }
 // CHECK: [[SIMPLE_LOOP7_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
@@ -330,23 +330,23 @@ void simple(float *a, float *b, float *c, float *d) {
 
 // CHECK: br label %[[SIMD_LOOP8_COND:[^,]+]]
 // CHECK: [[SIMD_LOOP8_COND]]
-// CHECK-NEXT: [[IV8:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID:[0-9]+]]
-// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+// CHECK-NEXT: [[IV8:%.+]] = load i64, i64* [[OMP_IV8]]
+// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]
 // CHECK-NEXT: [[CMP8:%.+]] = icmp sle i64 [[IV8]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP8]], label %[[SIMPLE_LOOP8_BODY:.+]], label %[[SIMPLE_LOOP8_END:[^,]+]]
   for (long long i = -10; i < 10; i += 3) {
 // CHECK: [[SIMPLE_LOOP8_BODY]]
 // Start of body: calculate i from IV:
-// CHECK: [[IV8_0:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+// CHECK: [[IV8_0:%.+]] = load i64, i64* [[OMP_IV8]]
 // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV8_0]], 3
 // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]]
-// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
-// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
-// CHECK: store i32 %{{.+}}, i32* [[R_PRIV]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],
+// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]
+// CHECK: store i32 %{{.+}}, i32* [[R_PRIV]],
     R *= i;
-// CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+// CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]]
 // CHECK-NEXT: [[ADD8_2:%.+]] = add nsw i64 [[IV8_2]], 1
-// CHECK-NEXT: store i64 [[ADD8_2]], i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
+// CHECK-NEXT: store i64 [[ADD8_2]], i64* [[OMP_IV8]]
   }
 // CHECK: [[SIMPLE_LOOP8_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
@@ -438,14 +438,14 @@ void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) {
 // CHECK: [[LB_VAL:%.+]] = load i32, i32* [[LB]],
 // CHECK: store i32 [[LB_VAL]], i32* [[IT_OMP_IV:%[^,]+]],
 
-// CHECK: [[IV:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}} !llvm.mem.parallel_loop_access ![[ITER_LOOP_ID:[0-9]+]]
-// CHECK-NEXT: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
+// CHECK: [[IV:%.+]] = load i32, i32* [[IT_OMP_IV]]
+// CHECK-NEXT: [[UB_VAL:%.+]] = load i32, i32* [[UB]]
 // CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP]], label %[[IT_BODY:[^,]+]], label %[[IT_END:[^,]+]]
   for (IterDouble i = ia; i < ib; ++i) {
 // CHECK: [[IT_BODY]]
 // Start of body: calculate i from index:
-// CHECK: [[IV1:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
+// CHECK: [[IV1:%.+]] = load i32, i32* [[IT_OMP_IV]]
 // Call of operator+ (i, IV).
 // CHECK: {{%.+}} = invoke {{.+}} @{{.*}}IterDouble{{.*}}
 // ... loop body ...
@@ -453,12 +453,12 @@ void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) {
 // Float multiply and save result.
 // CHECK: [[MULR:%.+]] = fmul double {{%.+}}, 5.000000e-01
 // CHECK-NEXT: invoke {{.+}} @{{.*}}IterDouble{{.*}}
-// CHECK: store double [[MULR:%.+]], double* [[RESULT_ADDR:%.+]], !llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
+// CHECK: store double [[MULR:%.+]], double* [[RESULT_ADDR:%.+]]
    ++ic;
 //
-// CHECK: [[IV2:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
+// CHECK: [[IV2:%.+]] = load i32, i32* [[IT_OMP_IV]]
 // CHECK-NEXT: [[ADD2:%.+]] = add nsw i32 [[IV2]], 1
-// CHECK-NEXT: store i32 [[ADD2]], i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
+// CHECK-NEXT: store i32 [[ADD2]], i32* [[IT_OMP_IV]]
 // br label %{{.*}}, !llvm.loop ![[ITER_LOOP_ID]]
   }
 // CHECK: [[IT_END]]
@@ -490,8 +490,8 @@ void collapsed(float *a, float *b, float *c, float *d) {
 //
   #pragma omp parallel for simd collapse(4)
 
-// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID:[0-9]+]]
-// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]
+// CHECK: [[UB_VAL:%.+]] = load i32, i32* [[UB]]
 // CHECK-NEXT: [[CMP:%.+]] = icmp ule i32 [[IV]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP]], label %[[COLL1_BODY:[^,]+]], label %[[COLL1_END:[^,]+]]
   for (i = 1; i < 3; i++) // 2 iterations
@@ -501,25 +501,25 @@ void collapsed(float *a, float *b, float *c, float *d) {
         {
 // CHECK: [[COLL1_BODY]]
 // Start of body: calculate i from index:
-// CHECK: [[IV1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK: [[IV1:%.+]] = load i32, i32* [[OMP_IV]]
 // Calculation of the loop counters values.
 // CHECK: [[CALC_I_1:%.+]] = udiv i32 [[IV1]], 60
 // CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1
 // CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]]
 // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
-// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]
 // CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20
 // CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3
 // CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1
 // CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]]
 // CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]]
-// CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]
 // CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5
 // CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4
 // CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1
 // CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]]
 // CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]]
-// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]
 // CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5
 // CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1
 // CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]]
@@ -527,12 +527,12 @@ void collapsed(float *a, float *b, float *c, float *d) {
 // CHECK-NEXT: store i16 [[CALC_L_3]], i16* [[LC_L:.+]]
 // ... loop body ...
 // End of body: store into a[i]:
-// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]
     float res = b[j] * c[k];
     a[i] = res * d[l];
-// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV]]
 // CHECK-NEXT: [[ADD2:%.+]] = add i32 [[IV2]], 1
-// CHECK-NEXT: store i32 [[ADD2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
+// CHECK-NEXT: store i32 [[ADD2]], i32* [[OMP_IV]]
 // br label %{{[^,]+}}, !llvm.loop ![[COLL1_LOOP_ID]]
 // CHECK: [[COLL1_END]]
   }
@@ -577,8 +577,8 @@ void widened(float *a, float *b, float *c, float *d) {
 //
   #pragma omp parallel for simd collapse(2) private(globalfloat, localint)
 
-// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID:[0-9]+]]
-// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]]
 // CHECK-NEXT: [[CMP:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP]], label %[[WIDE1_BODY:[^,]+]], label %[[WIDE1_END:[^,]+]]
   for (i = 1; i < 3; i++) // 2 iterations
@@ -586,10 +586,10 @@ void widened(float *a, float *b, float *c, float *d) {
   {
 // CHECK: [[WIDE1_BODY]]
 // Start of body: calculate i from index:
-// CHECK: [[IV1:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK: [[IV1:%.+]] = load i64, i64* [[OMP_IV]]
 // Calculation of the loop counters values...
 // CHECK: store i32 {{[^,]+}}, i32* [[LC_I:.+]]
-// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]
 // CHECK: store i16 {{[^,]+}}, i16* [[LC_J:.+]]
 // ... loop body ...
 //
@@ -598,14 +598,14 @@ void widened(float *a, float *b, float *c, float *d) {
     globalfloat = (float)j/i;
     float res = b[j] * c[j];
 // Store into a[i]:
-// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]
     a[i] = res * d[i];
 // Then there's a store into private var localint:
-// CHECK: store i32 {{.+}}, i32* [[LOCALINT:%[^,]+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK: store i32 {{.+}}, i32* [[LOCALINT:%[^,]+]]
     localint = (int)j;
-// CHECK: [[IV2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK: [[IV2:%.+]] = load i64, i64* [[OMP_IV]]
 // CHECK-NEXT: [[ADD2:%.+]] = add nsw i64 [[IV2]], 1
-// CHECK-NEXT: store i64 [[ADD2]], i64* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
+// CHECK-NEXT: store i64 [[ADD2]], i64* [[OMP_IV]]
 //
 // br label %{{[^,]+}}, !llvm.loop ![[WIDE1_LOOP_ID]]
 // CHECK: [[WIDE1_END]]
@@ -636,27 +636,27 @@ void widened(float *a, float *b, float *c, float *d) {
 // CHECK: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]],
 
 // ...
-// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID:[0-9]+]]
-// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]]
+// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]]
 // CHECK-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]]
 // CHECK-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]]
 // CHECK: [[T1_BODY]]
 // Loop counters i and j updates:
-// CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]]
 // CHECK-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4
 // CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1
 // CHECK-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]]
 // CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32
-// CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
-// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK-NEXT: store i32 [[I_2]], i32*
+// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]
 // CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4
 // CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2
 // CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]]
-// CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK-NEXT: store i64 [[J_2_ADD0]], i64*
 // simd.for.inc:
-// CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]]
 // CHECK-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1
-// CHECK-NEXT: store i64 [[INC]], i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
+// CHECK-NEXT: store i64 [[INC]], i64*
 // CHECK-NEXT: br label {{%.+}}
 // CHECK: [[T1_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
diff --git a/test/OpenMP/parallel_for_simd_loop_messages.cpp b/test/OpenMP/parallel_for_simd_loop_messages.cpp
index f4929d9..e5fd8c0 100644
--- a/test/OpenMP/parallel_for_simd_loop_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_loop_messages.cpp
@@ -628,10 +628,16 @@ void test_loop_firstprivate_lastprivate() {
 }
 
 void test_ordered() {
-// expected-error@+1 2 {{unexpected OpenMP clause 'ordered' in directive '#pragma omp parallel for simd'}}
 #pragma omp parallel for simd ordered ordered // expected-error {{directive '#pragma omp parallel for simd' cannot contain more than one 'ordered' clause}}
   for (int i = 0; i < 16; ++i)
     ;
+#pragma omp parallel for simd ordered
+  for (int i = 0; i < 16; ++i)
+    ;
+//expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp parallel for simd' directive}}
+#pragma omp parallel for simd ordered(1)
+  for (int i = 0; i < 16; ++i)
+    ;
 }
 
 void test_nowait() {
diff --git a/test/OpenMP/parallel_for_simd_messages.cpp b/test/OpenMP/parallel_for_simd_messages.cpp
index fe14883..18f25fa 100644
--- a/test/OpenMP/parallel_for_simd_messages.cpp
+++ b/test/OpenMP/parallel_for_simd_messages.cpp
@@ -79,9 +79,15 @@ L1:
 }
 
 void test_ordered() {
-// expected-error@+1 2 {{unexpected OpenMP clause 'ordered' in directive '#pragma omp parallel for simd'}}
 #pragma omp parallel for simd ordered ordered // expected-error {{directive '#pragma omp parallel for simd' cannot contain more than one 'ordered' clause}}
   for (int i = 0; i < 16; ++i)
     ;
+#pragma omp parallel for simd ordered
+  for (int i = 0; i < 16; ++i)
+    ;
+// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp parallel for simd' directive}}
+#pragma omp parallel for simd ordered(1)
+  for (int i = 0; i < 16; ++i)
+    ;
 }
 
diff --git a/test/OpenMP/schedule_codegen.cpp b/test/OpenMP/schedule_codegen.cpp
new file mode 100644
index 0000000..bd5ec86
--- /dev/null
+++ b/test/OpenMP/schedule_codegen.cpp
@@ -0,0 +1,194 @@
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm -fexceptions -fcxx-exceptions -o - %s | FileCheck %s
+
+int main() {
+// CHECK: @__kmpc_for_static_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_for_static_fini
+#pragma omp for
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_for_static_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_for_static_fini
+#pragma omp for simd
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_for_static_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_for_static_fini
+#pragma omp for schedule(static)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_for_static_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_for_static_fini
+#pragma omp for simd schedule(static)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_for_static_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_for_static_fini
+#pragma omp for schedule(static, 2)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_for_static_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_for_static_fini
+#pragma omp for simd schedule(static, 2)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK: !llvm.mem.parallel_loop_access
+#pragma omp for schedule(auto)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK: !llvm.mem.parallel_loop_access
+#pragma omp for simd schedule(auto)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK: !llvm.mem.parallel_loop_access
+#pragma omp for schedule(runtime)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK: !llvm.mem.parallel_loop_access
+#pragma omp for simd schedule(runtime)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK: !llvm.mem.parallel_loop_access
+#pragma omp for schedule(guided)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK: !llvm.mem.parallel_loop_access
+#pragma omp for simd schedule(guided)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK: !llvm.mem.parallel_loop_access
+#pragma omp for schedule(dynamic)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK: !llvm.mem.parallel_loop_access
+#pragma omp for simd schedule(dynamic)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_for_static_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_for_static_fini
+#pragma omp for schedule(monotonic: static)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_for_static_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_for_static_fini
+#pragma omp for simd schedule(monotonic: static)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_for_static_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_for_static_fini
+#pragma omp for schedule(monotonic: static, 2)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_for_static_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_for_static_fini
+#pragma omp for simd schedule(monotonic: static, 2)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+#pragma omp for schedule(monotonic: auto)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+#pragma omp for simd schedule(monotonic: auto)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+#pragma omp for schedule(monotonic: runtime)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+#pragma omp for simd schedule(monotonic: runtime)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+#pragma omp for schedule(monotonic: guided)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+#pragma omp for simd schedule(monotonic: guided)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+#pragma omp for schedule(monotonic: dynamic)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+#pragma omp for simd schedule(monotonic: dynamic)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK: !llvm.mem.parallel_loop_access
+#pragma omp for schedule(nonmonotonic: guided)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK: !llvm.mem.parallel_loop_access
+#pragma omp for simd schedule(nonmonotonic: guided)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK: !llvm.mem.parallel_loop_access
+#pragma omp for schedule(nonmonotonic: dynamic)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK: !llvm.mem.parallel_loop_access
+#pragma omp for simd schedule(nonmonotonic: dynamic)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_dispatch_next
+#pragma omp for schedule(static) ordered
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_dispatch_next
+#pragma omp for simd schedule(static) ordered
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_dispatch_next
+#pragma omp for schedule(static, 2) ordered(1)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_dispatch_next
+#pragma omp for simd schedule(static, 2) ordered
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_dispatch_next
+#pragma omp for schedule(auto) ordered(1)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+#pragma omp for simd schedule(auto) ordered
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_dispatch_next
+#pragma omp for schedule(runtime) ordered
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_dispatch_next
+#pragma omp for simd schedule(runtime) ordered
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_dispatch_next
+#pragma omp for schedule(guided) ordered(1)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_dispatch_next
+#pragma omp for simd schedule(guided) ordered
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_dispatch_next
+#pragma omp for schedule(dynamic) ordered(1)
+  for(int i = 0; i < 10; ++i);
+// CHECK: @__kmpc_dispatch_init
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK: @__kmpc_dispatch_next
+#pragma omp for simd schedule(dynamic)
+  for(int i = 0; i < 10; ++i);
+  return 0;
+}
diff --git a/test/OpenMP/target_codegen.cpp b/test/OpenMP/target_codegen.cpp
index bcefa24..c2e08d6 100644
--- a/test/OpenMP/target_codegen.cpp
+++ b/test/OpenMP/target_codegen.cpp
@@ -1,15 +1,32 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
 
 // CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
 // CHECK-DAG: [[S1:%.+]] = type { double }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]] }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}} }
 
 // We have 8 target regions, but only 7 that actually will generate offloading
 // code, only 6 will have mapped arguments, and only 4 have all-constant map
@@ -33,6 +50,27 @@
 // CHECK-DAG: @{{.*}} = private constant i8 0
 // CHECK-DAG: @{{.*}} = private constant i8 0
 
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }]
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* [[REGFN:@.+]] to void ()*), i8* null }]
+
+
 template<typename tx, typename ty>
 struct TT{
   tx X;
diff --git a/test/OpenMP/target_codegen_global_capture.cpp b/test/OpenMP/target_codegen_global_capture.cpp
index 29469cd..211a3cc 100644
--- a/test/OpenMP/target_codegen_global_capture.cpp
+++ b/test/OpenMP/target_codegen_global_capture.cpp
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
diff --git a/test/OpenMP/target_codegen_registration.cpp b/test/OpenMP/target_codegen_registration.cpp
new file mode 100644
index 0000000..0c9bba6
--- /dev/null
+++ b/test/OpenMP/target_codegen_registration.cpp
@@ -0,0 +1,437 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// Check that no target code is emmitted if no omptests flag was provided.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]] }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK:    [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]] }
+
+// CHECK-DAG: [[A1:@.+]] = internal global [[SA]]
+// CHECK-DAG: [[A2:@.+]] = global [[SA]]
+// CHECK-DAG: [[B1:@.+]] = global [[SB]]
+// CHECK-DAG: [[B2:@.+]] = global [[SB]]
+// CHECK-DAG: [[C1:@.+]] = internal global [[SC]]
+// CHECK-DAG: [[D1:@.+]] = global [[SD]]
+// CHECK-DAG: [[E1:@.+]] = global [[SE]]
+// CHECK-DAG: [[T1:@.+]] = global [[ST1]]
+// CHECK-DAG: [[T2:@.+]] = global [[ST2]]
+
+// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-NTARGET-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-NTARGET-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-NTARGET-NOT: type { i8*,
+// CHECK-NTARGET-NOT: type { i32,
+
+// We have 7 target regions
+
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// TCHECK-NOT: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 128]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 128]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 128]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 128]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 128]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 128]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 128]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 128]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 128]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 128]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 128]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 128]
+
+// CHECK-NTARGET-NOT: private constant i8 0
+// CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i
+
+// CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:\.omp_offloading\.[0-9a-f]+\.[0-9a-f]+\._Z.+\.l[0-9]+\.c[0-9]+]]\00"
+// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+
+// TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:\.omp_offloading\.[0-9a-f]+\.[0-9a-f]+\._Z.+\.l[0-9]+\.c[0-9]+]]\00"
+// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1
+
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }]
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }
+
+// We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function.
+// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [
+// CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* [[REGFN:@.+]] to void ()*), i8* null }]
+
+// CHECK-NTARGET: @llvm.global_ctors = appending global [3   x { i32, void ()*, i8* }] [
+
+extern int *R;
+
+struct SA {
+  int arr[4];
+  void foo() {
+    int a = *R;
+    a += 1;
+    *R = a;
+  }
+  SA() {
+    int a = *R;
+    a += 2;
+    *R = a;
+  }
+  ~SA() {
+    int a = *R;
+    a += 3;
+    *R = a;
+  }
+};
+
+struct SB {
+  int arr[8];
+  void foo() {
+    int a = *R;
+    #pragma omp target
+    a += 4;
+    *R = a;
+  }
+  SB() {
+    int a = *R;
+    a += 5;
+    *R = a;
+  }
+  ~SB() {
+    int a = *R;
+    a += 6;
+    *R = a;
+  }
+};
+
+struct SC {
+  int arr[16];
+  void foo() {
+    int a = *R;
+    a += 7;
+    *R = a;
+  }
+  SC() {
+    int a = *R;
+    #pragma omp target
+    a += 8;
+    *R = a;
+  }
+  ~SC() {
+    int a = *R;
+    a += 9;
+    *R = a;
+  }
+};
+
+struct SD {
+  int arr[32];
+  void foo() {
+    int a = *R;
+    a += 10;
+    *R = a;
+  }
+  SD() {
+    int a = *R;
+    a += 11;
+    *R = a;
+  }
+  ~SD() {
+    int a = *R;
+    #pragma omp target
+    a += 12;
+    *R = a;
+  }
+};
+
+struct SE {
+  int arr[64];
+  void foo() {
+    int a = *R;
+    #pragma omp target if(0)
+    a += 13;
+    *R = a;
+  }
+  SE() {
+    int a = *R;
+    #pragma omp target
+    a += 14;
+    *R = a;
+  }
+  ~SE() {
+    int a = *R;
+    #pragma omp target
+    a += 15;
+    *R = a;
+  }
+};
+
+template <int x>
+struct ST {
+  int arr[128 + x];
+  void foo() {
+    int a = *R;
+    #pragma omp target
+    a += 16 + x;
+    *R = a;
+  }
+  ST() {
+    int a = *R;
+    #pragma omp target
+    a += 17 + x;
+    *R = a;
+  }
+  ~ST() {
+    int a = *R;
+    #pragma omp target
+    a += 18 + x;
+    *R = a;
+  }
+};
+
+// We have to make sure we us all the target regions:
+//CHECK-DAG: define internal void @[[NAME1]](
+//CHECK-DAG: call void @[[NAME1]](
+//CHECK-DAG: define internal void @[[NAME2]](
+//CHECK-DAG: call void @[[NAME2]](
+//CHECK-DAG: define internal void @[[NAME3]](
+//CHECK-DAG: call void @[[NAME3]](
+//CHECK-DAG: define internal void @[[NAME4]](
+//CHECK-DAG: call void @[[NAME4]](
+//CHECK-DAG: define internal void @[[NAME5]](
+//CHECK-DAG: call void @[[NAME5]](
+//CHECK-DAG: define internal void @[[NAME6]](
+//CHECK-DAG: call void @[[NAME6]](
+//CHECK-DAG: define internal void @[[NAME7]](
+//CHECK-DAG: call void @[[NAME7]](
+//CHECK-DAG: define internal void @[[NAME8]](
+//CHECK-DAG: call void @[[NAME8]](
+//CHECK-DAG: define internal void @[[NAME9]](
+//CHECK-DAG: call void @[[NAME9]](
+//CHECK-DAG: define internal void @[[NAME10]](
+//CHECK-DAG: call void @[[NAME10]](
+//CHECK-DAG: define internal void @[[NAME11]](
+//CHECK-DAG: call void @[[NAME11]](
+//CHECK-DAG: define internal void @[[NAME12]](
+//CHECK-DAG: call void @[[NAME12]](
+
+//TCHECK-DAG: define void @[[NAME1]](
+//TCHECK-DAG: define void @[[NAME2]](
+//TCHECK-DAG: define void @[[NAME3]](
+//TCHECK-DAG: define void @[[NAME4]](
+//TCHECK-DAG: define void @[[NAME5]](
+//TCHECK-DAG: define void @[[NAME6]](
+//TCHECK-DAG: define void @[[NAME7]](
+//TCHECK-DAG: define void @[[NAME8]](
+//TCHECK-DAG: define void @[[NAME9]](
+//TCHECK-DAG: define void @[[NAME10]](
+//TCHECK-DAG: define void @[[NAME11]](
+//TCHECK-DAG: define void @[[NAME12]](
+
+// CHECK-NTARGET-NOT: __tgt_target
+// CHECK-NTARGET-NOT: __tgt_register_lib
+// CHECK-NTARGET-NOT: __tgt_unregister_lib
+
+// TCHECK-NOT: __tgt_target
+// TCHECK-NOT: __tgt_register_lib
+// TCHECK-NOT: __tgt_unregister_lib
+
+// We have 2 initializers with priority 500
+//CHECK: define internal void [[P500]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 1 initializers with priority 501
+//CHECK: define internal void [[P501]](
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// We have 6 initializers with default priority
+//CHECK: define internal void [[PMAX]](
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK:     call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK:     ret void
+
+// Check registration and unregistration
+
+//CHECK:     define internal void [[UNREGFN:@.+]](i8*)
+//CHECK:     call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]])
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_unregister_lib([[DSCTY]]*)
+
+//CHECK:     define internal void [[REGFN]](i8*)
+//CHECK:     call i32 @__tgt_register_lib([[DSCTY]]* [[DESC]])
+//CHECK:     call i32 @__cxa_atexit(void (i8*)* [[UNREGFN]], i8* bitcast ([[DSCTY]]* [[DESC]] to i8*),
+//CHECK:     ret void
+//CHECK:     declare i32 @__tgt_register_lib([[DSCTY]]*)
+
+static __attribute__((init_priority(500))) SA a1;
+SA a2;
+SB __attribute__((init_priority(500))) b1;
+SB __attribute__((init_priority(501))) b2;
+static SC c1;
+SD d1;
+SE e1;
+ST<100> t1;
+ST<1000> t2;
+
+
+int bar(int a){
+  int r = a;
+
+  a1.foo();
+  a2.foo();
+  b1.foo();
+  b2.foo();
+  c1.foo();
+  d1.foo();
+  e1.foo();
+  t1.foo();
+  t2.foo();
+
+  #pragma omp target
+  ++r;
+
+  return r + *R;
+}
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG = !{i32 0, i32 [[DEVID:[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 160, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SDD2Ev", i32 210, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SEC2Ev", i32 226, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SED2Ev", i32 232, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi1000EE3fooEv", i32 243, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi100EEC2Ev", i32 249, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_Z3bari", i32 352, i32 11, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi100EED2Ev", i32 255, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi1000EEC2Ev", i32 249, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi1000EED2Ev", i32 255, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi100EE3fooEv", i32 243, i32 13, i32 {{[0-9]}}+}
+// CHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SCC2Ev", i32 185, i32 13, i32 {{[0-9]}}+}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID:[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 160, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SDD2Ev", i32 210, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SEC2Ev", i32 226, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SED2Ev", i32 232, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi1000EE3fooEv", i32 243, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi100EEC2Ev", i32 249, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_Z3bari", i32 352, i32 11, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi100EED2Ev", i32 255, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi1000EEC2Ev", i32 249, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi1000EED2Ev", i32 255, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2STILi100EE3fooEv", i32 243, i32 13, i32 {{[0-9]}}+}
+// TCHECK-DAG = !{i32 0, i32 [[DEVID]], i32 [[FILEID]] !"_ZN2SCC2Ev", i32 185, i32 13, i32 {{[0-9]}}+}
+
+#endif
diff --git a/test/OpenMP/target_codegen_registration_naming.cpp b/test/OpenMP/target_codegen_registration_naming.cpp
new file mode 100644
index 0000000..6ab9bf1
--- /dev/null
+++ b/test/OpenMP/target_codegen_registration_naming.cpp
@@ -0,0 +1,66 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: [[CA:%.+]] = type { i32* }
+
+// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}})
+int nested(int a){
+  // CHECK: call void @.omp_offloading.[[FILEID:[0-9a-f]+\.[0-9a-f]+]].[[NNAME]].l[[T1L:[0-9]+]].c[[T1C:[0-9]+]](
+  #pragma omp target
+    ++a;
+
+  // CHECK: call void @"[[LNAME:.+]]"([[CA]]*
+  auto F = [&](){
+    #pragma omp parallel
+    {
+      #pragma omp target
+      ++a;
+    }
+  };
+
+  F();
+
+  return a;
+}
+
+// CHECK: define {{.*}}void @.omp_offloading.[[FILEID]].[[NNAME]].l[[T1L]].c[[T1C]](
+// TCHECK: define {{.*}}void @.omp_offloading.[[FILEID:[0-9a-f]+\.[0-9a-f]+]].[[NNAME:.+]].l[[T1L:[0-9]+]].c[[T1C:[0-9]+]](
+
+// CHECK: define {{.*}}void @"[[LNAME]]"(
+// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to
+
+// CHECK: define {{.*}}void [[PNAME]](
+// CHECK: call void @.omp_offloading.[[FILEID]].[[NNAME]].l[[T2L:[0-9]+]].c[[T2C:[0-9]+]](
+
+// CHECK: define {{.*}}void @.omp_offloading.[[FILEID]].[[NNAME]].l[[T2L]].c[[T2C]](
+// TCHECK: define {{.*}}void @.omp_offloading.[[FILEID]].[[NNAME:.+]].l[[T2L:[0-9]+]].c[[T2C:[0-9]+]](
+
+
+// Check metadata is properly generated:
+// CHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 [[T1C]], i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 [[T2C]], i32 {{[0-9]+}}}
+
+// TCHECK:     !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 [[T1C]], i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 [[T2C]], i32 {{[0-9]+}}}
+#endif
diff --git a/test/OpenMP/target_map_codegen.cpp b/test/OpenMP/target_map_codegen.cpp
index 2b24c82..942cc4c 100644
--- a/test/OpenMP/target_map_codegen.cpp
+++ b/test/OpenMP/target_map_codegen.cpp
@@ -7,12 +7,12 @@
 ///
 
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
-// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-64
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
-// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK1 --check-prefix CK1-32
 #ifdef CK1
 
 // CK1-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
@@ -52,12 +52,12 @@ void implicit_maps_integer (int a){
 
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
-// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-64
-// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
-// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK2 --check-prefix CK2-32
 #ifdef CK2
 
 // CK2-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
@@ -101,12 +101,12 @@ void implicit_maps_integer_reference (int a){
 
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
-// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-64
-// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
-// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
+// RUN: %clang_cc1 -DCK3 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK3 --check-prefix CK3-32
 #ifdef CK3
 
 // CK3-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
@@ -145,12 +145,12 @@ void implicit_maps_parameter (int a){
 
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
-// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK4 --check-prefix CK4-64
-// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK4 --check-prefix CK4-32
-// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK4 --check-prefix CK4-32
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
+// RUN: %clang_cc1 -DCK4 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK4 --check-prefix CK4-64
+// RUN: %clang_cc1 -DCK4 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK4 --check-prefix CK4-32
+// RUN: %clang_cc1 -DCK4 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK4 --check-prefix CK4-32
 #ifdef CK4
 
 // CK4-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
@@ -201,12 +201,12 @@ void implicit_maps_nested_integer (int a){
 // CK4: define internal void [[KERNELP2]](i32* {{[^,]+}}, i32* {{[^,]+}}, i32* {{[^,]+}})
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK5 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK5 --check-prefix CK5-64
-// RUN: %clang_cc1 -DCK5 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK5 --check-prefix CK5-64
-// RUN: %clang_cc1 -DCK5 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK5 --check-prefix CK5-32
-// RUN: %clang_cc1 -DCK5 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK5 --check-prefix CK5-32
+// RUN: %clang_cc1 -DCK5 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK5 --check-prefix CK5-64
+// RUN: %clang_cc1 -DCK5 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK5 --check-prefix CK5-64
+// RUN: %clang_cc1 -DCK5 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK5 --check-prefix CK5-32
+// RUN: %clang_cc1 -DCK5 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK5 --check-prefix CK5-32
 #ifdef CK5
 
 // CK5-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
@@ -252,12 +252,12 @@ void implicit_maps_nested_integer_and_enum (int a){
 
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK6 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK6 --check-prefix CK6-64
-// RUN: %clang_cc1 -DCK6 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK6 --check-prefix CK6-64
-// RUN: %clang_cc1 -DCK6 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK6 --check-prefix CK6-32
-// RUN: %clang_cc1 -DCK6 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK6 --check-prefix CK6-32
+// RUN: %clang_cc1 -DCK6 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK6 --check-prefix CK6-64
+// RUN: %clang_cc1 -DCK6 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK6 --check-prefix CK6-64
+// RUN: %clang_cc1 -DCK6 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK6 --check-prefix CK6-32
+// RUN: %clang_cc1 -DCK6 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK6 --check-prefix CK6-32
 #ifdef CK6
 // CK6-DAG: [[GBL:@Gi]] = global i32 0
 // CK6-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
@@ -298,12 +298,12 @@ void implicit_maps_host_global (int a){
 
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK7 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK7 --check-prefix CK7-64
-// RUN: %clang_cc1 -DCK7 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK7  --check-prefix CK7-64
-// RUN: %clang_cc1 -DCK7 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK7  --check-prefix CK7-32
-// RUN: %clang_cc1 -DCK7 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK7  --check-prefix CK7-32
+// RUN: %clang_cc1 -DCK7 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK7 --check-prefix CK7-64
+// RUN: %clang_cc1 -DCK7 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK7  --check-prefix CK7-64
+// RUN: %clang_cc1 -DCK7 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK7  --check-prefix CK7-32
+// RUN: %clang_cc1 -DCK7 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK7  --check-prefix CK7-32
 #ifdef CK7
 
 // For a 32-bit targets, the value doesn't fit the size of the pointer,
@@ -360,12 +360,12 @@ void implicit_maps_double (int a){
 
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK8 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK8
-// RUN: %clang_cc1 -DCK8 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK8
-// RUN: %clang_cc1 -DCK8 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK8
-// RUN: %clang_cc1 -DCK8 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK8
+// RUN: %clang_cc1 -DCK8 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK8
+// RUN: %clang_cc1 -DCK8 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK8
+// RUN: %clang_cc1 -DCK8 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK8
+// RUN: %clang_cc1 -DCK8 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK8
 #ifdef CK8
 
 // CK8-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
@@ -404,12 +404,12 @@ void implicit_maps_float (int a){
 
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK9 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK9
-// RUN: %clang_cc1 -DCK9 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK9
-// RUN: %clang_cc1 -DCK9 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK9
-// RUN: %clang_cc1 -DCK9 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK9
+// RUN: %clang_cc1 -DCK9 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK9
+// RUN: %clang_cc1 -DCK9 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK9
+// RUN: %clang_cc1 -DCK9 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK9
+// RUN: %clang_cc1 -DCK9 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK9
 #ifdef CK9
 
 // CK9-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 16]
@@ -445,12 +445,12 @@ void implicit_maps_array (int a){
 // CK9: {{.+}} = getelementptr inbounds [2 x double], [2 x double]* [[REF]], i[[sz]] 0, i[[sz]] 0
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK10 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK10
-// RUN: %clang_cc1 -DCK10 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK10
-// RUN: %clang_cc1 -DCK10 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK10
-// RUN: %clang_cc1 -DCK10 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK10
+// RUN: %clang_cc1 -DCK10 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK10
+// RUN: %clang_cc1 -DCK10 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK10
+// RUN: %clang_cc1 -DCK10 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK10
+// RUN: %clang_cc1 -DCK10 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK10
 #ifdef CK10
 
 // CK10-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}]
@@ -487,12 +487,12 @@ void implicit_maps_pointer (){
 
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK11 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK11
-// RUN: %clang_cc1 -DCK11 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK11
-// RUN: %clang_cc1 -DCK11 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK11
-// RUN: %clang_cc1 -DCK11 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK11
+// RUN: %clang_cc1 -DCK11 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK11
+// RUN: %clang_cc1 -DCK11 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK11
+// RUN: %clang_cc1 -DCK11 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK11
+// RUN: %clang_cc1 -DCK11 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK11
 #ifdef CK11
 
 // CK11-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 16]
@@ -527,12 +527,12 @@ void implicit_maps_double_complex (int a){
 // CK11: {{.+}} = getelementptr inbounds { double, double }, { double, double }* [[REF]], i32 0, i32 0
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK12 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK12 --check-prefix CK12-64
-// RUN: %clang_cc1 -DCK12 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK12 --check-prefix CK12-64
-// RUN: %clang_cc1 -DCK12 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK12 --check-prefix CK12-32
-// RUN: %clang_cc1 -DCK12 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK12 --check-prefix CK12-32
+// RUN: %clang_cc1 -DCK12 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK12 --check-prefix CK12-64
+// RUN: %clang_cc1 -DCK12 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK12 --check-prefix CK12-64
+// RUN: %clang_cc1 -DCK12 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK12 --check-prefix CK12-32
+// RUN: %clang_cc1 -DCK12 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK12 --check-prefix CK12-32
 #ifdef CK12
 
 // For a 32-bit targets, the value doesn't fit the size of the pointer,
@@ -588,12 +588,12 @@ void implicit_maps_float_complex (int a){
 // CK12-32: {{.+}} = getelementptr inbounds { float, float }, { float, float }* [[REF]], i32 0, i32 0
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK13 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK13
-// RUN: %clang_cc1 -DCK13 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK13
-// RUN: %clang_cc1 -DCK13 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK13
-// RUN: %clang_cc1 -DCK13 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK13
+// RUN: %clang_cc1 -DCK13 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK13
+// RUN: %clang_cc1 -DCK13 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK13
+// RUN: %clang_cc1 -DCK13 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK13
+// RUN: %clang_cc1 -DCK13 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK13
 #ifdef CK13
 
 // We don't have a constant map size for VLAs.
@@ -658,12 +658,12 @@ void implicit_maps_variable_length_array (int a){
 // CK13: {{.+}} = getelementptr inbounds double, double* [[REF]], i[[sz]] %{{.+}}
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK14 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK14 --check-prefix CK14-64
-// RUN: %clang_cc1 -DCK14 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK14 --check-prefix CK14-64
-// RUN: %clang_cc1 -DCK14 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK14 --check-prefix CK14-32
-// RUN: %clang_cc1 -DCK14 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK14 --check-prefix CK14-32
+// RUN: %clang_cc1 -DCK14 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK14 --check-prefix CK14-64
+// RUN: %clang_cc1 -DCK14 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK14 --check-prefix CK14-64
+// RUN: %clang_cc1 -DCK14 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK14 --check-prefix CK14-32
+// RUN: %clang_cc1 -DCK14 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK14 --check-prefix CK14-32
 #ifdef CK14
 
 // CK14-DAG: [[ST:%.+]] = type { i32, double }
@@ -732,12 +732,12 @@ void implicit_maps_class (int a){
 
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK15 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK15 --check-prefix CK15-64
-// RUN: %clang_cc1 -DCK15 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK15 --check-prefix CK15-64
-// RUN: %clang_cc1 -DCK15 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK15 --check-prefix CK15-32
-// RUN: %clang_cc1 -DCK15 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK15 --check-prefix CK15-32
+// RUN: %clang_cc1 -DCK15 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK15 --check-prefix CK15-64
+// RUN: %clang_cc1 -DCK15 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK15 --check-prefix CK15-64
+// RUN: %clang_cc1 -DCK15 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK15 --check-prefix CK15-32
+// RUN: %clang_cc1 -DCK15 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK15 --check-prefix CK15-32
 #ifdef CK15
 
 // CK15: [[ST:%.+]] = type { i32, double, i32* }
@@ -860,12 +860,12 @@ void implicit_maps_templated_class (int a){
 
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK16 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK16 --check-prefix CK16-64
-// RUN: %clang_cc1 -DCK16 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK16 --check-prefix CK16-64
-// RUN: %clang_cc1 -DCK16 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK16 --check-prefix CK16-32
-// RUN: %clang_cc1 -DCK16 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK16 --check-prefix CK16-32
+// RUN: %clang_cc1 -DCK16 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK16 --check-prefix CK16-64
+// RUN: %clang_cc1 -DCK16 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK16 --check-prefix CK16-64
+// RUN: %clang_cc1 -DCK16 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK16 --check-prefix CK16-32
+// RUN: %clang_cc1 -DCK16 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK16 --check-prefix CK16-32
 #ifdef CK16
 
 // CK16-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
@@ -913,12 +913,12 @@ void implicit_maps_templated_function (int a){
 
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK17 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK17
-// RUN: %clang_cc1 -DCK17 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK17
-// RUN: %clang_cc1 -DCK17 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK17
-// RUN: %clang_cc1 -DCK17 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK17
+// RUN: %clang_cc1 -DCK17 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK17
+// RUN: %clang_cc1 -DCK17 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK17
+// RUN: %clang_cc1 -DCK17 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK17
+// RUN: %clang_cc1 -DCK17 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK17
 #ifdef CK17
 
 // CK17-DAG: [[ST:%.+]] = type { i32, double }
@@ -961,12 +961,12 @@ void implicit_maps_struct (int a){
 // CK17: {{.+}} = getelementptr inbounds [[ST]], [[ST]]* [[REF]], i32 0, i32 0
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK18 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK18 --check-prefix CK18-64
-// RUN: %clang_cc1 -DCK18 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK18 --check-prefix CK18-64
-// RUN: %clang_cc1 -DCK18 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK18 --check-prefix CK18-32
-// RUN: %clang_cc1 -DCK18 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK18 --check-prefix CK18-32
+// RUN: %clang_cc1 -DCK18 -verify -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK18 --check-prefix CK18-64
+// RUN: %clang_cc1 -DCK18 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK18 --check-prefix CK18-64
+// RUN: %clang_cc1 -DCK18 -verify -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s  --check-prefix CK18 --check-prefix CK18-32
+// RUN: %clang_cc1 -DCK18 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -omptargets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK18 --check-prefix CK18-32
 #ifdef CK18
 
 // CK18-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
diff --git a/test/OpenMP/target_messages.cpp b/test/OpenMP/target_messages.cpp
index ebac51a..86a9183 100644
--- a/test/OpenMP/target_messages.cpp
+++ b/test/OpenMP/target_messages.cpp
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s
+// RUN: not %clang_cc1 -fopenmp -std=c++11 -omptargets=aaa-bbb-ccc-ddd -o - %s 2>&1 | FileCheck %s
+// CHECK: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd'
 
 void foo() {
 }
diff --git a/test/Preprocessor/aarch64-target-features.c b/test/Preprocessor/aarch64-target-features.c
index 2630e32..dbc29cf 100644
--- a/test/Preprocessor/aarch64-target-features.c
+++ b/test/Preprocessor/aarch64-target-features.c
@@ -92,11 +92,13 @@
 // RUN: %clang -target aarch64 -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-A53 %s
 // RUN: %clang -target aarch64 -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-A57 %s
 // RUN: %clang -target aarch64 -mcpu=cortex-a72 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-A72 %s
+// RUN: %clang -target aarch64 -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-M1 %s
 // CHECK-MCPU-CYCLONE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" "-target-feature" "+zcm" "-target-feature" "+zcz"
 // CHECK-MCPU-A35: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto"
 // CHECK-MCPU-A53: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto"
 // CHECK-MCPU-A57: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto"
 // CHECK-MCPU-A72: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto"
+// CHECK-MCPU-M1: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto"
 
 // RUN: %clang -target x86_64-apple-macosx -arch arm64 -### -c %s 2>&1 | FileCheck --check-prefix=CHECK-ARCH-ARM64 %s
 // CHECK-ARCH-ARM64: "-target-cpu" "cyclone" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" "-target-feature" "+zcm" "-target-feature" "+zcz"
diff --git a/test/Profile/c-attributes.c b/test/Profile/c-attributes.c
deleted file mode 100644
index 2dcc180..0000000
--- a/test/Profile/c-attributes.c
+++ /dev/null
@@ -1,48 +0,0 @@
-// Test that instrumentation based profiling sets function attributes correctly.
-
-// RUN: llvm-profdata merge %S/Inputs/c-attributes.proftext -o %t.profdata
-// RUN: %clang %s -o - -mllvm -disable-llvm-optzns -emit-llvm -S -fprofile-instr-use=%t.profdata | FileCheck %s
-
-extern int atoi(const char *);
-
-// CHECK: hot_100_percent(i32{{.*}}%i) [[HOT:#[0-9]+]]
-void hot_100_percent(int i) {
-  while (i > 0)
-    i--;
-}
-
-// CHECK: hot_40_percent(i32{{.*}}%i) [[HOT]]
-void hot_40_percent(int i) {
-  while (i > 0)
-    i--;
-}
-
-// CHECK: normal_func(i32{{.*}}%i) [[NORMAL:#[0-9]+]]
-void normal_func(int i) {
-  while (i > 0)
-    i--;
-}
-
-// CHECK: cold_func(i32{{.*}}%i) [[COLD:#[0-9]+]]
-void cold_func(int i) {
-  while (i > 0)
-    i--;
-}
-
-// CHECK: attributes [[HOT]] = { inlinehint nounwind {{.*}} }
-// CHECK: attributes [[NORMAL]] = { nounwind {{.*}} }
-// CHECK: attributes [[COLD]] = { cold nounwind {{.*}} }
-
-int main(int argc, const char *argv[]) {
-  int max = atoi(argv[1]);
-  int i;
-  for (i = 0; i < max; i++)
-    hot_100_percent(i);
-  for (i = 0; i < max * 4 / 10; i++)
-    hot_40_percent(i);
-  for (i = 0; i < max * 2 / 10; i++)
-    normal_func(i);
-  for (i = 0; i < max / 200; i++)
-    cold_func(i);
-  return 0;
-}
diff --git a/test/Profile/func-entry.c b/test/Profile/func-entry.c
index 32c20a2..1ecae60 100644
--- a/test/Profile/func-entry.c
+++ b/test/Profile/func-entry.c
@@ -5,10 +5,10 @@
 
 void foo(void);
 
-// CHECK: @foo() #0 !prof [[FOO:![0-9]+]]
+// CHECK: @foo() #{{[0-9]}} !prof [[FOO:![0-9]+]]
 void foo() { return; }
 
-// CHECK: @main() #1 !prof [[MAIN:![0-9]+]]
+// CHECK: @main() #{{[0-9]}} !prof [[MAIN:![0-9]+]]
 int main() {
   int i;
   for (i = 0; i < 10000; i++) foo();
diff --git a/test/Sema/attr-ownership.c b/test/Sema/attr-ownership.c
index f7969d4..fa21b03 100644
--- a/test/Sema/attr-ownership.c
+++ b/test/Sema/attr-ownership.c
@@ -9,7 +9,7 @@ void f6(void) __attribute__((ownership_holds(foo, 1, 2, 3)));  // expected-error
 void f7(void) __attribute__((ownership_takes(foo)));  // expected-error {{'ownership_takes' attribute takes at least 2 arguments}}
 void f8(int *i, int *j, int k) __attribute__((ownership_holds(foo, 1, 2, 4)));  // expected-error {{'ownership_holds' attribute parameter 3 is out of bounds}}
 
-int f9 __attribute__((ownership_takes(foo, 1)));  // expected-warning {{'ownership_takes' attribute only applies to non-K&R-style functions}}
+int f9 __attribute__((ownership_takes(foo, 1)));  // expected-warning {{'ownership_takes' attribute only applies to non-K&R-style functions}}
 
 void f10(int i) __attribute__((ownership_holds(foo, 1)));  // expected-error {{'ownership_holds' attribute only applies to pointer arguments}}
 void *f11(float i) __attribute__((ownership_returns(foo, 1)));  // expected-error {{'ownership_returns' attribute only applies to integer arguments}}
@@ -19,8 +19,8 @@ void f13(int *i, int *j) __attribute__((ownership_holds(foo, 1))) __attribute__(
 void f14(int i, int j, int *k) __attribute__((ownership_holds(foo, 3))) __attribute__((ownership_takes(foo, 3)));  // expected-error {{'ownership_holds' and 'ownership_takes' attributes are not compatible}}
 
 void f15(int, int)
-  __attribute__((ownership_returns(foo, 1)))  // expected-note {{declared with index 1 here}}
-  __attribute__((ownership_returns(foo, 2))); // expected-error {{'ownership_returns' attribute index does not match; here it is 2}}
-void f16(int *i, int *j) __attribute__((ownership_holds(foo, 1))) __attribute__((ownership_holds(foo, 1))); // OK, same index
-void f17(void*) __attribute__((ownership_takes(__, 1)));
-void f18() __attribute__((ownership_takes(foo, 1)));  // expected-warning {{'ownership_takes' attribute only applies to non-K&R-style functions}}
+  __attribute__((ownership_returns(foo, 1)))  // expected-note {{declared with index 1 here}}
+  __attribute__((ownership_returns(foo, 2))); // expected-error {{'ownership_returns' attribute index does not match; here it is 2}}
+void f16(int *i, int *j) __attribute__((ownership_holds(foo, 1))) __attribute__((ownership_holds(foo, 1))); // OK, same index
+void f17(void*) __attribute__((ownership_takes(__, 1)));
+void f18() __attribute__((ownership_takes(foo, 1)));  // expected-warning {{'ownership_takes' attribute only applies to non-K&R-style functions}}
diff --git a/test/Sema/implicit-builtin-freestanding.c b/test/Sema/implicit-builtin-freestanding.c
index 385cf1f..f77bccc 100644
--- a/test/Sema/implicit-builtin-freestanding.c
+++ b/test/Sema/implicit-builtin-freestanding.c
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -ffreestanding %s
+// RUN: %clang_cc1 -fsyntax-only -verify -fno-builtin %s
+// RUN: %clang_cc1 -fsyntax-only -verify -fno-builtin-malloc %s
 // expected-no-diagnostics
 
 int malloc(int a) { return a; }
diff --git a/test/Sema/warn-documentation-crlf.c b/test/Sema/warn-documentation-crlf.c
index 99c0714..474901b 100644
--- a/test/Sema/warn-documentation-crlf.c
+++ b/test/Sema/warn-documentation-crlf.c
@@ -1,13 +1,13 @@
-// RUN: %clang_cc1 -fsyntax-only -Wdocumentation %s
-// The run line does not have '-verify' because we were crashing while printing
-// the diagnostic.
-
-// This file has DOS-style line endings (CR LF).  Please don't change it to
-// Unix-style LF!
-
-// PR14591.  Check that we don't crash on this.
-/**
- * @param abc
- */
-void nocrash1(int qwerty);
-
+// RUN: %clang_cc1 -fsyntax-only -Wdocumentation %s
+// The run line does not have '-verify' because we were crashing while printing
+// the diagnostic.
+
+// This file has DOS-style line endings (CR LF).  Please don't change it to
+// Unix-style LF!
+
+// PR14591.  Check that we don't crash on this.
+/**
+ * @param abc
+ */
+void nocrash1(int qwerty);
+
diff --git a/test/SemaCXX/attr-no-sanitize-address.cpp b/test/SemaCXX/attr-no-sanitize-address.cpp
index 0aafe06..9499742 100644
--- a/test/SemaCXX/attr-no-sanitize-address.cpp
+++ b/test/SemaCXX/attr-no-sanitize-address.cpp
@@ -5,14 +5,14 @@
 #if !__has_attribute(no_sanitize_address)
 #error "Should support no_sanitize_address"
 #endif
-
-void noanal_fun() NO_SANITIZE_ADDRESS;
-
-void noanal_fun_alt() __attribute__((__no_sanitize_address__));
-
-void noanal_fun_args() __attribute__((no_sanitize_address(1))); // \
-  // expected-error {{'no_sanitize_address' attribute takes no arguments}}
-
+
+void noanal_fun() NO_SANITIZE_ADDRESS;
+
+void noanal_fun_alt() __attribute__((__no_sanitize_address__));
+
+void noanal_fun_args() __attribute__((no_sanitize_address(1))); // \
+  // expected-error {{'no_sanitize_address' attribute takes no arguments}}
+
 int noanal_testfn(int y) NO_SANITIZE_ADDRESS;
 
 int noanal_testfn(int y) {
diff --git a/test/SemaCXX/attr-no-sanitize-memory.cpp b/test/SemaCXX/attr-no-sanitize-memory.cpp
index 8a8d847..41809a0 100644
--- a/test/SemaCXX/attr-no-sanitize-memory.cpp
+++ b/test/SemaCXX/attr-no-sanitize-memory.cpp
@@ -5,14 +5,14 @@
 #if !__has_attribute(no_sanitize_memory)
 #error "Should support no_sanitize_memory"
 #endif
-
-void noanal_fun() NO_SANITIZE_MEMORY;
-
-void noanal_fun_alt() __attribute__((__no_sanitize_memory__));
-
-void noanal_fun_args() __attribute__((no_sanitize_memory(1))); // \
-  // expected-error {{'no_sanitize_memory' attribute takes no arguments}}
-
+
+void noanal_fun() NO_SANITIZE_MEMORY;
+
+void noanal_fun_alt() __attribute__((__no_sanitize_memory__));
+
+void noanal_fun_args() __attribute__((no_sanitize_memory(1))); // \
+  // expected-error {{'no_sanitize_memory' attribute takes no arguments}}
+
 int noanal_testfn(int y) NO_SANITIZE_MEMORY;
 
 int noanal_testfn(int y) {
diff --git a/test/SemaCXX/attr-no-sanitize-thread.cpp b/test/SemaCXX/attr-no-sanitize-thread.cpp
index 92a3fa9..d97e050 100644
--- a/test/SemaCXX/attr-no-sanitize-thread.cpp
+++ b/test/SemaCXX/attr-no-sanitize-thread.cpp
@@ -5,14 +5,14 @@
 #if !__has_attribute(no_sanitize_thread)
 #error "Should support no_sanitize_thread"
 #endif
-
-void noanal_fun() NO_SANITIZE_THREAD;
-
-void noanal_fun_alt() __attribute__((__no_sanitize_thread__));
-
-void noanal_fun_args() __attribute__((no_sanitize_thread(1))); // \
-  // expected-error {{'no_sanitize_thread' attribute takes no arguments}}
-
+
+void noanal_fun() NO_SANITIZE_THREAD;
+
+void noanal_fun_alt() __attribute__((__no_sanitize_thread__));
+
+void noanal_fun_args() __attribute__((no_sanitize_thread(1))); // \
+  // expected-error {{'no_sanitize_thread' attribute takes no arguments}}
+
 int noanal_testfn(int y) NO_SANITIZE_THREAD;
 
 int noanal_testfn(int y) {
diff --git a/test/SemaCXX/dllexport.cpp b/test/SemaCXX/dllexport.cpp
index a32ba44..0bbf9b37 100644
--- a/test/SemaCXX/dllexport.cpp
+++ b/test/SemaCXX/dllexport.cpp
@@ -730,7 +730,12 @@ __declspec(dllexport)        int  MemberRedecl::StaticField = 1;       // expect
 __declspec(dllexport) const  int  MemberRedecl::StaticConstField = 1;  // expected-error{{redeclaration of 'MemberRedecl::StaticConstField' cannot add 'dllexport' attribute}}
 __declspec(dllexport) constexpr int MemberRedecl::ConstexprField;      // expected-error{{redeclaration of 'MemberRedecl::ConstexprField' cannot add 'dllexport' attribute}}
 
-
+#ifdef MS
+struct __declspec(dllexport) ClassWithMultipleDefaultCtors {
+  ClassWithMultipleDefaultCtors(int = 40) {} // expected-error{{'__declspec(dllexport)' cannot be applied to more than one default constructor}}
+  ClassWithMultipleDefaultCtors(int = 30, ...) {} // expected-note{{declared here}}
+};
+#endif
 
 //===----------------------------------------------------------------------===//
 // Class member templates
diff --git a/test/SemaCXX/warn-literal-conversion.cpp b/test/SemaCXX/warn-literal-conversion.cpp
index d7bec4c..5d4b6f7 100644
--- a/test/SemaCXX/warn-literal-conversion.cpp
+++ b/test/SemaCXX/warn-literal-conversion.cpp
@@ -38,3 +38,14 @@ void test0() {
   int y = (24*60*60) * 0.25;
   int pennies = 123.45 * 100;
 }
+
+// Similarly, test floating point conversion to bool. Only float values of zero
+// are converted to false; everything else is converted to true.
+void test1() {
+  bool b1 = 0.99f; // expected-warning {{implicit conversion from 'float' to 'bool' changes value from 0.99 to true}}
+  bool b2 = 0.99; // expected-warning {{implicit conversion from 'double' to 'bool' changes value from 0.99 to true}}
+  // These do not warn because they can be directly converted to integral
+  // values.
+  bool b3 = 0.0f;
+  bool b4 = 0.0;
+}
diff --git a/test/SemaOpenCL/cond.cl b/test/SemaOpenCL/cond.cl
index 8cc4f1e..60f7056 100644
--- a/test/SemaOpenCL/cond.cl
+++ b/test/SemaOpenCL/cond.cl
@@ -128,5 +128,5 @@ int foo2(int);
 
 unsigned int ntest12(int2 C)
 {
-  return (unsigned int)(C ? foo1 : foo2); // expected-error {{taking address of function is not allowed}}
+  return (unsigned int)(C ? foo1 : foo2); // expected-error {{taking address of function is not allowed}} expected-error {{taking address of function is not allowed}}
 }
diff --git a/test/SemaOpenCL/func_ptr.cl b/test/SemaOpenCL/func_ptr.cl
index f21a3d3..abeab73 100644
--- a/test/SemaOpenCL/func_ptr.cl
+++ b/test/SemaOpenCL/func_ptr.cl
@@ -11,6 +11,9 @@ void bar()
   foo((void*)foo); // expected-error{{taking address of function is not allowed}}
   foo(&foo); // expected-error{{taking address of function is not allowed}}
 
+  // initializing an array with the address of functions is an error
+  void* vptrarr[2] = {foo, &foo}; // expected-error{{taking address of function is not allowed}} expected-error{{taking address of function is not allowed}}
+
   // just calling a function is correct
   foo(0);
 }
diff --git a/test/SemaTemplate/deduction.cpp b/test/SemaTemplate/deduction.cpp
index bf08122..6826774 100644
--- a/test/SemaTemplate/deduction.cpp
+++ b/test/SemaTemplate/deduction.cpp
@@ -207,3 +207,14 @@ namespace PR18645 {
   template<typename F> F Quux(F &&f);
   auto Baz = Quux(Quux<float>);
 }
+
+namespace NonDeducedNestedNameSpecifier {
+  template<typename T> struct A {
+    template<typename U> struct B {
+      B(int) {}
+    };
+  };
+
+  template<typename T> int f(A<T>, typename A<T>::template B<T>);
+  int k = f(A<int>(), 0);
+}