Diffstat (limited to 'test/CodeGen'): 244 files changed, 38830 insertions, 1222 deletions
diff --git a/test/CodeGen/2003-08-06-BuiltinSetjmpLongjmp.c b/test/CodeGen/2003-08-06-BuiltinSetjmpLongjmp.c
index 12bce26..3aa5c00 100644
--- a/test/CodeGen/2003-08-06-BuiltinSetjmpLongjmp.c
+++ b/test/CodeGen/2003-08-06-BuiltinSetjmpLongjmp.c
@@ -1,9 +1,10 @@
-/* RUN: %clang_cc1 %s -emit-llvm -o - | not grep __builtin_
+/* RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s
  *
- * __builtin_longjmp/setjmp should get transformed into llvm.setjmp/longjmp
- * just like explicit setjmp/longjmp calls are.
+ * __builtin_longjmp/setjmp should get transformed into intrinsics.
  */
 
+// CHECK-NOT: builtin_longjmp
+
 void jumpaway(int *ptr) {
   __builtin_longjmp(ptr,1);
 }
diff --git a/test/CodeGen/2004-11-27-InvalidConstantExpr.c b/test/CodeGen/2004-11-27-InvalidConstantExpr.c
deleted file mode 100644
index 431dccf..0000000
--- a/test/CodeGen/2004-11-27-InvalidConstantExpr.c
+++ /dev/null
@@ -1,10 +0,0 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - | not grep {foo\\* sub}
-// This should not produce a subtrace constantexpr of a pointer
-struct foo {
-  int Y;
-  char X[100];
-} F;
-
-int test(char *Y) {
-  return Y - F.X;
-}
diff --git a/test/CodeGen/2004-11-27-StaticFunctionRedeclare.c b/test/CodeGen/2004-11-27-StaticFunctionRedeclare.c
index 55efa86..9ceee4c 100644
--- a/test/CodeGen/2004-11-27-StaticFunctionRedeclare.c
+++ b/test/CodeGen/2004-11-27-StaticFunctionRedeclare.c
@@ -1,12 +1,14 @@
-// RUN: %clang_cc1 -emit-llvm %s -o - | \
-// RUN: opt -std-compile-opts -emit-llvm | not grep {declare i32.*func}
+// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s
 
 // There should not be an unresolved reference to func here. Believe it or not,
-// the "expected result" is a function named 'func' which is internal and 
+// the "expected result" is a function named 'func' which is internal and
 // referenced by bar().
 
 // This is PR244
 
+// CHECK-LABEL: define void @bar(
+// CHECK: call {{.*}} @func
+// CHECK: define internal {{.*}}i32 @func(
 static int func();
 void bar() {
   int func();
diff --git a/test/CodeGen/2007-02-25-C-DotDotDot.c b/test/CodeGen/2007-02-25-C-DotDotDot.c
index abc4668..1c3a3df 100644
--- a/test/CodeGen/2007-02-25-C-DotDotDot.c
+++ b/test/CodeGen/2007-02-25-C-DotDotDot.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -O0 %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -o - | FileCheck %s
 
 // Make sure the call to foo is compiled as:
 // call float @foo()
diff --git a/test/CodeGen/2007-03-05-DataLayout.c b/test/CodeGen/2007-03-05-DataLayout.c
deleted file mode 100644
index 7519624..0000000
--- a/test/CodeGen/2007-03-05-DataLayout.c
+++ /dev/null
@@ -1,55 +0,0 @@
-// Testcase for PR1242
-// RUN: %clang_cc1 -emit-llvm %s -o - | grep datalayout | \
-// RUN: not grep {"\[Ee\]-p:\[36\]\[24\]:\[36\]\[24\]"}
-// END.
-
-typedef __SIZE_TYPE__ size_t;
-void * malloc(size_t size);
-#define NDIM 3
-#define BODY 01
-typedef double vector[NDIM];
-typedef struct bnode* bodyptr;
-// { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x
-// double], double, \2 *, \2 * }
-struct bnode {
-  short int type;
-  double mass;
-  vector pos;
-  int proc;
-  int new_proc;
-  vector vel;
-  vector acc;
-  vector new_acc;
-  double phi;
-  bodyptr next;
-  bodyptr proc_next;
-} body;
-
-#define Type(x) ((x)->type)
-#define Mass(x) ((x)->mass)
-#define Pos(x) ((x)->pos)
-#define Proc(x) ((x)->proc)
-#define New_Proc(x) ((x)->new_proc)
-#define Vel(x) ((x)->vel)
-#define Acc(x) ((x)->acc)
-#define New_Acc(x) ((x)->new_acc)
-#define Phi(x) ((x)->phi)
-#define Next(x) ((x)->next)
-#define Proc_Next(x) ((x)->proc_next)
-
-bodyptr ubody_alloc(int p)
-{
-  register bodyptr tmp;
-  tmp = (bodyptr)malloc(sizeof(body));
-
-  Type(tmp) = BODY;
-  Proc(tmp) = p;
-  Proc_Next(tmp) = NULL;
-  New_Proc(tmp) = p;
-  return tmp;
-}
-
-int main(int argc, char** argv) {
-  bodyptr b = ubody_alloc(17);
-  return 0;
-}
diff --git a/test/CodeGen/2007-04-14-FNoBuiltin.c b/test/CodeGen/2007-04-14-FNoBuiltin.c
index a5fda63..25ae01c 100644
--- a/test/CodeGen/2007-04-14-FNoBuiltin.c
+++ b/test/CodeGen/2007-04-14-FNoBuiltin.c
@@ -1,7 +1,10 @@
-// RUN: %clang_cc1 -emit-llvm %s -O2 -fno-builtin -o - | grep call.*printf
+// RUN: %clang_cc1 -emit-llvm %s -O2 -fno-builtin -o - | FileCheck %s
 // Check that -fno-builtin is honored.
 
 extern int printf(const char*, ...);
+
+// CHECK: define void {{.*}}foo(
 void foo(const char *msg) {
-  printf("%s\n",msg);
+  // CHECK: call {{.*}}printf
+  printf("%s\n",msg);
 }
diff --git a/test/CodeGen/2007-05-07-PaddingElements.c b/test/CodeGen/2007-05-07-PaddingElements.c
index 574a377..f8ec248 100644
--- a/test/CodeGen/2007-05-07-PaddingElements.c
+++ b/test/CodeGen/2007-05-07-PaddingElements.c
@@ -1,6 +1,6 @@
 // PR 1278
-// RUN: %clang_cc1 %s -emit-llvm -O0 -o - | grep {struct.s} | not grep "4 x i8] zeroinitializer"
-// RUN: %clang_cc1 %s -emit-llvm -O0 -o - | not grep "i32 0, i32 2"
+// RUN: %clang_cc1 %s -emit-llvm -o - | grep struct.s | not grep "4 x i8] zeroinitializer"
+// RUN: %clang_cc1 %s -emit-llvm -o - | not grep "i32 0, i32 2"
 struct s {
   double d1;
   int s1;
diff --git a/test/CodeGen/2008-01-11-ChainConsistency.c b/test/CodeGen/2008-01-11-ChainConsistency.c
deleted file mode 100644
index 9ae021f..0000000
--- a/test/CodeGen/2008-01-11-ChainConsistency.c
+++ /dev/null
@@ -1,3 +0,0 @@
-// RUN: %clang_cc1 -emit-llvm %s -o - -fnested-functions | not grep nest
-
-void n1(void) { void a(void) { a(); } a(); }
diff --git a/test/CodeGen/2008-01-25-ByValReadNone.c b/test/CodeGen/2008-01-25-ByValReadNone.c
index ca21f6c..bb5a588 100644
--- a/test/CodeGen/2008-01-25-ByValReadNone.c
+++ b/test/CodeGen/2008-01-25-ByValReadNone.c
@@ -1,7 +1,6 @@
-// RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
-
-// XFAIL: mips
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+// It could hit in @llvm.memcpy with "-triple x86_64-(mingw32|win32)".
 
 // CHECK-NOT: readonly
 // CHECK-NOT: readnone
 
diff --git a/test/CodeGen/2008-03-05-syncPtr.c b/test/CodeGen/2008-03-05-syncPtr.c
index 93f328a..4df9482 100644
--- a/test/CodeGen/2008-03-05-syncPtr.c
+++ b/test/CodeGen/2008-03-05-syncPtr.c
@@ -3,38 +3,38 @@
 int* foo(int** a, int* b, int* c) {
   return __sync_val_compare_and_swap (a, b, c);
 }
-// CHECK: define i32* @foo
+// CHECK-LABEL: define i32* @foo
 // CHECK: cmpxchg
 
 int foo2(int** a, int* b, int* c) {
   return __sync_bool_compare_and_swap (a, b, c);
 }
-// CHECK: define i32 @foo2
+// CHECK-LABEL: define i32 @foo2
 // CHECK: cmpxchg
 
 int* foo3(int** a, int b) {
   return __sync_fetch_and_add (a, b);
 }
-// CHECK: define i32* @foo3
+// CHECK-LABEL: define i32* @foo3
 // CHECK: atomicrmw add
 
 int* foo4(int** a, int b) {
   return __sync_fetch_and_sub (a, b);
 }
-// CHECK: define i32* @foo4
+// CHECK-LABEL: define i32* @foo4
 // CHECK: atomicrmw sub
 
 int* foo5(int** a, int* b) {
   return __sync_lock_test_and_set (a, b);
 }
-// CHECK: define i32* @foo5
+// CHECK-LABEL: define i32* @foo5
 // CHECK: atomicrmw xchg
 
 int* foo6(int** a, int*** b) {
   return __sync_lock_test_and_set (a, b);
 }
-// CHECK: define i32* @foo6
+// CHECK-LABEL: define i32* @foo6
 // CHECK: atomicrmw xchg
diff --git a/test/CodeGen/2008-03-24-BitField-And-Alloca.c b/test/CodeGen/2008-03-24-BitField-And-Alloca.c
index cb80d76..b0ccdec 100644
--- a/test/CodeGen/2008-03-24-BitField-And-Alloca.c
+++ b/test/CodeGen/2008-03-24-BitField-And-Alloca.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -O2 -emit-llvm %s -o - | not grep alloca
-// RUN: %clang_cc1 -m32 -O2 -emit-llvm %s -o - | not grep {store }
+// RUN: %clang_cc1 -triple i686-pc-linux-gnu -O2 -emit-llvm %s -o - | not grep store
 
 enum {
   PP_C,
diff --git a/test/CodeGen/2008-05-19-AlwaysInline.c b/test/CodeGen/2008-05-19-AlwaysInline.c
index 73a7691..419951b 100644
--- a/test/CodeGen/2008-05-19-AlwaysInline.c
+++ b/test/CodeGen/2008-05-19-AlwaysInline.c
@@ -1,5 +1,4 @@
-// RUN: %clang_cc1 %s -emit-llvm -fno-unit-at-a-time -O0 -o - | not grep sabrina
-// RUN: %clang_cc1 %s -emit-llvm -funit-at-a-time -O0 -o - | not grep sabrina
+// RUN: %clang_cc1 %s -emit-llvm -o - | not grep sabrina
 
 static inline int sabrina (void) __attribute__((always_inline));
 static inline int sabrina (void)
diff --git a/test/CodeGen/2008-07-30-implicit-initialization.c b/test/CodeGen/2008-07-30-implicit-initialization.c
index e516259..e77c70a 100644
--- a/test/CodeGen/2008-07-30-implicit-initialization.c
+++ b/test/CodeGen/2008-07-30-implicit-initialization.c
@@ -1,9 +1,9 @@
 // RUN: %clang_cc1 -triple i386-unknown-unknown -O1 -emit-llvm -o - %s | FileCheck %s
-// CHECK: define i32 @f0()
+// CHECK-LABEL: define i32 @f0()
 // CHECK: ret i32 0
-// CHECK: define i32 @f1()
+// CHECK-LABEL: define i32 @f1()
 // CHECK: ret i32 0
-// CHECK: define i32 @f2()
+// CHECK-LABEL: define i32 @f2()
 // CHECK: ret i32 0
 
 // <rdar://problem/6113085>
diff --git a/test/CodeGen/2008-07-31-asm-labels.c b/test/CodeGen/2008-07-31-asm-labels.c
index 130ad6b..733742b 100644
--- a/test/CodeGen/2008-07-31-asm-labels.c
+++ b/test/CodeGen/2008-07-31-asm-labels.c
@@ -1,10 +1,10 @@
 // RUN: %clang_cc1 -emit-llvm -o %t %s
-// RUN: grep "@pipe()" %t | count 0
+// RUN: not grep "@pipe()" %t
 // RUN: grep '_thisIsNotAPipe' %t | count 3
-// RUN: grep 'g0' %t | count 0
+// RUN: not grep 'g0' %t
 // RUN: grep '_renamed' %t | count 2
 // RUN: %clang_cc1 -DUSE_DEF -emit-llvm -o %t %s
-// RUN: grep "@pipe()" %t | count 0
+// RUN: not grep "@pipe()" %t
 // RUN: grep '_thisIsNotAPipe' %t | count 3
 
 // <rdr://6116729>
diff --git a/test/CodeGen/2008-07-31-promotion-of-compound-pointer-arithmetic.c b/test/CodeGen/2008-07-31-promotion-of-compound-pointer-arithmetic.c
index 429fb1f..f348b2b 100644
--- a/test/CodeGen/2008-07-31-promotion-of-compound-pointer-arithmetic.c
+++ b/test/CodeGen/2008-07-31-promotion-of-compound-pointer-arithmetic.c
@@ -1,9 +1,9 @@
 // RUN: %clang_cc1 -triple i386-unknown-unknown -O1 -emit-llvm -o - %s | FileCheck %s
-// CHECK: define i32 @f0
+// CHECK-LABEL: define i32 @f0
 // CHECK: ret i32 1
-// CHECK: define i32 @f1
+// CHECK-LABEL: define i32 @f1
 // CHECK: ret i32 1
-// CHECK: define i32 @f2
+// CHECK-LABEL: define i32 @f2
 // CHECK: ret i32 1
 
 // <rdr://6115726>
diff --git a/test/CodeGen/2008-08-07-AlignPadding2.c b/test/CodeGen/2008-08-07-AlignPadding2.c
index ecf28dd..cef71a3 100644
--- a/test/CodeGen/2008-08-07-AlignPadding2.c
+++ b/test/CodeGen/2008-08-07-AlignPadding2.c
@@ -1,4 +1,4 @@
-/* RUN: %clang_cc1 %s -emit-llvm -o - -O0 | grep zeroinitializer | count 1
+/* RUN: %clang_cc1 %s -emit-llvm -o - | grep zeroinitializer | count 1
 
 The FE must not generate padding here between array elements. PR 2533. */
 
diff --git a/test/CodeGen/2010-01-18-Inlined-Debug.c b/test/CodeGen/2010-01-18-Inlined-Debug.c
index cf00be7..bdc6fc5 100644
--- a/test/CodeGen/2010-01-18-Inlined-Debug.c
+++ b/test/CodeGen/2010-01-18-Inlined-Debug.c
@@ -1,5 +1,5 @@
 // PR: 6058
-// RUN: %clang_cc1 -g -emit-llvm %s -O0 -o /dev/null
+// RUN: %clang_cc1 -g -emit-llvm %s -o /dev/null
 
 static inline int foo(double) __attribute__ ((always_inline));
 static inline int foo(double __x) { return __x; }
diff --git a/test/CodeGen/2010-03-5-LexicalScope.c b/test/CodeGen/2010-03-5-LexicalScope.c
index e0e41dd..8dc68d7 100644
--- a/test/CodeGen/2010-03-5-LexicalScope.c
+++ b/test/CodeGen/2010-03-5-LexicalScope.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -emit-llvm -O0 -g %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -g %s -o - | FileCheck %s
 // CHECK: DW_TAG_lexical_block
 // CHECK: DW_TAG_lexical_block
 int foo(int i) {
diff --git a/test/CodeGen/2010-07-08-DeclDebugLineNo.c b/test/CodeGen/2010-07-08-DeclDebugLineNo.c
index 1637a49..5e9edd9 100644
--- a/test/CodeGen/2010-07-08-DeclDebugLineNo.c
+++ b/test/CodeGen/2010-07-08-DeclDebugLineNo.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -emit-llvm -O0 -g %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -g %s -o - | FileCheck %s
 // Insure that dbg.declare lines for locals refer to correct line number records.
 // Radar 8152866.
 void foo() {
diff --git a/test/CodeGen/2010-08-12-asm-aggr-arg.c b/test/CodeGen/2010-08-12-asm-aggr-arg.c
index 5ddc412..dcd507d 100644
--- a/test/CodeGen/2010-08-12-asm-aggr-arg.c
+++ b/test/CodeGen/2010-08-12-asm-aggr-arg.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -emit-llvm -O0 -o - | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s
 // Radar 8288710: A small aggregate can be passed as an integer. Make sure
 // we don't get an error with "input constraint with a matching output
 // constraint of incompatible type!"
diff --git a/test/CodeGen/3dnow-builtins.c b/test/CodeGen/3dnow-builtins.c index 294fbc0..f53b85c 100644 --- a/test/CodeGen/3dnow-builtins.c +++ b/test/CodeGen/3dnow-builtins.c @@ -6,151 +6,151 @@ #include <x86intrin.h> __m64 test_m_pavgusb(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pavgusb + // CHECK-LABEL: define i64 @test_m_pavgusb // CHECK: @llvm.x86.3dnow.pavgusb return _m_pavgusb(m1, m2); } __m64 test_m_pf2id(__m64 m) { - // CHECK: define i64 @test_m_pf2id + // CHECK-LABEL: define i64 @test_m_pf2id // CHECK: @llvm.x86.3dnow.pf2id return _m_pf2id(m); } __m64 test_m_pfacc(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pfacc + // CHECK-LABEL: define i64 @test_m_pfacc // CHECK: @llvm.x86.3dnow.pfacc return _m_pfacc(m1, m2); } __m64 test_m_pfadd(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pfadd + // CHECK-LABEL: define i64 @test_m_pfadd // CHECK: @llvm.x86.3dnow.pfadd return _m_pfadd(m1, m2); } __m64 test_m_pfcmpeq(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pfcmpeq + // CHECK-LABEL: define i64 @test_m_pfcmpeq // CHECK: @llvm.x86.3dnow.pfcmpeq return _m_pfcmpeq(m1, m2); } __m64 test_m_pfcmpge(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pfcmpge + // CHECK-LABEL: define i64 @test_m_pfcmpge // CHECK: @llvm.x86.3dnow.pfcmpge return _m_pfcmpge(m1, m2); } __m64 test_m_pfcmpgt(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pfcmpgt + // CHECK-LABEL: define i64 @test_m_pfcmpgt // CHECK: @llvm.x86.3dnow.pfcmpgt return _m_pfcmpgt(m1, m2); } __m64 test_m_pfmax(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pfmax + // CHECK-LABEL: define i64 @test_m_pfmax // CHECK: @llvm.x86.3dnow.pfmax return _m_pfmax(m1, m2); } __m64 test_m_pfmin(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pfmin + // CHECK-LABEL: define i64 @test_m_pfmin // CHECK: @llvm.x86.3dnow.pfmin return _m_pfmin(m1, m2); } __m64 test_m_pfmul(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pfmul + // CHECK-LABEL: define i64 @test_m_pfmul // CHECK: @llvm.x86.3dnow.pfmul return _m_pfmul(m1, m2); } __m64 test_m_pfrcp(__m64 m) { - // CHECK: define i64 @test_m_pfrcp + // CHECK-LABEL: define i64 @test_m_pfrcp // CHECK: @llvm.x86.3dnow.pfrcp return _m_pfrcp(m); } __m64 test_m_pfrcpit1(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pfrcpit1 + // CHECK-LABEL: define i64 @test_m_pfrcpit1 // CHECK: @llvm.x86.3dnow.pfrcpit1 return _m_pfrcpit1(m1, m2); } __m64 test_m_pfrcpit2(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pfrcpit2 + // CHECK-LABEL: define i64 @test_m_pfrcpit2 // CHECK: @llvm.x86.3dnow.pfrcpit2 return _m_pfrcpit2(m1, m2); } __m64 test_m_pfrsqrt(__m64 m) { - // CHECK: define i64 @test_m_pfrsqrt + // CHECK-LABEL: define i64 @test_m_pfrsqrt // CHECK: @llvm.x86.3dnow.pfrsqrt return _m_pfrsqrt(m); } __m64 test_m_pfrsqrtit1(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pfrsqrtit1 + // CHECK-LABEL: define i64 @test_m_pfrsqrtit1 // CHECK: @llvm.x86.3dnow.pfrsqit1 return _m_pfrsqrtit1(m1, m2); } __m64 test_m_pfsub(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pfsub + // CHECK-LABEL: define i64 @test_m_pfsub // CHECK: @llvm.x86.3dnow.pfsub return _m_pfsub(m1, m2); } __m64 test_m_pfsubr(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pfsubr + // CHECK-LABEL: define i64 @test_m_pfsubr // CHECK: @llvm.x86.3dnow.pfsubr return _m_pfsubr(m1, m2); } __m64 test_m_pi2fd(__m64 m) { - // CHECK: define i64 @test_m_pi2fd + // CHECK-LABEL: define i64 @test_m_pi2fd // CHECK: @llvm.x86.3dnow.pi2fd return _m_pi2fd(m); } __m64 test_m_pmulhrw(__m64 m1, __m64 m2) { - // 
CHECK: define i64 @test_m_pmulhrw + // CHECK-LABEL: define i64 @test_m_pmulhrw // CHECK: @llvm.x86.3dnow.pmulhrw return _m_pmulhrw(m1, m2); } __m64 test_m_pf2iw(__m64 m) { - // CHECK: define i64 @test_m_pf2iw + // CHECK-LABEL: define i64 @test_m_pf2iw // CHECK: @llvm.x86.3dnowa.pf2iw return _m_pf2iw(m); } __m64 test_m_pfnacc(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pfnacc + // CHECK-LABEL: define i64 @test_m_pfnacc // CHECK: @llvm.x86.3dnowa.pfnacc return _m_pfnacc(m1, m2); } __m64 test_m_pfpnacc(__m64 m1, __m64 m2) { - // CHECK: define i64 @test_m_pfpnacc + // CHECK-LABEL: define i64 @test_m_pfpnacc // CHECK: @llvm.x86.3dnowa.pfpnacc return _m_pfpnacc(m1, m2); } __m64 test_m_pi2fw(__m64 m) { - // CHECK: define i64 @test_m_pi2fw + // CHECK-LABEL: define i64 @test_m_pi2fw // CHECK: @llvm.x86.3dnowa.pi2fw return _m_pi2fw(m); } __m64 test_m_pswapdsf(__m64 m) { - // CHECK: define i64 @test_m_pswapdsf + // CHECK-LABEL: define i64 @test_m_pswapdsf // CHECK: @llvm.x86.3dnowa.pswapd return _m_pswapdsf(m); } __m64 test_m_pswapdsi(__m64 m) { - // CHECK: define i64 @test_m_pswapdsi + // CHECK-LABEL: define i64 @test_m_pswapdsi // CHECK: @llvm.x86.3dnowa.pswapd return _m_pswapdsi(m); } diff --git a/test/CodeGen/Atomics.c b/test/CodeGen/Atomics.c index 2bb38fd..5798dff 100644 --- a/test/CodeGen/Atomics.c +++ b/test/CodeGen/Atomics.c @@ -11,7 +11,7 @@ unsigned int ui; signed long long sll; unsigned long long ull; -void test_op_ignore (void) // CHECK: define void @test_op_ignore +void test_op_ignore (void) // CHECK-LABEL: define void @test_op_ignore { (void) __sync_fetch_and_add (&sc, 1); // CHECK: atomicrmw add i8 (void) __sync_fetch_and_add (&uc, 1); // CHECK: atomicrmw add i8 @@ -60,7 +60,7 @@ void test_op_ignore (void) // CHECK: define void @test_op_ignore } -void test_fetch_and_op (void) // CHECK: define void @test_fetch_and_op +void test_fetch_and_op (void) // CHECK-LABEL: define void @test_fetch_and_op { sc = __sync_fetch_and_add (&sc, 11); // CHECK: atomicrmw add uc = __sync_fetch_and_add (&uc, 11); // CHECK: atomicrmw add diff --git a/test/CodeGen/PR15826.c b/test/CodeGen/PR15826.c new file mode 100644 index 0000000..28d7445 --- /dev/null +++ b/test/CodeGen/PR15826.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -verify -emit-llvm-only %s -o %t + +/* Testcase for PR15826 - clang hits assert in clang::ASTContext::getASTRecordLayout */ +struct sysctl_req { + struct aiocblist *p_aio; +}; + +struct sysctl_oid { + int (*oid_handler)(struct sysctl_req *req); +}; + +static struct sysctl_oid sysctl___kern_features_aio; + +static void const *const __set_sysctl_set_sym_sysctl___kern_features_aio + __attribute__((__used__)) = &sysctl___kern_features_aio; + +struct aiocblist { + struct aiocb uaiocb; // expected-error {{field has incomplete type}} expected-note {{forward declaration}} +}; diff --git a/test/CodeGen/PR3589-freestanding-libcalls.c b/test/CodeGen/PR3589-freestanding-libcalls.c index 40e5fb1..5216e82 100644 --- a/test/CodeGen/PR3589-freestanding-libcalls.c +++ b/test/CodeGen/PR3589-freestanding-libcalls.c @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | grep 'declare i32 @printf' | count 1 // RUN: %clang_cc1 -triple i386-unknown-unknown -O2 -emit-llvm %s -o - | grep 'declare i32 @puts' | count 1 -// RUN: %clang_cc1 -triple i386-unknown-unknown -ffreestanding -O2 -emit-llvm %s -o - | grep 'declare i32 @puts' | count 0 +// RUN: %clang_cc1 -triple i386-unknown-unknown -ffreestanding -O2 -emit-llvm %s -o - | not grep 'declare i32 @puts' int printf(const char *, 
...); diff --git a/test/CodeGen/PR5060-align.c b/test/CodeGen/PR5060-align.c index efd8520..34293a9 100644 --- a/test/CodeGen/PR5060-align.c +++ b/test/CodeGen/PR5060-align.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -emit-llvm %s -o - -verify | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s // CHECK: @foo.p = internal global i8 0, align 32 char *foo(void) { diff --git a/test/CodeGen/_Bool-conversion.c b/test/CodeGen/_Bool-conversion.c index 9e5e894..a51cd8e 100644 --- a/test/CodeGen/_Bool-conversion.c +++ b/test/CodeGen/_Bool-conversion.c @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -triple i386 -emit-llvm -O2 -o - %s | FileCheck %s -// CHECK: define i32 @f0() +// CHECK-LABEL: define i32 @f0() // CHECK: ret i32 1 // CHECK: } diff --git a/test/CodeGen/aarch64-arguments.c b/test/CodeGen/aarch64-arguments.c index 901e734..a70dfb1 100644 --- a/test/CodeGen/aarch64-arguments.c +++ b/test/CodeGen/aarch64-arguments.c @@ -3,7 +3,7 @@ // Sign extension is performed by the callee on AArch64, which means // that we *shouldn't* tag arguments and returns with their extension. -// PCS: define i8 @f0(i16 %a) +// PCS-LABEL: define i8 @f0(i16 %a) char f0(short a) { return a; } @@ -32,11 +32,11 @@ struct s5 f5(void) {} struct s6 { int f0[1]; }; struct s6 f6(void) {} -// PCS: define void @f7() +// PCS-LABEL: define void @f7() struct s7 { struct { int : 0; } f0; }; struct s7 f7(void) {} -// PCS: define void @f8() +// PCS-LABEL: define void @f8() struct s8 { struct { int : 0; } f0[1]; }; struct s8 f8(void) {} @@ -56,18 +56,18 @@ struct s11 f11(void) {} union u12 { char f0; short f1; int f2; long f3; }; union u12 f12(void) {} -// PCS: define %struct.s13 @f13() +// PCS-LABEL: define %struct.s13 @f13() struct s13 { float f0; }; struct s13 f13(void) {} -// PCS: define %union.u14 @f14() +// PCS-LABEL: define %union.u14 @f14() union u14 { float f0; }; union u14 f14(void) {} -// PCS: define void @f15() +// PCS-LABEL: define void @f15() void f15(struct s7 a0) {} -// PCS: define void @f16() +// PCS-LABEL: define void @f16() void f16(struct s8 a0) {} // PCS: define [1 x i64] @f17() @@ -111,14 +111,14 @@ struct s26 f26() {} struct s27 { _Complex long f0; }; struct s27 f27() {} -// PCS: define void @f28(i8 %a, i16 %b, i32 %c, i64 %d, float %e, double %f) +// PCS-LABEL: define void @f28(i8 %a, i16 %b, i32 %c, i64 %d, float %e, double %f) void f28(char a, short b, int c, long d, float e, double f) {} // PCS: define void @f29([2 x i64] %a struct s29 { int arr[4]; }; void f29(struct s29 a) {} -// PCS: define void @f30(%struct.s30* %a) +// PCS-LABEL: define void @f30(%struct.s30* %a) struct s30 { int arr[4]; char c;}; void f30(struct s30 a) {} @@ -126,7 +126,7 @@ void f30(struct s30 a) {} struct s31 { double arr[4]; }; void f31(struct s31 a) {} -// PCS: define void @f32(%struct.s32* %a) +// PCS-LABEL: define void @f32(%struct.s32* %a) struct s32 { float arr[5]; }; void f32(struct s32 a) {} @@ -135,11 +135,11 @@ void f32(struct s32 a) {} struct s33 { float arr[3]; float a; }; void f33(struct s33 a) {} -// PCS: define void @f34(%struct.s34* noalias sret +// PCS-LABEL: define void @f34(%struct.s34* noalias sret struct s34 { int a[4]; char b }; struct s34 f34(void) {} -// PCS: define void @f35() +// PCS-LABEL: define void @f35() struct s35 {}; void f35(struct s35 a) {} diff --git a/test/CodeGen/aarch64-neon-2velem.c b/test/CodeGen/aarch64-neon-2velem.c new file mode 100644 index 0000000..03f7df7 --- /dev/null +++ b/test/CodeGen/aarch64-neon-2velem.c @@ -0,0 +1,1698 @@ +// REQUIRES: aarch64-registered-target +// RUN: 
%clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmla_lane_s16 + return vmla_lane_s16(a, b, v, 3); + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlaq_lane_s16 + return vmlaq_lane_s16(a, b, v, 3); + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmla_lane_s32 + return vmla_lane_s32(a, b, v, 1); + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlaq_lane_s32 + return vmlaq_lane_s32(a, b, v, 1); + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int16x4_t test_vmla_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmla_laneq_s16 + return vmla_laneq_s16(a, b, v, 7); + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +int16x8_t test_vmlaq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlaq_laneq_s16 + return vmlaq_laneq_s16(a, b, v, 7); + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +int32x2_t test_vmla_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmla_laneq_s32 + return vmla_laneq_s32(a, b, v, 3); + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vmlaq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlaq_laneq_s32 + return vmlaq_laneq_s32(a, b, v, 3); + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmls_lane_s16 + return vmls_lane_s16(a, b, v, 3); + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlsq_lane_s16 + return vmlsq_lane_s16(a, b, v, 3); + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmls_lane_s32 + return vmls_lane_s32(a, b, v, 1); + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlsq_lane_s32 + return vmlsq_lane_s32(a, b, v, 1); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int16x4_t test_vmls_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmls_laneq_s16 + return vmls_laneq_s16(a, b, v, 7); + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +int16x8_t test_vmlsq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlsq_laneq_s16 + return vmlsq_laneq_s16(a, b, v, 7); + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +int32x2_t test_vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmls_laneq_s32 + return vmls_laneq_s32(a, b, v, 3); + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vmlsq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { + // 
CHECK: test_vmlsq_laneq_s32 + return vmlsq_laneq_s32(a, b, v, 3); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t v) { + // CHECK: test_vmul_lane_s16 + return vmul_lane_s16(a, v, 3); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t v) { + // CHECK: test_vmulq_lane_s16 + return vmulq_lane_s16(a, v, 3); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t v) { + // CHECK: test_vmul_lane_s32 + return vmul_lane_s32(a, v, 1); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t v) { + // CHECK: test_vmulq_lane_s32 + return vmulq_lane_s32(a, v, 1); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t v) { + // CHECK: test_vmul_lane_u16 + return vmul_lane_u16(a, v, 3); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t v) { + // CHECK: test_vmulq_lane_u16 + return vmulq_lane_u16(a, v, 3); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t v) { + // CHECK: test_vmul_lane_u32 + return vmul_lane_u32(a, v, 1); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t v) { + // CHECK: test_vmulq_lane_u32 + return vmulq_lane_u32(a, v, 1); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int16x4_t test_vmul_laneq_s16(int16x4_t a, int16x8_t v) { + // CHECK: test_vmul_laneq_s16 + return vmul_laneq_s16(a, v, 7); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +int16x8_t test_vmulq_laneq_s16(int16x8_t a, int16x8_t v) { + // CHECK: test_vmulq_laneq_s16 + return vmulq_laneq_s16(a, v, 7); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +int32x2_t test_vmul_laneq_s32(int32x2_t a, int32x4_t v) { + // CHECK: test_vmul_laneq_s32 + return vmul_laneq_s32(a, v, 3); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vmulq_laneq_s32(int32x4_t a, int32x4_t v) { + // CHECK: test_vmulq_laneq_s32 + return vmulq_laneq_s32(a, v, 3); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +uint16x4_t test_vmul_laneq_u16(uint16x4_t a, uint16x8_t v) { + // CHECK: test_vmul_laneq_u16 + return vmul_laneq_u16(a, v, 7); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +uint16x8_t test_vmulq_laneq_u16(uint16x8_t a, uint16x8_t v) { + // CHECK: test_vmulq_laneq_u16 + return vmulq_laneq_u16(a, v, 7); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +uint32x2_t test_vmul_laneq_u32(uint32x2_t a, uint32x4_t v) { + // CHECK: test_vmul_laneq_u32 + return vmul_laneq_u32(a, v, 3); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +uint32x4_t test_vmulq_laneq_u32(uint32x4_t a, uint32x4_t v) { + // CHECK: test_vmulq_laneq_u32 + return vmulq_laneq_u32(a, v, 3); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +float32x2_t test_vfma_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { + // CHECK: test_vfma_lane_f32 + return vfma_lane_f32(a, b, v, 1); + // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +float32x4_t test_vfmaq_lane_f32(float32x4_t a, 
float32x4_t b, float32x2_t v) { + // CHECK: test_vfmaq_lane_f32 + return vfmaq_lane_f32(a, b, v, 1); + // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +float32x2_t test_vfma_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { + // CHECK: test_vfma_laneq_f32 + return vfma_laneq_f32(a, b, v, 3); + // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +float32x4_t test_vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { + // CHECK: test_vfmaq_laneq_f32 + return vfmaq_laneq_f32(a, b, v, 3); + // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +float32x2_t test_vfms_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { + // CHECK: test_vfms_lane_f32 + return vfms_lane_f32(a, b, v, 1); + // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +float32x4_t test_vfmsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { + // CHECK: test_vfmsq_lane_f32 + return vfmsq_lane_f32(a, b, v, 1); + // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +float32x2_t test_vfms_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { + // CHECK: test_vfms_laneq_f32 + return vfms_laneq_f32(a, b, v, 3); + // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +float32x4_t test_vfmsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { + // CHECK: test_vfmsq_laneq_f32 + return vfmsq_laneq_f32(a, b, v, 3); + // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +float64x2_t test_vfmaq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) { + // CHECK: test_vfmaq_lane_f64 + return vfmaq_lane_f64(a, b, v, 0); + // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +float64x2_t test_vfmaq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) { + // CHECK: test_vfmaq_laneq_f64 + return vfmaq_laneq_f64(a, b, v, 1); + // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +} + +float64x2_t test_vfmsq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) { + // CHECK: test_vfmsq_lane_f64 + return vfmsq_lane_f64(a, b, v, 0); + // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +float64x2_t test_vfmsq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) { + // CHECK: test_vfmsq_laneq_f64 + return vfmsq_laneq_f64(a, b, v, 1); + // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +} + +int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmlal_lane_s16 + return vmlal_lane_s16(a, b, v, 3); + // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmlal_lane_s32 + return vmlal_lane_s32(a, b, v, 1); + // CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmlal_laneq_s16 + return vmlal_laneq_s16(a, b, v, 7); + // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +int64x2_t test_vmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmlal_laneq_s32 + return vmlal_laneq_s32(a, b, v, 3); + // CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlal_high_lane_s16 + return vmlal_high_lane_s16(a, b, v, 3); + // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { + // 
CHECK: test_vmlal_high_lane_s32 + return vmlal_high_lane_s32(a, b, v, 1); + // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlal_high_laneq_s16 + return vmlal_high_laneq_s16(a, b, v, 7); + // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +int64x2_t test_vmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlal_high_laneq_s32 + return vmlal_high_laneq_s32(a, b, v, 3); + // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmlsl_lane_s16 + return vmlsl_lane_s16(a, b, v, 3); + // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmlsl_lane_s32 + return vmlsl_lane_s32(a, b, v, 1); + // CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmlsl_laneq_s16 + return vmlsl_laneq_s16(a, b, v, 7); + // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +int64x2_t test_vmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmlsl_laneq_s32 + return vmlsl_laneq_s32(a, b, v, 3); + // CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlsl_high_lane_s16 + return vmlsl_high_lane_s16(a, b, v, 3); + // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlsl_high_lane_s32 + return vmlsl_high_lane_s32(a, b, v, 1); + // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlsl_high_laneq_s16 + return vmlsl_high_laneq_s16(a, b, v, 7); + // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +int64x2_t test_vmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlsl_high_laneq_s32 + return vmlsl_high_laneq_s32(a, b, v, 3); + // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vmlal_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmlal_lane_u16 + return vmlal_lane_u16(a, b, v, 3); + // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vmlal_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmlal_lane_u32 + return vmlal_lane_u32(a, b, v, 1); + // CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vmlal_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmlal_laneq_u16 + return vmlal_laneq_u16(a, b, v, 7); + // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +int64x2_t test_vmlal_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmlal_laneq_u32 + return vmlal_laneq_u32(a, b, v, 3); + // CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vmlal_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlal_high_lane_u16 + return vmlal_high_lane_u16(a, b, v, 3); + // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vmlal_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) { 
+ // CHECK: test_vmlal_high_lane_u32 + return vmlal_high_lane_u32(a, b, v, 1); + // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vmlal_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlal_high_laneq_u16 + return vmlal_high_laneq_u16(a, b, v, 7); + // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +int64x2_t test_vmlal_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlal_high_laneq_u32 + return vmlal_high_laneq_u32(a, b, v, 3); + // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vmlsl_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmlsl_lane_u16 + return vmlsl_lane_u16(a, b, v, 3); + // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vmlsl_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmlsl_lane_u32 + return vmlsl_lane_u32(a, b, v, 1); + // CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vmlsl_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmlsl_laneq_u16 + return vmlsl_laneq_u16(a, b, v, 7); + // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +int64x2_t test_vmlsl_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmlsl_laneq_u32 + return vmlsl_laneq_u32(a, b, v, 3); + // CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vmlsl_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlsl_high_lane_u16 + return vmlsl_high_lane_u16(a, b, v, 3); + // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vmlsl_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlsl_high_lane_u32 + return vmlsl_high_lane_u32(a, b, v, 1); + // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vmlsl_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlsl_high_laneq_u16 + return vmlsl_high_laneq_u16(a, b, v, 7); + // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +int64x2_t test_vmlsl_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlsl_high_laneq_u32 + return vmlsl_high_laneq_u32(a, b, v, 3); + // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t v) { + // CHECK: test_vmull_lane_s16 + return vmull_lane_s16(a, v, 3); + // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t v) { + // CHECK: test_vmull_lane_s32 + return vmull_lane_s32(a, v, 1); + // CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t v) { + // CHECK: test_vmull_lane_u16 + return vmull_lane_u16(a, v, 3); + // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t v) { + // CHECK: test_vmull_lane_u32 + return vmull_lane_u32(a, v, 1); + // CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vmull_high_lane_s16(int16x8_t a, int16x4_t v) { + // CHECK: test_vmull_high_lane_s16 + return vmull_high_lane_s16(a, v, 3); + // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vmull_high_lane_s32(int32x4_t a, int32x2_t v) { + // CHECK: test_vmull_high_lane_s32 + return vmull_high_lane_s32(a, v, 1); + // CHECK: 
smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +uint32x4_t test_vmull_high_lane_u16(uint16x8_t a, uint16x4_t v) { + // CHECK: test_vmull_high_lane_u16 + return vmull_high_lane_u16(a, v, 3); + // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +uint64x2_t test_vmull_high_lane_u32(uint32x4_t a, uint32x2_t v) { + // CHECK: test_vmull_high_lane_u32 + return vmull_high_lane_u32(a, v, 1); + // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vmull_laneq_s16(int16x4_t a, int16x8_t v) { + // CHECK: test_vmull_laneq_s16 + return vmull_laneq_s16(a, v, 7); + // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +int64x2_t test_vmull_laneq_s32(int32x2_t a, int32x4_t v) { + // CHECK: test_vmull_laneq_s32 + return vmull_laneq_s32(a, v, 3); + // CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +uint32x4_t test_vmull_laneq_u16(uint16x4_t a, uint16x8_t v) { + // CHECK: test_vmull_laneq_u16 + return vmull_laneq_u16(a, v, 7); + // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +uint64x2_t test_vmull_laneq_u32(uint32x2_t a, uint32x4_t v) { + // CHECK: test_vmull_laneq_u32 + return vmull_laneq_u32(a, v, 3); + // CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vmull_high_laneq_s16(int16x8_t a, int16x8_t v) { + // CHECK: test_vmull_high_laneq_s16 + return vmull_high_laneq_s16(a, v, 7); + // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +int64x2_t test_vmull_high_laneq_s32(int32x4_t a, int32x4_t v) { + // CHECK: test_vmull_high_laneq_s32 + return vmull_high_laneq_s32(a, v, 3); + // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +uint32x4_t test_vmull_high_laneq_u16(uint16x8_t a, uint16x8_t v) { + // CHECK: test_vmull_high_laneq_u16 + return vmull_high_laneq_u16(a, v, 7); + // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +uint64x2_t test_vmull_high_laneq_u32(uint32x4_t a, uint32x4_t v) { + // CHECK: test_vmull_high_laneq_u32 + return vmull_high_laneq_u32(a, v, 3); + // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vqdmlal_lane_s16 + return vqdmlal_lane_s16(a, b, v, 3); + // CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vqdmlal_lane_s32 + return vqdmlal_lane_s32(a, b, v, 1); + // CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vqdmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vqdmlal_high_lane_s16 + return vqdmlal_high_lane_s16(a, b, v, 3); + // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vqdmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vqdmlal_high_lane_s32 + return vqdmlal_high_lane_s32(a, b, v, 1); + // CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vqdmlsl_lane_s16 + return vqdmlsl_lane_s16(a, b, v, 3); + // CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vqdmlsl_lane_s32 + return vqdmlsl_lane_s32(a, b, v, 1); + // CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int32x4_t 
test_vqdmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vqdmlsl_high_lane_s16 + return vqdmlsl_high_lane_s16(a, b, v, 3); + // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vqdmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vqdmlsl_high_lane_s32 + return vqdmlsl_high_lane_s32(a, b, v, 1); + // CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t v) { + // CHECK: test_vqdmull_lane_s16 + return vqdmull_lane_s16(a, v, 3); + // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t v) { + // CHECK: test_vqdmull_lane_s32 + return vqdmull_lane_s32(a, v, 1); + // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vqdmull_laneq_s16(int16x4_t a, int16x8_t v) { + // CHECK: test_vqdmull_laneq_s16 + return vqdmull_laneq_s16(a, v, 3); + // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vqdmull_laneq_s32(int32x2_t a, int32x4_t v) { + // CHECK: test_vqdmull_laneq_s32 + return vqdmull_laneq_s32(a, v, 3); + // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vqdmull_high_lane_s16(int16x8_t a, int16x4_t v) { + // CHECK: test_vqdmull_high_lane_s16 + return vqdmull_high_lane_s16(a, v, 3); + // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +int64x2_t test_vqdmull_high_lane_s32(int32x4_t a, int32x2_t v) { + // CHECK: test_vqdmull_high_lane_s32 + return vqdmull_high_lane_s32(a, v, 1); + // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vqdmull_high_laneq_s16(int16x8_t a, int16x8_t v) { + // CHECK: test_vqdmull_high_laneq_s16 + return vqdmull_high_laneq_s16(a, v, 7); + // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +int64x2_t test_vqdmull_high_laneq_s32(int32x4_t a, int32x4_t v) { + // CHECK: test_vqdmull_high_laneq_s32 + return vqdmull_high_laneq_s32(a, v, 3); + // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t v) { + // CHECK: test_vqdmulh_lane_s16 + return vqdmulh_lane_s16(a, v, 3); + // CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t v) { + // CHECK: test_vqdmulhq_lane_s16 + return vqdmulhq_lane_s16(a, v, 3); + // CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t v) { + // CHECK: test_vqdmulh_lane_s32 + return vqdmulh_lane_s32(a, v, 1); + // CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t v) { + // CHECK: test_vqdmulhq_lane_s32 + return vqdmulhq_lane_s32(a, v, 1); + // CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t v) { + // CHECK: test_vqrdmulh_lane_s16 + return vqrdmulh_lane_s16(a, v, 3); + // CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t v) { + // CHECK: test_vqrdmulhq_lane_s16 + return vqrdmulhq_lane_s16(a, v, 3); + // CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t v) { + // CHECK: test_vqrdmulh_lane_s32 + return vqrdmulh_lane_s32(a, 
v, 1); + // CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t v) { + // CHECK: test_vqrdmulhq_lane_s32 + return vqrdmulhq_lane_s32(a, v, 1); + // CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t v) { + // CHECK: test_vmul_lane_f32 + return vmul_lane_f32(a, v, 1); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + + +float64x1_t test_vmul_lane_f64(float64x1_t a, float64x1_t v) { + // CHECK: test_vmul_lane_f64 + return vmul_lane_f64(a, v, 0); + // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + + +float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t v) { + // CHECK: test_vmulq_lane_f32 + return vmulq_lane_f32(a, v, 1); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +float64x2_t test_vmulq_lane_f64(float64x2_t a, float64x1_t v) { + // CHECK: test_vmulq_lane_f64 + return vmulq_lane_f64(a, v, 0); + // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +float32x2_t test_vmul_laneq_f32(float32x2_t a, float32x4_t v) { + // CHECK: test_vmul_laneq_f32 + return vmul_laneq_f32(a, v, 3); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +float64x1_t test_vmul_laneq_f64(float64x1_t a, float64x2_t v) { + // CHECK: test_vmul_laneq_f64 + return vmul_laneq_f64(a, v, 1); + // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +} + + +float32x4_t test_vmulq_laneq_f32(float32x4_t a, float32x4_t v) { + // CHECK: test_vmulq_laneq_f32 + return vmulq_laneq_f32(a, v, 3); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +float64x2_t test_vmulq_laneq_f64(float64x2_t a, float64x2_t v) { + // CHECK: test_vmulq_laneq_f64 + return vmulq_laneq_f64(a, v, 1); + // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +} + +float32x2_t test_vmulx_lane_f32(float32x2_t a, float32x2_t v) { + // CHECK: test_vmulx_lane_f32 + return vmulx_lane_f32(a, v, 1); + // CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +float32x4_t test_vmulxq_lane_f32(float32x4_t a, float32x2_t v) { + // CHECK: test_vmulxq_lane_f32 + return vmulxq_lane_f32(a, v, 1); + // CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +float64x2_t test_vmulxq_lane_f64(float64x2_t a, float64x1_t v) { + // CHECK: test_vmulxq_lane_f64 + return vmulxq_lane_f64(a, v, 0); + // CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +float32x2_t test_vmulx_laneq_f32(float32x2_t a, float32x4_t v) { + // CHECK: test_vmulx_laneq_f32 + return vmulx_laneq_f32(a, v, 3); + // CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +float32x4_t test_vmulxq_laneq_f32(float32x4_t a, float32x4_t v) { + // CHECK: test_vmulxq_laneq_f32 + return vmulxq_laneq_f32(a, v, 3); + // CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +float64x2_t test_vmulxq_laneq_f64(float64x2_t a, float64x2_t v) { + // CHECK: test_vmulxq_laneq_f64 + return vmulxq_laneq_f64(a, v, 1); + // CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +} + +int16x4_t test_vmla_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmla_lane_s16_0 + return vmla_lane_s16(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vmlaq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlaq_lane_s16_0 + return vmlaq_lane_s16(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, 
{{v[0-9]+}}.h[0] +} + +int32x2_t test_vmla_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmla_lane_s32_0 + return vmla_lane_s32(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlaq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlaq_lane_s32_0 + return vmlaq_lane_s32(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vmla_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmla_laneq_s16_0 + return vmla_laneq_s16(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vmlaq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlaq_laneq_s16_0 + return vmlaq_laneq_s16(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vmla_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmla_laneq_s32_0 + return vmla_laneq_s32(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlaq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlaq_laneq_s32_0 + return vmlaq_laneq_s32(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vmls_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmls_lane_s16_0 + return vmls_lane_s16(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vmlsq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlsq_lane_s16_0 + return vmlsq_lane_s16(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vmls_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmls_lane_s32_0 + return vmls_lane_s32(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlsq_lane_s32_0 + return vmlsq_lane_s32(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vmls_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmls_laneq_s16_0 + return vmls_laneq_s16(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vmlsq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlsq_laneq_s16_0 + return vmlsq_laneq_s16(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vmls_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmls_laneq_s32_0 + return vmls_laneq_s32(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlsq_laneq_s32_0 + return vmlsq_laneq_s32(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vmul_lane_s16_0(int16x4_t a, int16x4_t v) { + // CHECK: test_vmul_lane_s16_0 + return vmul_lane_s16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vmulq_lane_s16_0(int16x8_t a, int16x4_t v) { + // CHECK: test_vmulq_lane_s16_0 + return vmulq_lane_s16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vmul_lane_s32_0(int32x2_t a, int32x2_t v) { + // CHECK: test_vmul_lane_s32_0 + return vmul_lane_s32(a, v, 
0); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmulq_lane_s32_0(int32x4_t a, int32x2_t v) { + // CHECK: test_vmulq_lane_s32_0 + return vmulq_lane_s32(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +uint16x4_t test_vmul_lane_u16_0(uint16x4_t a, uint16x4_t v) { + // CHECK: test_vmul_lane_u16_0 + return vmul_lane_u16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +uint16x8_t test_vmulq_lane_u16_0(uint16x8_t a, uint16x4_t v) { + // CHECK: test_vmulq_lane_u16_0 + return vmulq_lane_u16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +uint32x2_t test_vmul_lane_u32_0(uint32x2_t a, uint32x2_t v) { + // CHECK: test_vmul_lane_u32_0 + return vmul_lane_u32(a, v, 0); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmulq_lane_u32_0(uint32x4_t a, uint32x2_t v) { + // CHECK: test_vmulq_lane_u32_0 + return vmulq_lane_u32(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vmul_laneq_s16_0(int16x4_t a, int16x8_t v) { + // CHECK: test_vmul_laneq_s16_0 + return vmul_laneq_s16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vmulq_laneq_s16_0(int16x8_t a, int16x8_t v) { + // CHECK: test_vmulq_laneq_s16_0 + return vmulq_laneq_s16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vmul_laneq_s32_0(int32x2_t a, int32x4_t v) { + // CHECK: test_vmul_laneq_s32_0 + return vmul_laneq_s32(a, v, 0); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmulq_laneq_s32_0(int32x4_t a, int32x4_t v) { + // CHECK: test_vmulq_laneq_s32_0 + return vmulq_laneq_s32(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +uint16x4_t test_vmul_laneq_u16_0(uint16x4_t a, uint16x8_t v) { + // CHECK: test_vmul_laneq_u16_0 + return vmul_laneq_u16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +uint16x8_t test_vmulq_laneq_u16_0(uint16x8_t a, uint16x8_t v) { + // CHECK: test_vmulq_laneq_u16_0 + return vmulq_laneq_u16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +uint32x2_t test_vmul_laneq_u32_0(uint32x2_t a, uint32x4_t v) { + // CHECK: test_vmul_laneq_u32_0 + return vmul_laneq_u32(a, v, 0); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmulq_laneq_u32_0(uint32x4_t a, uint32x4_t v) { + // CHECK: test_vmulq_laneq_u32_0 + return vmulq_laneq_u32(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vfma_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { + // CHECK: test_vfma_lane_f32_0 + return vfma_lane_f32(a, b, v, 0); + // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vfmaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { + // CHECK: test_vfmaq_lane_f32_0 + return vfmaq_lane_f32(a, b, v, 0); + // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vfma_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { + // CHECK: test_vfma_laneq_f32_0 + return vfma_laneq_f32(a, b, v, 0); + // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vfmaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { + // CHECK: test_vfmaq_laneq_f32_0 + return vfmaq_laneq_f32(a, b, v, 0); + // CHECK: fmla {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vfms_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { + // CHECK: test_vfms_lane_f32_0 + return vfms_lane_f32(a, b, v, 0); + // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vfmsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { + // CHECK: test_vfmsq_lane_f32_0 + return vfmsq_lane_f32(a, b, v, 0); + // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vfms_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { + // CHECK: test_vfms_laneq_f32_0 + return vfms_laneq_f32(a, b, v, 0); + // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vfmsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { + // CHECK: test_vfmsq_laneq_f32_0 + return vfmsq_laneq_f32(a, b, v, 0); + // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float64x2_t test_vfmaq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) { + // CHECK: test_vfmaq_laneq_f64_0 + return vfmaq_laneq_f64(a, b, v, 0); + // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +float64x2_t test_vfmsq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) { + // CHECK: test_vfmsq_laneq_f64_0 + return vfmsq_laneq_f64(a, b, v, 0); + // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +int32x4_t test_vmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmlal_lane_s16_0 + return vmlal_lane_s16(a, b, v, 0); + // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmlal_lane_s32_0 + return vmlal_lane_s32(a, b, v, 0); + // CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmlal_laneq_s16_0 + return vmlal_laneq_s16(a, b, v, 0); + // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmlal_laneq_s32_0 + return vmlal_laneq_s32(a, b, v, 0); + // CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlal_high_lane_s16_0 + return vmlal_high_lane_s16(a, b, v, 0); + // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlal_high_lane_s32_0 + return vmlal_high_lane_s32(a, b, v, 0); + // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlal_high_laneq_s16_0 + return vmlal_high_laneq_s16(a, b, v, 0); + // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlal_high_laneq_s32_0 + return vmlal_high_laneq_s32(a, b, v, 0); + // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmlsl_lane_s16_0 + return vmlsl_lane_s16(a, b, v, 0); + // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmlsl_lane_s32_0 + return vmlsl_lane_s32(a, b, 
v, 0); + // CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmlsl_laneq_s16_0 + return vmlsl_laneq_s16(a, b, v, 0); + // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmlsl_laneq_s32_0 + return vmlsl_laneq_s32(a, b, v, 0); + // CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlsl_high_lane_s16_0 + return vmlsl_high_lane_s16(a, b, v, 0); + // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlsl_high_lane_s32_0 + return vmlsl_high_lane_s32(a, b, v, 0); + // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlsl_high_laneq_s16_0 + return vmlsl_high_laneq_s16(a, b, v, 0); + // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlsl_high_laneq_s32_0 + return vmlsl_high_laneq_s32(a, b, v, 0); + // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlal_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmlal_lane_u16_0 + return vmlal_lane_u16(a, b, v, 0); + // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmlal_lane_u32_0 + return vmlal_lane_u32(a, b, v, 0); + // CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlal_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmlal_laneq_u16_0 + return vmlal_laneq_u16(a, b, v, 0); + // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmlal_laneq_u32_0 + return vmlal_laneq_u32(a, b, v, 0); + // CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlal_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlal_high_lane_u16_0 + return vmlal_high_lane_u16(a, b, v, 0); + // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlal_high_lane_u32_0 + return vmlal_high_lane_u32(a, b, v, 0); + // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlal_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlal_high_laneq_u16_0 + return vmlal_high_laneq_u16(a, b, v, 0); + // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlal_high_laneq_u32_0 + return vmlal_high_laneq_u32(a, b, v, 0); + // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmlsl_lane_u16_0 + return vmlsl_lane_u16(a, b, v, 0); + // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t 
v) { + // CHECK: test_vmlsl_lane_u32_0 + return vmlsl_lane_u32(a, b, v, 0); + // CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmlsl_laneq_u16_0 + return vmlsl_laneq_u16(a, b, v, 0); + // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmlsl_laneq_u32_0 + return vmlsl_laneq_u32(a, b, v, 0); + // CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlsl_high_lane_u16_0 + return vmlsl_high_lane_u16(a, b, v, 0); + // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlsl_high_lane_u32_0 + return vmlsl_high_lane_u32(a, b, v, 0); + // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlsl_high_laneq_u16_0 + return vmlsl_high_laneq_u16(a, b, v, 0); + // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlsl_high_laneq_u32_0 + return vmlsl_high_laneq_u32(a, b, v, 0); + // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmull_lane_s16_0(int16x4_t a, int16x4_t v) { + // CHECK: test_vmull_lane_s16_0 + return vmull_lane_s16(a, v, 0); + // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmull_lane_s32_0(int32x2_t a, int32x2_t v) { + // CHECK: test_vmull_lane_s32_0 + return vmull_lane_s32(a, v, 0); + // CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmull_lane_u16_0(uint16x4_t a, uint16x4_t v) { + // CHECK: test_vmull_lane_u16_0 + return vmull_lane_u16(a, v, 0); + // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +uint64x2_t test_vmull_lane_u32_0(uint32x2_t a, uint32x2_t v) { + // CHECK: test_vmull_lane_u32_0 + return vmull_lane_u32(a, v, 0); + // CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { + // CHECK: test_vmull_high_lane_s16_0 + return vmull_high_lane_s16(a, v, 0); + // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { + // CHECK: test_vmull_high_lane_s32_0 + return vmull_high_lane_s32(a, v, 0); + // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmull_high_lane_u16_0(uint16x8_t a, uint16x4_t v) { + // CHECK: test_vmull_high_lane_u16_0 + return vmull_high_lane_u16(a, v, 0); + // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +uint64x2_t test_vmull_high_lane_u32_0(uint32x4_t a, uint32x2_t v) { + // CHECK: test_vmull_high_lane_u32_0 + return vmull_high_lane_u32(a, v, 0); + // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmull_laneq_s16_0(int16x4_t a, int16x8_t v) { + // CHECK: test_vmull_laneq_s16_0 + return vmull_laneq_s16(a, v, 0); + // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmull_laneq_s32_0(int32x2_t a, int32x4_t v) { + // CHECK: test_vmull_laneq_s32_0 + return vmull_laneq_s32(a, v, 0); + // 
CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmull_laneq_u16_0(uint16x4_t a, uint16x8_t v) { + // CHECK: test_vmull_laneq_u16_0 + return vmull_laneq_u16(a, v, 0); + // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +uint64x2_t test_vmull_laneq_u32_0(uint32x2_t a, uint32x4_t v) { + // CHECK: test_vmull_laneq_u32_0 + return vmull_laneq_u32(a, v, 0); + // CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { + // CHECK: test_vmull_high_laneq_s16_0 + return vmull_high_laneq_s16(a, v, 0); + // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { + // CHECK: test_vmull_high_laneq_s32_0 + return vmull_high_laneq_s32(a, v, 0); + // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmull_high_laneq_u16_0(uint16x8_t a, uint16x8_t v) { + // CHECK: test_vmull_high_laneq_u16_0 + return vmull_high_laneq_u16(a, v, 0); + // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +uint64x2_t test_vmull_high_laneq_u32_0(uint32x4_t a, uint32x4_t v) { + // CHECK: test_vmull_high_laneq_u32_0 + return vmull_high_laneq_u32(a, v, 0); + // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vqdmlal_lane_s16_0 + return vqdmlal_lane_s16(a, b, v, 0); + // CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vqdmlal_lane_s32_0 + return vqdmlal_lane_s32(a, b, v, 0); + // CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vqdmlal_high_lane_s16_0 + return vqdmlal_high_lane_s16(a, b, v, 0); + // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vqdmlal_high_lane_s32_0 + return vqdmlal_high_lane_s32(a, b, v, 0); + // CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vqdmlsl_lane_s16_0 + return vqdmlsl_lane_s16(a, b, v, 0); + // CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vqdmlsl_lane_s32_0 + return vqdmlsl_lane_s32(a, b, v, 0); + // CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vqdmlsl_high_lane_s16_0 + return vqdmlsl_high_lane_s16(a, b, v, 0); + // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vqdmlsl_high_lane_s32_0 + return vqdmlsl_high_lane_s32(a, b, v, 0); + // CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmull_lane_s16_0(int16x4_t a, int16x4_t v) { + // CHECK: test_vqdmull_lane_s16_0 + return vqdmull_lane_s16(a, v, 0); + // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmull_lane_s32_0(int32x2_t a, int32x2_t v) { + // CHECK: test_vqdmull_lane_s32_0 + return 
vqdmull_lane_s32(a, v, 0); + // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmull_laneq_s16_0(int16x4_t a, int16x8_t v) { + // CHECK: test_vqdmull_laneq_s16_0 + return vqdmull_laneq_s16(a, v, 0); + // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmull_laneq_s32_0(int32x2_t a, int32x4_t v) { + // CHECK: test_vqdmull_laneq_s32_0 + return vqdmull_laneq_s32(a, v, 0); + // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { + // CHECK: test_vqdmull_high_lane_s16_0 + return vqdmull_high_lane_s16(a, v, 0); + // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { + // CHECK: test_vqdmull_high_lane_s32_0 + return vqdmull_high_lane_s32(a, v, 0); + // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { + // CHECK: test_vqdmull_high_laneq_s16_0 + return vqdmull_high_laneq_s16(a, v, 0); + // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { + // CHECK: test_vqdmull_high_laneq_s32_0 + return vqdmull_high_laneq_s32(a, v, 0); + // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vqdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { + // CHECK: test_vqdmulh_lane_s16_0 + return vqdmulh_lane_s16(a, v, 0); + // CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vqdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { + // CHECK: test_vqdmulhq_lane_s16_0 + return vqdmulhq_lane_s16(a, v, 0); + // CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vqdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { + // CHECK: test_vqdmulh_lane_s32_0 + return vqdmulh_lane_s32(a, v, 0); + // CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) { + // CHECK: test_vqdmulhq_lane_s32_0 + return vqdmulhq_lane_s32(a, v, 0); + // CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vqrdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { + // CHECK: test_vqrdmulh_lane_s16_0 + return vqrdmulh_lane_s16(a, v, 0); + // CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vqrdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { + // CHECK: test_vqrdmulhq_lane_s16_0 + return vqrdmulhq_lane_s16(a, v, 0); + // CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vqrdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { + // CHECK: test_vqrdmulh_lane_s32_0 + return vqrdmulh_lane_s32(a, v, 0); + // CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqrdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) { + // CHECK: test_vqrdmulhq_lane_s32_0 + return vqrdmulhq_lane_s32(a, v, 0); + // CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vmul_lane_f32_0(float32x2_t a, float32x2_t v) { + // CHECK: test_vmul_lane_f32_0 + return vmul_lane_f32(a, v, 0); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vmulq_lane_f32_0(float32x4_t a, float32x2_t v) { + // CHECK: test_vmulq_lane_f32_0 + return vmulq_lane_f32(a, v, 0); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t 
test_vmul_laneq_f32_0(float32x2_t a, float32x4_t v) { + // CHECK: test_vmul_laneq_f32_0 + return vmul_laneq_f32(a, v, 0); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float64x1_t test_vmul_laneq_f64_0(float64x1_t a, float64x2_t v) { + // CHECK: test_vmul_laneq_f64_0 + return vmul_laneq_f64(a, v, 0); + // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + +float32x4_t test_vmulq_laneq_f32_0(float32x4_t a, float32x4_t v) { + // CHECK: test_vmulq_laneq_f32_0 + return vmulq_laneq_f32(a, v, 0); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float64x2_t test_vmulq_laneq_f64_0(float64x2_t a, float64x2_t v) { + // CHECK: test_vmulq_laneq_f64_0 + return vmulq_laneq_f64(a, v, 0); + // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +float32x2_t test_vmulx_lane_f32_0(float32x2_t a, float32x2_t v) { + // CHECK: test_vmulx_lane_f32_0 + return vmulx_lane_f32(a, v, 0); + // CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vmulxq_lane_f32_0(float32x4_t a, float32x2_t v) { + // CHECK: test_vmulxq_lane_f32_0 + return vmulxq_lane_f32(a, v, 0); + // CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float64x2_t test_vmulxq_lane_f64_0(float64x2_t a, float64x1_t v) { + // CHECK: test_vmulxq_lane_f64_0 + return vmulxq_lane_f64(a, v, 0); + // CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +float32x2_t test_vmulx_laneq_f32_0(float32x2_t a, float32x4_t v) { + // CHECK: test_vmulx_laneq_f32_0 + return vmulx_laneq_f32(a, v, 0); + // CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vmulxq_laneq_f32_0(float32x4_t a, float32x4_t v) { + // CHECK: test_vmulxq_laneq_f32_0 + return vmulxq_laneq_f32(a, v, 0); + // CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float64x2_t test_vmulxq_laneq_f64_0(float64x2_t a, float64x2_t v) { + // CHECK: test_vmulxq_laneq_f64_0 + return vmulxq_laneq_f64(a, v, 0); + // CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +int32x4_t test_vmull_high_n_s16(int16x8_t a, int16_t b) { + // CHECK: test_vmull_high_n_s16 + return vmull_high_n_s16(a, b); + // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmull_high_n_s32(int32x4_t a, int32_t b) { + // CHECK: test_vmull_high_n_s32 + return vmull_high_n_s32(a, b); + // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmull_high_n_u16(uint16x8_t a, uint16_t b) { + // CHECK: test_vmull_high_n_u16 + return vmull_high_n_u16(a, b); + // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +uint64x2_t test_vmull_high_n_u32(uint32x4_t a, uint32_t b) { + // CHECK: test_vmull_high_n_u32 + return vmull_high_n_u32(a, b); + // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmull_high_n_s16(int16x8_t a, int16_t b) { + // CHECK: test_vqdmull_high_n_s16 + return vqdmull_high_n_s16(a, b); + // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmull_high_n_s32(int32x4_t a, int32_t b) { + // CHECK: test_vqdmull_high_n_s32 + return vqdmull_high_n_s32(a, b); + // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { + // CHECK: test_vmlal_high_n_s16 + return vmlal_high_n_s16(a, b, c); + // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_high_n_s32(int64x2_t a, 
int32x4_t b, int32_t c) { + // CHECK: test_vmlal_high_n_s32 + return vmlal_high_n_s32(a, b, c); + // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmlal_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) { + // CHECK: test_vmlal_high_n_u16 + return vmlal_high_n_u16(a, b, c); + // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +uint64x2_t test_vmlal_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { + // CHECK: test_vmlal_high_n_u32 + return vmlal_high_n_u32(a, b, c); + // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { + // CHECK: test_vqdmlal_high_n_s16 + return vqdmlal_high_n_s16(a, b, c); + // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { + // CHECK: test_vqdmlal_high_n_s32 + return vqdmlal_high_n_s32(a, b, c); + // CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { + // CHECK: test_vmlsl_high_n_s16 + return vmlsl_high_n_s16(a, b, c); + // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { + // CHECK: test_vmlsl_high_n_s32 + return vmlsl_high_n_s32(a, b, c); + // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmlsl_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) { + // CHECK: test_vmlsl_high_n_u16 + return vmlsl_high_n_u16(a, b, c); + // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +uint64x2_t test_vmlsl_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { + // CHECK: test_vmlsl_high_n_u32 + return vmlsl_high_n_u32(a, b, c); + // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { + // CHECK: test_vqdmlsl_high_n_s16 + return vqdmlsl_high_n_s16(a, b, c); + // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { + // CHECK: test_vqdmlsl_high_n_s32 + return vqdmlsl_high_n_s32(a, b, c); + // CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) { + // CHECK: test_vmul_n_f32 + return vmul_n_f32(a, b); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) { + // CHECK: test_vmulq_n_f32 + return vmulq_n_f32(a, b); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float64x2_t test_vmulq_n_f64(float64x2_t a, float64_t b) { + // CHECK: test_vmulq_n_f64 + return vmulq_n_f64(a, b); + // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) { + // CHECK: test_vfma_n_f32 + return vfma_n_f32(a, b, n); + // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { + // CHECK: test_vfmaq_n_f32 + return vfmaq_n_f32(a, b, n); + // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) { + // CHECK: test_vfms_n_f32 + return vfms_n_f32(a, b, n); + // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t 
test_vfmsq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { + // CHECK: test_vfmsq_n_f32 + return vfmsq_n_f32(a, b, n); + // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} diff --git a/test/CodeGen/aarch64-neon-across.c b/test/CodeGen/aarch64-neon-across.c new file mode 100644 index 0000000..257b839 --- /dev/null +++ b/test/CodeGen/aarch64-neon-across.c @@ -0,0 +1,271 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +int16_t test_vaddlv_s8(int8x8_t a) { + // CHECK: test_vaddlv_s8 + return vaddlv_s8(a); + // CHECK: saddlv {{h[0-9]+}}, {{v[0-9]+}}.8b +} + +int32_t test_vaddlv_s16(int16x4_t a) { + // CHECK: test_vaddlv_s16 + return vaddlv_s16(a); + // CHECK: saddlv {{s[0-9]+}}, {{v[0-9]+}}.4h +} + +uint16_t test_vaddlv_u8(uint8x8_t a) { + // CHECK: test_vaddlv_u8 + return vaddlv_u8(a); + // CHECK: uaddlv {{h[0-9]+}}, {{v[0-9]+}}.8b +} + +uint32_t test_vaddlv_u16(uint16x4_t a) { + // CHECK: test_vaddlv_u16 + return vaddlv_u16(a); + // CHECK: uaddlv {{s[0-9]+}}, {{v[0-9]+}}.4h +} + +int16_t test_vaddlvq_s8(int8x16_t a) { + // CHECK: test_vaddlvq_s8 + return vaddlvq_s8(a); + // CHECK: saddlv {{h[0-9]+}}, {{v[0-9]+}}.16b +} + +int32_t test_vaddlvq_s16(int16x8_t a) { + // CHECK: test_vaddlvq_s16 + return vaddlvq_s16(a); + // CHECK: saddlv {{s[0-9]+}}, {{v[0-9]+}}.8h +} + +int64_t test_vaddlvq_s32(int32x4_t a) { + // CHECK: test_vaddlvq_s32 + return vaddlvq_s32(a); + // CHECK: saddlv {{d[0-9]+}}, {{v[0-9]+}}.4s +} + +uint16_t test_vaddlvq_u8(uint8x16_t a) { + // CHECK: test_vaddlvq_u8 + return vaddlvq_u8(a); + // CHECK: uaddlv {{h[0-9]+}}, {{v[0-9]+}}.16b +} + +uint32_t test_vaddlvq_u16(uint16x8_t a) { + // CHECK: test_vaddlvq_u16 + return vaddlvq_u16(a); + // CHECK: uaddlv {{s[0-9]+}}, {{v[0-9]+}}.8h +} + +uint64_t test_vaddlvq_u32(uint32x4_t a) { + // CHECK: test_vaddlvq_u32 + return vaddlvq_u32(a); + // CHECK: uaddlv {{d[0-9]+}}, {{v[0-9]+}}.4s +} + +int8_t test_vmaxv_s8(int8x8_t a) { + // CHECK: test_vmaxv_s8 + return vmaxv_s8(a); + // CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.8b +} + +int16_t test_vmaxv_s16(int16x4_t a) { + // CHECK: test_vmaxv_s16 + return vmaxv_s16(a); + // CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.4h +} + +uint8_t test_vmaxv_u8(uint8x8_t a) { + // CHECK: test_vmaxv_u8 + return vmaxv_u8(a); + // CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.8b +} + +uint16_t test_vmaxv_u16(uint16x4_t a) { + // CHECK: test_vmaxv_u16 + return vmaxv_u16(a); + // CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.4h +} + +int8_t test_vmaxvq_s8(int8x16_t a) { + // CHECK: test_vmaxvq_s8 + return vmaxvq_s8(a); + // CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b +} + +int16_t test_vmaxvq_s16(int16x8_t a) { + // CHECK: test_vmaxvq_s16 + return vmaxvq_s16(a); + // CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h +} + +int32_t test_vmaxvq_s32(int32x4_t a) { + // CHECK: test_vmaxvq_s32 + return vmaxvq_s32(a); + // CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s +} + +uint8_t test_vmaxvq_u8(uint8x16_t a) { + // CHECK: test_vmaxvq_u8 + return vmaxvq_u8(a); + // CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b +} + +uint16_t test_vmaxvq_u16(uint16x8_t a) { + // CHECK: test_vmaxvq_u16 + return vmaxvq_u16(a); + // CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h +} + +uint32_t test_vmaxvq_u32(uint32x4_t a) { + // CHECK: test_vmaxvq_u32 + return vmaxvq_u32(a); + // CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s +} + +int8_t test_vminv_s8(int8x8_t a) { + // CHECK: 
test_vminv_s8 + return vminv_s8(a); + // CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.8b +} + +int16_t test_vminv_s16(int16x4_t a) { + // CHECK: test_vminv_s16 + return vminv_s16(a); + // CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.4h +} + +uint8_t test_vminv_u8(uint8x8_t a) { + // CHECK: test_vminv_u8 + return vminv_u8(a); + // CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.8b +} + +uint16_t test_vminv_u16(uint16x4_t a) { + // CHECK: test_vminv_u16 + return vminv_u16(a); + // CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.4h +} + +int8_t test_vminvq_s8(int8x16_t a) { + // CHECK: test_vminvq_s8 + return vminvq_s8(a); + // CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b +} + +int16_t test_vminvq_s16(int16x8_t a) { + // CHECK: test_vminvq_s16 + return vminvq_s16(a); + // CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h +} + +int32_t test_vminvq_s32(int32x4_t a) { + // CHECK: test_vminvq_s32 + return vminvq_s32(a); + // CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s +} + +uint8_t test_vminvq_u8(uint8x16_t a) { + // CHECK: test_vminvq_u8 + return vminvq_u8(a); + // CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b +} + +uint16_t test_vminvq_u16(uint16x8_t a) { + // CHECK: test_vminvq_u16 + return vminvq_u16(a); + // CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h +} + +uint32_t test_vminvq_u32(uint32x4_t a) { + // CHECK: test_vminvq_u32 + return vminvq_u32(a); + // CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s +} + +int8_t test_vaddv_s8(int8x8_t a) { + // CHECK: test_vaddv_s8 + return vaddv_s8(a); + // CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.8b +} + +int16_t test_vaddv_s16(int16x4_t a) { + // CHECK: test_vaddv_s16 + return vaddv_s16(a); + // CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.4h +} + +uint8_t test_vaddv_u8(uint8x8_t a) { + // CHECK: test_vaddv_u8 + return vaddv_u8(a); + // CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.8b +} + +uint16_t test_vaddv_u16(uint16x4_t a) { + // CHECK: test_vaddv_u16 + return vaddv_u16(a); + // CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.4h +} + +int8_t test_vaddvq_s8(int8x16_t a) { + // CHECK: test_vaddvq_s8 + return vaddvq_s8(a); + // CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b +} + +int16_t test_vaddvq_s16(int16x8_t a) { + // CHECK: test_vaddvq_s16 + return vaddvq_s16(a); + // CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h +} + +int32_t test_vaddvq_s32(int32x4_t a) { + // CHECK: test_vaddvq_s32 + return vaddvq_s32(a); + // CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s +} + +uint8_t test_vaddvq_u8(uint8x16_t a) { + // CHECK: test_vaddvq_u8 + return vaddvq_u8(a); + // CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b +} + +uint16_t test_vaddvq_u16(uint16x8_t a) { + // CHECK: test_vaddvq_u16 + return vaddvq_u16(a); + // CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h +} + +uint32_t test_vaddvq_u32(uint32x4_t a) { + // CHECK: test_vaddvq_u32 + return vaddvq_u32(a); + // CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s +} + +float32_t test_vmaxvq_f32(float32x4_t a) { + // CHECK: test_vmaxvq_f32 + return vmaxvq_f32(a); + // CHECK: fmaxv {{s[0-9]+}}, {{v[0-9]+}}.4s +} + +float32_t test_vminvq_f32(float32x4_t a) { + // CHECK: test_vminvq_f32 + return vminvq_f32(a); + // CHECK: fminv {{s[0-9]+}}, {{v[0-9]+}}.4s +} + +float32_t test_vmaxnmvq_f32(float32x4_t a) { + // CHECK: test_vmaxnmvq_f32 + return vmaxnmvq_f32(a); + // CHECK: fmaxnmv {{s[0-9]+}}, {{v[0-9]+}}.4s +} + +float32_t test_vminnmvq_f32(float32x4_t a) { + // CHECK: test_vminnmvq_f32 + return vminnmvq_f32(a); + // CHECK: fminnmv {{s[0-9]+}}, {{v[0-9]+}}.4s +} diff --git a/test/CodeGen/aarch64-neon-copy.c b/test/CodeGen/aarch64-neon-copy.c new file mode 100644 index 0000000..eb91bf9 --- /dev/null +++ b/test/CodeGen/aarch64-neon-copy.c @@ -0,0 
+1,1319 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +uint8x8_t test_vset_lane_u8(uint8_t v1, uint8x8_t v2) { + // CHECK: test_vset_lane_u8 + return vset_lane_u8(v1, v2, 6); + // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} +} + +uint16x4_t test_vset_lane_u16(uint16_t v1, uint16x4_t v2) { + // CHECK: test_vset_lane_u16 + return vset_lane_u16(v1, v2, 2); + // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} +} + +uint32x2_t test_vset_lane_u32(uint32_t v1, uint32x2_t v2) { + // CHECK: test_vset_lane_u32 + return vset_lane_u32(v1, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} +} +uint64x1_t test_vset_lane_u64(uint64_t v1, uint64x1_t v2) { + // CHECK: test_vset_lane_u64 + return vset_lane_u64(v1, v2, 0); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +int8x8_t test_vset_lane_s8(int8_t v1, int8x8_t v2) { + // CHECK: test_vset_lane_s8 + return vset_lane_s8(v1, v2, 6); + // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} +} + +int16x4_t test_vset_lane_s16(int16_t v1, int16x4_t v2) { + // CHECK: test_vset_lane_s16 + return vset_lane_s16(v1, v2, 2); + // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} +} + +int32x2_t test_vset_lane_s32(int32_t v1, int32x2_t v2) { + // CHECK: test_vset_lane_s32 + return vset_lane_s32(v1, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} +} + + int64x1_t test_vset_lane_s64(int64_t v1, int64x1_t v2) { + // CHECK: test_vset_lane_s64 + return vset_lane_s64(v1, v2, 0); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint8x16_t test_vsetq_lane_u8(uint8_t v1, uint8x16_t v2) { + // CHECK: test_vsetq_lane_u8 + return vsetq_lane_u8(v1, v2, 6); + // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} +} + +uint16x8_t test_vsetq_lane_u16(uint16_t v1, uint16x8_t v2) { + // CHECK: test_vsetq_lane_u16 + return vsetq_lane_u16(v1, v2, 2); + // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} +} + +uint32x4_t test_vsetq_lane_u32(uint32_t v1, uint32x4_t v2) { + // CHECK: test_vsetq_lane_u32 + return vsetq_lane_u32(v1, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} +} + + uint64x2_t test_vsetq_lane_u64(uint64_t v1, uint64x2_t v2) { + // CHECK: test_vsetq_lane_u64 + return vsetq_lane_u64(v1, v2, 1); + // CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}} +} + +int8x16_t test_vsetq_lane_s8(int8_t v1, int8x16_t v2) { + // CHECK: test_vsetq_lane_s8 + return vsetq_lane_s8(v1, v2, 6); + // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} +} + +int16x8_t test_vsetq_lane_s16(int16_t v1, int16x8_t v2) { + // CHECK: test_vsetq_lane_s16 + return vsetq_lane_s16(v1, v2, 2); + // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} +} + +int32x4_t test_vsetq_lane_s32(int32_t v1, int32x4_t v2) { + // CHECK: test_vsetq_lane_s32 + return vsetq_lane_s32(v1, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} +} + +int64x2_t test_vsetq_lane_s64(int64_t v1, int64x2_t v2) { + // CHECK: test_vsetq_lane_s64 + return vsetq_lane_s64(v1, v2, 0); + // CHECK: ins {{v[0-9]+}}.d[0], {{x[0-9]+}} +} + +poly8x8_t test_vset_lane_p8(poly8_t v1, poly8x8_t v2) { + // CHECK: test_vset_lane_p8 + return vset_lane_p8(v1, v2, 6); + // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} +} + +poly16x4_t test_vset_lane_p16(poly16_t v1, poly16x4_t v2) { + // CHECK: test_vset_lane_p16 + return vset_lane_p16(v1, v2, 2); + // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} +} + +poly8x16_t test_vsetq_lane_p8(poly8_t v1, poly8x16_t v2) { + // CHECK: test_vsetq_lane_p8 + return vsetq_lane_p8(v1, v2, 6); + 
// CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} +} + +poly16x8_t test_vsetq_lane_p16(poly16_t v1, poly16x8_t v2) { + // CHECK: test_vsetq_lane_p16 + return vsetq_lane_p16(v1, v2, 2); + // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} +} + +float32x2_t test_vset_lane_f32(float32_t v1, float32x2_t v2) { + // CHECK: test_vset_lane_f32 + return vset_lane_f32(v1, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +float32x4_t test_vsetq_lane_f32(float32_t v1, float32x4_t v2) { + // CHECK: test_vsetq_lane_f32 + return vsetq_lane_f32(v1, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +float64x1_t test_vset_lane_f64(float64_t v1, float64x1_t v2) { + // CHECK: test_vset_lane_f64 + return vset_lane_f64(v1, v2, 0); + // CHECK: ret +} + +float64x2_t test_vsetq_lane_f64(float64_t v1, float64x2_t v2) { + // CHECK: test_vsetq_lane_f64 + return vsetq_lane_f64(v1, v2, 0); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] +} + +uint8_t test_vget_lane_u8(uint8x8_t v1) { + // CHECK: test_vget_lane_u8 + return vget_lane_u8(v1, 7); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7] +} + +uint16_t test_vget_lane_u16(uint16x4_t v1) { + // CHECK: test_vget_lane_u16 + return vget_lane_u16(v1, 3); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[3] +} + +uint32_t test_vget_lane_u32(uint32x2_t v1) { + // CHECK: test_vget_lane_u32 + return vget_lane_u32(v1, 1); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[1] +} + +uint64_t test_vget_lane_u64(uint64x1_t v1) { + // CHECK: test_vget_lane_u64 + return vget_lane_u64(v1, 0); + // CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} +} + +uint8_t test_vgetq_lane_u8(uint8x16_t v1) { + // CHECK: test_vgetq_lane_u8 + return vgetq_lane_u8(v1, 15); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[15] +} + +uint16_t test_vgetq_lane_u16(uint16x8_t v1) { + // CHECK: test_vgetq_lane_u16 + return vgetq_lane_u16(v1, 6); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[6] +} + +uint32_t test_vgetq_lane_u32(uint32x4_t v1) { + // CHECK: test_vgetq_lane_u32 + return vgetq_lane_u32(v1, 2); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[2] +} + +uint64_t test_vgetq_lane_u64(uint64x2_t v1) { + // CHECK: test_vgetq_lane_u64 + return vgetq_lane_u64(v1, 1); + // CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[1] +} + +poly8_t test_vget_lane_p8(poly8x8_t v1) { + // CHECK: test_vget_lane_p8 + return vget_lane_p8(v1, 7); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7] +} + +poly16_t test_vget_lane_p16(poly16x4_t v1) { + // CHECK: test_vget_lane_p16 + return vget_lane_p16(v1, 3); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[3] +} + +poly8_t test_vgetq_lane_p8(poly8x16_t v1) { + // CHECK: test_vgetq_lane_p8 + return vgetq_lane_p8(v1, 14); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[14] +} + +poly16_t test_vgetq_lane_p16(poly16x8_t v1) { + // CHECK: test_vgetq_lane_p16 + return vgetq_lane_p16(v1, 6); + // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[6] +} + +int32_t test_vget_lane_s8(int8x8_t v1) { + // CHECK: test_vget_lane_s8 + return vget_lane_s8(v1, 7)+1; + // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[7] +} + +int32_t test_vget_lane_s16(int16x4_t v1) { + // CHECK: test_vget_lane_s16 + return vget_lane_s16(v1, 3)+1; + // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[3] +} + +int64_t test_vget_lane_s32(int32x2_t v1) { + // CHECK: test_vget_lane_s32 + return vget_lane_s32(v1, 1); + // CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1] +} + +int64_t test_vget_lane_s64(int64x1_t v1) { + // CHECK: test_vget_lane_s64 + return vget_lane_s64(v1, 0); + // CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} +} + +int32_t test_vgetq_lane_s8(int8x16_t v1) { + // CHECK: 
test_vgetq_lane_s8 + return vgetq_lane_s8(v1, 15)+1; + // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[15] +} + +int32_t test_vgetq_lane_s16(int16x8_t v1) { + // CHECK: test_vgetq_lane_s16 + return vgetq_lane_s16(v1, 6)+1; + // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[6] +} + +int64_t test_vgetq_lane_s32(int32x4_t v1) { + // CHECK: test_vgetq_lane_s32 + return vgetq_lane_s32(v1, 2); + // CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2] +} + +int64_t test_vgetq_lane_s64(int64x2_t v1) { + // CHECK: test_vgetq_lane_s64 + return vgetq_lane_s64(v1, 1); + // CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[1] +} + +int8x8_t test_vcopy_lane_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vcopy_lane_s8 + return vcopy_lane_s8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +int16x4_t test_vcopy_lane_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vcopy_lane_s16 + return vcopy_lane_s16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +poly8x8_t test_vcopy_lane_p8(poly8x8_t v1, poly8x8_t v2) { + // CHECK: test_vcopy_lane_p8 + return vcopy_lane_p8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +poly16x4_t test_vcopy_lane_p16(poly16x4_t v1, poly16x4_t v2) { + // CHECK: test_vcopy_lane_p16 + return vcopy_lane_p16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +int32x2_t test_vcopy_lane_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vcopy_lane_s32 + return vcopy_lane_s32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +float32x2_t test_vcopy_lane_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vcopy_lane_f32 + return vcopy_lane_f32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +uint8x8_t test_vcopy_lane_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vcopy_lane_u8 + return vcopy_lane_u8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +uint16x4_t test_vcopy_lane_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vcopy_lane_u16 + return vcopy_lane_u16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +uint32x2_t test_vcopy_lane_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vcopy_lane_u32 + return vcopy_lane_u32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +int8x8_t test_vcopy_laneq_s8(int8x8_t v1, int8x16_t v2) { + // CHECK: test_vcopy_laneq_s8 + return vcopy_laneq_s8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +int16x4_t test_vcopy_laneq_s16(int16x4_t v1, int16x8_t v2) { + // CHECK: test_vcopy_laneq_s16 + return vcopy_laneq_s16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +poly8x8_t test_vcopy_laneq_p8(poly8x8_t v1, poly8x16_t v2) { + // CHECK: test_vcopy_laneq_p8 + return vcopy_laneq_p8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +poly16x4_t test_vcopy_laneq_p16(poly16x4_t v1, poly16x8_t v2) { + // CHECK: test_vcopy_laneq_p16 + return vcopy_laneq_p16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +int32x2_t test_vcopy_laneq_s32(int32x2_t v1, int32x4_t v2) { + // CHECK: test_vcopy_laneq_s32 + return vcopy_laneq_s32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +float32x2_t test_vcopy_laneq_f32(float32x2_t v1, float32x4_t v2) { + // CHECK: test_vcopy_laneq_f32 + return vcopy_laneq_f32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +uint8x8_t test_vcopy_laneq_u8(uint8x8_t v1, uint8x16_t v2) { + // CHECK: test_vcopy_laneq_u8 + return 
vcopy_laneq_u8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +uint16x4_t test_vcopy_laneq_u16(uint16x4_t v1, uint16x8_t v2) { + // CHECK: test_vcopy_laneq_u16 + return vcopy_laneq_u16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +uint32x2_t test_vcopy_laneq_u32(uint32x2_t v1, uint32x4_t v2) { + // CHECK: test_vcopy_laneq_u32 + return vcopy_laneq_u32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +int8x16_t test_vcopyq_lane_s8(int8x16_t v1, int8x8_t v2) { + // CHECK: test_vcopyq_lane_s8 + return vcopyq_lane_s8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +int16x8_t test_vcopyq_lane_s16(int16x8_t v1, int16x4_t v2) { + // CHECK: test_vcopyq_lane_s16 + return vcopyq_lane_s16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +poly8x16_t test_vcopyq_lane_p8(poly8x16_t v1, poly8x8_t v2) { + // CHECK: test_vcopyq_lane_p8 + return vcopyq_lane_p8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +poly16x8_t test_vcopyq_lane_p16(poly16x8_t v1, poly16x4_t v2) { + // CHECK: test_vcopyq_lane_p16 + return vcopyq_lane_p16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +int32x4_t test_vcopyq_lane_s32(int32x4_t v1, int32x2_t v2) { + // CHECK: test_vcopyq_lane_s32 + return vcopyq_lane_s32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +int64x2_t test_vcopyq_lane_s64(int64x2_t v1, int64x1_t v2) { + // CHECK: test_vcopyq_lane_s64 + return vcopyq_lane_s64(v1, 1, v2, 0); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +float32x4_t test_vcopyq_lane_f32(float32x4_t v1, float32x2_t v2) { + // CHECK: test_vcopyq_lane_f32 + return vcopyq_lane_f32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +float64x2_t test_vcopyq_lane_f64(float64x2_t v1, float64x1_t v2) { + // CHECK: test_vcopyq_lane_f64 + return vcopyq_lane_f64(v1, 1, v2, 0); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +uint8x16_t test_vcopyq_lane_u8(uint8x16_t v1, uint8x8_t v2) { + // CHECK: test_vcopyq_lane_u8 + return vcopyq_lane_u8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +uint16x8_t test_vcopyq_lane_u16(uint16x8_t v1, uint16x4_t v2) { + // CHECK: test_vcopyq_lane_u16 + return vcopyq_lane_u16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +uint32x4_t test_vcopyq_lane_u32(uint32x4_t v1, uint32x2_t v2) { + // CHECK: test_vcopyq_lane_u32 + return vcopyq_lane_u32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +uint64x2_t test_vcopyq_lane_u64(uint64x2_t v1, uint64x1_t v2) { + // CHECK: test_vcopyq_lane_u64 + return vcopyq_lane_u64(v1, 1, v2, 0); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +int8x16_t test_vcopyq_laneq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vcopyq_laneq_s8 + return vcopyq_laneq_s8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +int16x8_t test_vcopyq_laneq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vcopyq_laneq_s16 + return vcopyq_laneq_s16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +poly8x16_t test_vcopyq_laneq_p8(poly8x16_t v1, poly8x16_t v2) { + // CHECK: test_vcopyq_laneq_p8 + return vcopyq_laneq_p8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +poly16x8_t test_vcopyq_laneq_p16(poly16x8_t v1, poly16x8_t v2) { + // CHECK: test_vcopyq_laneq_p16 + return vcopyq_laneq_p16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] 
+} + +int32x4_t test_vcopyq_laneq_s32(int32x4_t v1, int32x4_t v2) { + // CHECK: test_vcopyq_laneq_s32 + return vcopyq_laneq_s32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +float32x4_t test_vcopyq_laneq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vcopyq_laneq_f32 + return vcopyq_laneq_f32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +int64x2_t test_vcopyq_laneq_s64(int64x2_t v1, int64x2_t v2) { + // CHECK: test_vcopyq_laneq_s64 + return vcopyq_laneq_s64(v1, 1, v2, 1); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1] +} + +uint8x16_t test_vcopyq_laneq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vcopyq_laneq_u8 + return vcopyq_laneq_u8(v1, 5, v2, 3); + // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] +} + +uint16x8_t test_vcopyq_laneq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vcopyq_laneq_u16 + return vcopyq_laneq_u16(v1, 2, v2, 3); + // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] +} + +uint32x4_t test_vcopyq_laneq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: test_vcopyq_laneq_u32 + return vcopyq_laneq_u32(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +uint64x2_t test_vcopyq_laneq_u64(uint64x2_t v1, uint64x2_t v2) { + // CHECK: test_vcopyq_laneq_u64 + return vcopyq_laneq_u64(v1, 0, v2, 1); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +int8x8_t test_vcreate_s8(uint64_t v1) { + // CHECK: test_vcreate_s8 + return vcreate_s8(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +int16x4_t test_vcreate_s16(uint64_t v1) { + // CHECK: test_vcreate_s16 + return vcreate_s16(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +int32x2_t test_vcreate_s32(uint64_t v1) { + // CHECK: test_vcreate_s32 + return vcreate_s32(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +int64x1_t test_vcreate_s64(uint64_t v1) { + // CHECK: test_vcreate_s64 + return vcreate_s64(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint8x8_t test_vcreate_u8(uint64_t v1) { + // CHECK: test_vcreate_u8 + return vcreate_u8(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint16x4_t test_vcreate_u16(uint64_t v1) { + // CHECK: test_vcreate_u16 + return vcreate_u16(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint32x2_t test_vcreate_u32(uint64_t v1) { + // CHECK: test_vcreate_u32 + return vcreate_u32(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint64x1_t test_vcreate_u64(uint64_t v1) { + // CHECK: test_vcreate_u64 + return vcreate_u64(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +poly8x8_t test_vcreate_p8(uint64_t v1) { + // CHECK: test_vcreate_p8 + return vcreate_p8(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +poly16x4_t test_vcreate_p16(uint64_t v1) { + // CHECK: test_vcreate_p16 + return vcreate_p16(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +float16x4_t test_vcreate_f16(uint64_t v1) { + // CHECK: test_vcreate_f16 + return vcreate_f16(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +float32x2_t test_vcreate_f32(uint64_t v1) { + // CHECK: test_vcreate_f32 + return vcreate_f32(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +float64x1_t test_vcreate_f64(uint64_t v1) { + // CHECK: test_vcreate_f64 + return vcreate_f64(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint8x8_t test_vdup_n_u8(uint8_t v1) { + // CHECK: test_vdup_n_u8 + return vdup_n_u8(v1); + // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +} + +uint16x4_t test_vdup_n_u16(uint16_t v1) { + // CHECK: test_vdup_n_u16 + return vdup_n_u16(v1); + // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} +} + 
+uint32x2_t test_vdup_n_u32(uint32_t v1) { + // CHECK: test_vdup_n_u32 + return vdup_n_u32(v1); + // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}} +} + +uint64x1_t test_vdup_n_u64(uint64_t v1) { + // CHECK: test_vdup_n_u64 + return vdup_n_u64(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint8x16_t test_vdupq_n_u8(uint8_t v1) { + // CHECK: test_vdupq_n_u8 + return vdupq_n_u8(v1); + // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} +} + +uint16x8_t test_vdupq_n_u16(uint16_t v1) { + // CHECK: test_vdupq_n_u16 + return vdupq_n_u16(v1); + // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +} + +uint32x4_t test_vdupq_n_u32(uint32_t v1) { + // CHECK: test_vdupq_n_u32 + return vdupq_n_u32(v1); + // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} +} + +uint64x2_t test_vdupq_n_u64(uint64_t v1) { + // CHECK: test_vdupq_n_u64 + return vdupq_n_u64(v1); + // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}} +} + +int8x8_t test_vdup_n_s8(int8_t v1) { + // CHECK: test_vdup_n_s8 + return vdup_n_s8(v1); + // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +} + +int16x4_t test_vdup_n_s16(int16_t v1) { + // CHECK: test_vdup_n_s16 + return vdup_n_s16(v1); + // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} +} + +int32x2_t test_vdup_n_s32(int32_t v1) { + // CHECK: test_vdup_n_s32 + return vdup_n_s32(v1); + // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}} +} + +int64x1_t test_vdup_n_s64(int64_t v1) { + // CHECK: test_vdup_n_s64 + return vdup_n_s64(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +int8x16_t test_vdupq_n_s8(int8_t v1) { + // CHECK: test_vdupq_n_s8 + return vdupq_n_s8(v1); + // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} +} + +int16x8_t test_vdupq_n_s16(int16_t v1) { + // CHECK: test_vdupq_n_s16 + return vdupq_n_s16(v1); + // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +} + +int32x4_t test_vdupq_n_s32(int32_t v1) { + // CHECK: test_vdupq_n_s32 + return vdupq_n_s32(v1); + // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} +} + +int64x2_t test_vdupq_n_s64(int64_t v1) { + // CHECK: test_vdupq_n_s64 + return vdupq_n_s64(v1); + // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}} +} + +poly8x8_t test_vdup_n_p8(poly8_t v1) { + // CHECK: test_vdup_n_p8 + return vdup_n_p8(v1); + // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +} + +poly16x4_t test_vdup_n_p16(poly16_t v1) { + // CHECK: test_vdup_n_p16 + return vdup_n_p16(v1); + // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} +} + +poly8x16_t test_vdupq_n_p8(poly8_t v1) { + // CHECK: test_vdupq_n_p8 + return vdupq_n_p8(v1); + // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} +} + +poly16x8_t test_vdupq_n_p16(poly16_t v1) { + // CHECK: test_vdupq_n_p16 + return vdupq_n_p16(v1); + // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +} + +float32x2_t test_vdup_n_f32(float32_t v1) { + // CHECK: test_vdup_n_f32 + return vdup_n_f32(v1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float64x1_t test_vdup_n_f64(float64_t v1) { + // CHECK: test_vdup_n_f64 + return vdup_n_f64(v1); + // CHECK: ret +} + +float32x4_t test_vdupq_n_f32(float32_t v1) { + // CHECK: test_vdupq_n_f32 + return vdupq_n_f32(v1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float64x2_t test_vdupq_n_f64(float64_t v1) { + // CHECK: test_vdupq_n_f64 + return vdupq_n_f64(v1); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +int8x8_t test_vdup_lane_s8(int8x8_t v1) { + // CHECK: test_vdup_lane_s8 + return vdup_lane_s8(v1, 5); + // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] +} + +int16x4_t test_vdup_lane_s16(int16x4_t v1) { + // CHECK: test_vdup_lane_s16 + return vdup_lane_s16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +int32x2_t test_vdup_lane_s32(int32x2_t v1) { + // 
CHECK: test_vdup_lane_s32 + return vdup_lane_s32(v1, 1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int64x1_t test_vdup_lane_s64(int64x1_t v1) { + // CHECK: test_vdup_lane_s64 + return vdup_lane_s64(v1, 0); + // CHECK: ret +} + +int8x16_t test_vdupq_lane_s8(int8x8_t v1) { + // CHECK: test_vdupq_lane_s8 + return vdupq_lane_s8(v1, 5); + // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] +} + +int16x8_t test_vdupq_lane_s16(int16x4_t v1) { + // CHECK: test_vdupq_lane_s16 + return vdupq_lane_s16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +int32x4_t test_vdupq_lane_s32(int32x2_t v1) { + // CHECK: test_vdupq_lane_s32 + return vdupq_lane_s32(v1, 1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int64x2_t test_vdupq_lane_s64(int64x1_t v1) { + // CHECK: test_vdupq_lane_s64 + return vdupq_lane_s64(v1, 0); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +uint8x8_t test_vdup_lane_u8(uint8x8_t v1) { + // CHECK: test_vdup_lane_u8 + return vdup_lane_u8(v1, 5); + // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] +} + +uint16x4_t test_vdup_lane_u16(uint16x4_t v1) { + // CHECK: test_vdup_lane_u16 + return vdup_lane_u16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +uint32x2_t test_vdup_lane_u32(uint32x2_t v1) { + // CHECK: test_vdup_lane_u32 + return vdup_lane_u32(v1, 1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +uint64x1_t test_vdup_lane_u64(uint64x1_t v1) { + // CHECK: test_vdup_lane_u64 + return vdup_lane_u64(v1, 0); + // CHECK: ret +} + +uint8x16_t test_vdupq_lane_u8(uint8x8_t v1) { + // CHECK: test_vdupq_lane_u8 + return vdupq_lane_u8(v1, 5); + // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] +} + +uint16x8_t test_vdupq_lane_u16(uint16x4_t v1) { + // CHECK: test_vdupq_lane_u16 + return vdupq_lane_u16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +uint32x4_t test_vdupq_lane_u32(uint32x2_t v1) { + // CHECK: test_vdupq_lane_u32 + return vdupq_lane_u32(v1, 1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +uint64x2_t test_vdupq_lane_u64(uint64x1_t v1) { + // CHECK: test_vdupq_lane_u64 + return vdupq_lane_u64(v1, 0); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +int8x8_t test_vdup_laneq_s8(int8x16_t v1) { + // CHECK: test_vdup_laneq_s8 + return vdup_laneq_s8(v1, 5); + // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] +} + +int16x4_t test_vdup_laneq_s16(int16x8_t v1) { + // CHECK: test_vdup_laneq_s16 + return vdup_laneq_s16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +int32x2_t test_vdup_laneq_s32(int32x4_t v1) { + // CHECK: test_vdup_laneq_s32 + return vdup_laneq_s32(v1, 1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +int64x1_t test_vdup_laneq_s64(int64x2_t v1) { + // CHECK: test_vdup_laneq_s64 + return vdup_laneq_s64(v1, 0); + // CHECK: ret +} + +int8x16_t test_vdupq_laneq_s8(int8x16_t v1) { + // CHECK: test_vdupq_laneq_s8 + return vdupq_laneq_s8(v1, 5); + // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] +} + +int16x8_t test_vdupq_laneq_s16(int16x8_t v1) { + // CHECK: test_vdupq_laneq_s16 + return vdupq_laneq_s16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +int32x4_t test_vdupq_laneq_s32(int32x4_t v1) { + // CHECK: test_vdupq_laneq_s32 + return vdupq_laneq_s32(v1, 1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +int64x2_t test_vdupq_laneq_s64(int64x2_t v1) { + // CHECK: test_vdupq_laneq_s64 + return vdupq_laneq_s64(v1, 0); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +uint8x8_t test_vdup_laneq_u8(uint8x16_t v1) { + // CHECK: test_vdup_laneq_u8 + 
return vdup_laneq_u8(v1, 5); + // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] +} + +uint16x4_t test_vdup_laneq_u16(uint16x8_t v1) { + // CHECK: test_vdup_laneq_u16 + return vdup_laneq_u16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +uint32x2_t test_vdup_laneq_u32(uint32x4_t v1) { + // CHECK: test_vdup_laneq_u32 + return vdup_laneq_u32(v1, 1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +uint64x1_t test_vdup_laneq_u64(uint64x2_t v1) { + // CHECK: test_vdup_laneq_u64 + return vdup_laneq_u64(v1, 0); + // CHECK: ret +} + +uint8x16_t test_vdupq_laneq_u8(uint8x16_t v1) { + // CHECK: test_vdupq_laneq_u8 + return vdupq_laneq_u8(v1, 5); + // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] +} + +uint16x8_t test_vdupq_laneq_u16(uint16x8_t v1) { + // CHECK: test_vdupq_laneq_u16 + return vdupq_laneq_u16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +uint32x4_t test_vdupq_laneq_u32(uint32x4_t v1) { + // CHECK: test_vdupq_laneq_u32 + return vdupq_laneq_u32(v1, 1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +uint64x2_t test_vdupq_laneq_u64(uint64x2_t v1) { + // CHECK: test_vdupq_laneq_u64 + return vdupq_laneq_u64(v1, 0); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +poly8x8_t test_vdup_lane_p8(poly8x8_t v1) { + // CHECK: test_vdup_lane_p8 + return vdup_lane_p8(v1, 5); + // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] +} + +poly16x4_t test_vdup_lane_p16(poly16x4_t v1) { + // CHECK: test_vdup_lane_p16 + return vdup_lane_p16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +poly8x16_t test_vdupq_lane_p8(poly8x8_t v1) { + // CHECK: test_vdupq_lane_p8 + return vdupq_lane_p8(v1, 5); + // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] +} + +poly16x8_t test_vdupq_lane_p16(poly16x4_t v1) { + // CHECK: test_vdupq_lane_p16 + return vdupq_lane_p16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +poly8x8_t test_vdup_laneq_p8(poly8x16_t v1) { + // CHECK: test_vdup_laneq_p8 + return vdup_laneq_p8(v1, 5); + // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] +} + +poly16x4_t test_vdup_laneq_p16(poly16x8_t v1) { + // CHECK: test_vdup_laneq_p16 + return vdup_laneq_p16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +poly8x16_t test_vdupq_laneq_p8(poly8x16_t v1) { + // CHECK: test_vdupq_laneq_p8 + return vdupq_laneq_p8(v1, 5); + // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] +} + +poly16x8_t test_vdupq_laneq_p16(poly16x8_t v1) { + // CHECK: test_vdupq_laneq_p16 + return vdupq_laneq_p16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +float16x4_t test_vdup_lane_f16(float16x4_t v1) { + // CHECK: test_vdup_lane_f16 + return vdup_lane_f16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +float32x2_t test_vdup_lane_f32(float32x2_t v1) { + // CHECK: test_vdup_lane_f32 + return vdup_lane_f32(v1, 1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +float64x1_t test_vdup_lane_f64(float64x1_t v1) { + // CHECK: test_vdup_lane_f64 + return vdup_lane_f64(v1, 0); + // CHECK: ret +} + +float16x4_t test_vdup_laneq_f16(float16x8_t v1) { + // CHECK: test_vdup_laneq_f16 + return vdup_laneq_f16(v1, 2); + // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] +} + +float32x2_t test_vdup_laneq_f32(float32x4_t v1) { + // CHECK: test_vdup_laneq_f32 + return vdup_laneq_f32(v1, 1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +float64x1_t test_vdup_laneq_f64(float64x2_t v1) { + // CHECK: test_vdup_laneq_f64 + return vdup_laneq_f64(v1, 0); + // CHECK: ret +} + +float16x8_t test_vdupq_lane_f16(float16x4_t v1) { + // CHECK: 
test_vdupq_lane_f16 + return vdupq_lane_f16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +float32x4_t test_vdupq_lane_f32(float32x2_t v1) { + // CHECK: test_vdupq_lane_f32 + return vdupq_lane_f32(v1, 1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +float64x2_t test_vdupq_lane_f64(float64x1_t v1) { + // CHECK: test_vdupq_lane_f64 + return vdupq_lane_f64(v1, 0); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +float16x8_t test_vdupq_laneq_f16(float16x8_t v1) { + // CHECK: test_vdupq_laneq_f16 + return vdupq_laneq_f16(v1, 2); + // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] +} + +float32x4_t test_vdupq_laneq_f32(float32x4_t v1) { + // CHECK: test_vdupq_laneq_f32 + return vdupq_laneq_f32(v1, 1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +float64x2_t test_vdupq_laneq_f64(float64x2_t v1) { + // CHECK: test_vdupq_laneq_f64 + return vdupq_laneq_f64(v1, 0); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +int8x8_t test_vmov_n_s8(int8_t v1) { + // CHECK: test_vmov_n_s8 + return vmov_n_s8(v1); + // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +} + +int16x4_t test_vmov_n_s16(int16_t v1) { + // CHECK: test_vmov_n_s16 + return vmov_n_s16(v1); + // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} +} + +int32x2_t test_vmov_n_s32(int32_t v1) { + // CHECK: test_vmov_n_s32 + return vmov_n_s32(v1); + // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}} +} + +int64x1_t test_vmov_n_s64(int64_t v1) { + // CHECK: test_vmov_n_s64 + return vmov_n_s64(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +int8x16_t test_vmovq_n_s8(int8_t v1) { + // CHECK: test_vmovq_n_s8 + return vmovq_n_s8(v1); + // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} +} + +int16x8_t test_vmovq_n_s16(int16_t v1) { + // CHECK: test_vmovq_n_s16 + return vmovq_n_s16(v1); + // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +} + +int32x4_t test_vmovq_n_s32(int32_t v1) { + // CHECK: test_vmovq_n_s32 + return vmovq_n_s32(v1); + // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} +} + +int64x2_t test_vmovq_n_s64(int64_t v1) { + // CHECK: test_vmovq_n_s64 + return vmovq_n_s64(v1); + // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}} +} + +uint8x8_t test_vmov_n_u8(uint8_t v1) { + // CHECK: test_vmov_n_u8 + return vmov_n_u8(v1); + // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +} + +uint16x4_t test_vmov_n_u16(uint16_t v1) { + // CHECK: test_vmov_n_u16 + return vmov_n_u16(v1); + // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} +} + +uint32x2_t test_vmov_n_u32(uint32_t v1) { + // CHECK: test_vmov_n_u32 + return vmov_n_u32(v1); + // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}} +} + +uint64x1_t test_vmov_n_u64(uint64_t v1) { + // CHECK: test_vmov_n_u64 + return vmov_n_u64(v1); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +uint8x16_t test_vmovq_n_u8(uint8_t v1) { + // CHECK: test_vmovq_n_u8 + return vmovq_n_u8(v1); + // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} +} + +uint16x8_t test_vmovq_n_u16(uint16_t v1) { + // CHECK: test_vmovq_n_u16 + return vmovq_n_u16(v1); + // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +} + +uint32x4_t test_vmovq_n_u32(uint32_t v1) { + // CHECK: test_vmovq_n_u32 + return vmovq_n_u32(v1); + // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} +} + +uint64x2_t test_vmovq_n_u64(uint64_t v1) { + // CHECK: test_vmovq_n_u64 + return vmovq_n_u64(v1); + // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}} +} + +poly8x8_t test_vmov_n_p8(poly8_t v1) { + // CHECK: test_vmov_n_p8 + return vmov_n_p8(v1); + // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +} + +poly16x4_t test_vmov_n_p16(poly16_t v1) { + // CHECK: test_vmov_n_p16 + return vmov_n_p16(v1); + // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} +} + 
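The vdup_lane/vdup_laneq tests above expect a lane-indexed dup, while the vmov_n tests that follow expect a dup from a general-purpose register; both families are splats of a single value. A minimal reference sketch of that equivalence, using only intrinsics from arm_neon.h (splat_lane1_ref is a hypothetical helper for illustration, not part of the patch):

#include <arm_neon.h>

// Reading lane 1 out and splatting the scalar gives the same result as
// vdupq_lane_s32(v, 1); the lane-indexed intrinsic just lets the backend
// emit a single "dup v.4s, v.s[1]" instead of a round trip through a
// w-register.
int32x4_t splat_lane1_ref(int32x2_t v) {
  return vdupq_n_s32(vget_lane_s32(v, 1));
}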
+poly8x16_t test_vmovq_n_p8(poly8_t v1) { + // CHECK: test_vmovq_n_p8 + return vmovq_n_p8(v1); + // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} +} + +poly16x8_t test_vmovq_n_p16(poly16_t v1) { + // CHECK: test_vmovq_n_p16 + return vmovq_n_p16(v1); + // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +} + +float32x2_t test_vmov_n_f32(float32_t v1) { + // CHECK: test_vmov_n_f32 + return vmov_n_f32(v1); + // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float64x1_t test_vmov_n_f64(float64_t v1) { + // CHECK: test_vmov_n_f64 + return vmov_n_f64(v1); + // CHECK: ret +} + +float32x4_t test_vmovq_n_f32(float32_t v1) { + // CHECK: test_vmovq_n_f32 + return vmovq_n_f32(v1); + // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float64x2_t test_vmovq_n_f64(float64_t v1) { + // CHECK: test_vmovq_n_f64 + return vmovq_n_f64(v1); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +// CHECK: test_vcopy_lane_s64 +int64x1_t test_vcopy_lane_s64(int64x1_t a, int64x1_t c) { + return vcopy_lane_s64(a, 0, c, 0); +// CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}} +// CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + +// CHECK: test_vcopy_lane_u64 +uint64x1_t test_vcopy_lane_u64(uint64x1_t a, uint64x1_t c) { + return vcopy_lane_u64(a, 0, c, 0); +// CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}} +// CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + +// CHECK: test_vcopy_lane_f64 +float64x1_t test_vcopy_lane_f64(float64x1_t a, float64x1_t c) { + return vcopy_lane_f64(a, 0, c, 0); +// CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}} +// CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + +// CHECK: test_vcopy_laneq_s64 +int64x1_t test_vcopy_laneq_s64(int64x1_t a, int64x2_t c) { + return vcopy_laneq_s64(a, 0, c, 1); +// CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] +} + +// CHECK: test_vcopy_laneq_u64 +uint64x1_t test_vcopy_laneq_u64(uint64x1_t a, uint64x2_t c) { + return vcopy_laneq_u64(a, 0, c, 1); +// CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] +} + +// CHECK: test_vcopy_laneq_f64 +float64x1_t test_vcopy_laneq_f64(float64x1_t a, float64x2_t c) { + return vcopy_laneq_f64(a, 0, c, 1); +// CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] +} + +// CHECK: test_vcopy_laneq_p64 +poly64x1_t test_vcopy_laneq_p64(poly64x1_t a, poly64x2_t c) { + return vcopy_laneq_p64(a, 0, c, 1); +// CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] +} + +// CHECK: test_vcopyq_laneq_f64 +float64x2_t test_vcopyq_laneq_f64(float64x2_t a, float64x2_t c) { +// CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1] + return vcopyq_laneq_f64(a, 1, c, 1); +} + +// CHECK: test_vget_lane_f16 +int test_vget_lane_f16(float16x4_t v1) { + float16_t a = vget_lane_f16(v1, 3); + return (int)a; +// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3] +} + +// CHECK: test_vgetq_lane_f16 +int test_vgetq_lane_f16(float16x8_t v1) { + float16_t a = vgetq_lane_f16(v1, 7); + return (int)a; +// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7] +} + +// CHECK: test_vget_lane_f16_2 +float test_vget_lane_f16_2(float16x4_t v1) { + float16_t a = vget_lane_f16(v1, 3); + return (float)a; +// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3] +} + +// CHECK: test_vgetq_lane_f16_2 +float test_vgetq_lane_f16_2(float16x8_t v1) { + float16_t a = vgetq_lane_f16(v1, 7); + return (float)a; +// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7] +} + +// CHECK: test_vset_lane_f16 +float16x4_t test_vset_lane_f16(float16x4_t v1) { + float16_t a; + return vset_lane_f16(a, v1, 3); +// CHECK: fmov {{s[0-9]+}}, wzr +// CHECK-NEXT: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[0] +} + +// CHECK: test_vsetq_lane_f16 +float16x8_t test_vsetq_lane_f16(float16x8_t v1) { + float16_t a; + return vsetq_lane_f16(a, v1, 
7); +// CHECK: fmov {{s[0-9]+}}, wzr +// CHECK-NEXT: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[0] +} + +// CHECK: test_vset_lane_f16_2 +float16x4_t test_vset_lane_f16_2(float16x4_t v1) { + float16_t a = vget_lane_f16(v1, 0); + return vset_lane_f16(a, v1, 3); +// CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[0] +} + +// CHECK: test_vsetq_lane_f16_2 +float16x8_t test_vsetq_lane_f16_2(float16x8_t v1) { + float16_t a = vgetq_lane_f16(v1, 0); + return vsetq_lane_f16(a, v1, 7); +// CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[0] +} + + +// CHECK: test_vsetq_lane_f16_3 +float16x8_t test_vsetq_lane_f16_3(float16x8_t v1, float b, float c) { + float16_t a = (float16_t)b; + return vsetq_lane_f16(a, v1, 7); +// CHECK: ins {{v[0-9]+}}.h[7], {{w[0-9]+}} +} + +// CHECK: test_vsetq_lane_f16_4 +float16x8_t test_vsetq_lane_f16_4(float16x8_t v1, float b, float c) { + float16_t a = (float16_t)b + 1.0; + return vsetq_lane_f16(a, v1, 7); +// CHECK: ins {{v[0-9]+}}.h[7], {{w[0-9]+}} +} + diff --git a/test/CodeGen/aarch64-neon-crypto.c b/test/CodeGen/aarch64-neon-crypto.c new file mode 100644 index 0000000..240f379 --- /dev/null +++ b/test/CodeGen/aarch64-neon-crypto.c @@ -0,0 +1,94 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -target-feature +crypto -S -O3 -o - %s | FileCheck %s +// RUN: not %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -S -O3 -o - %s 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +uint8x16_t test_vaeseq_u8(uint8x16_t data, uint8x16_t key) { + // CHECK: test_vaeseq_u8 + // CHECK-NO-CRYPTO: warning: implicit declaration of function 'vaeseq_u8' is invalid in C99 + return vaeseq_u8(data, key); + // CHECK: aese {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint8x16_t test_vaesdq_u8(uint8x16_t data, uint8x16_t key) { + // CHECK: test_vaesdq_u8 + return vaesdq_u8(data, key); + // CHECK: aesd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint8x16_t test_vaesmcq_u8(uint8x16_t data) { + // CHECK: test_vaesmcq_u8 + return vaesmcq_u8(data); + // CHECK: aesmc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint8x16_t test_vaesimcq_u8(uint8x16_t data) { + // CHECK: test_vaesimcq_u8 + return vaesimcq_u8(data); + // CHECK: aesimc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint32_t test_vsha1h_u32(uint32_t hash_e) { + // CHECK: test_vsha1h_u32 + return vsha1h_u32(hash_e); + // CHECK: sha1h {{s[0-9]+}}, {{s[0-9]+}} +} + +uint32x4_t test_vsha1su1q_u32(uint32x4_t tw0_3, uint32x4_t w12_15) { + // CHECK: test_vsha1su1q_u32 + return vsha1su1q_u32(tw0_3, w12_15); + // CHECK: sha1su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint32x4_t test_vsha256su0q_u32(uint32x4_t w0_3, uint32x4_t w4_7) { + // CHECK: test_vsha256su0q_u32 + return vsha256su0q_u32(w0_3, w4_7); + // CHECK: sha256su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint32x4_t test_vsha1cq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) { + // CHECK: test_vsha1cq_u32 + return vsha1cq_u32(hash_abcd, hash_e, wk); + // CHECK: sha1c {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s +} + +uint32x4_t test_vsha1pq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) { + // CHECK: test_vsha1pq_u32 + return vsha1pq_u32(hash_abcd, hash_e, wk); + // CHECK: sha1p {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s +} + +uint32x4_t test_vsha1mq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) { + // CHECK: test_vsha1mq_u32 + return vsha1mq_u32(hash_abcd, hash_e, wk); + // CHECK: sha1m {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s +} + 
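Most of the crypto checks above simply match the expected instruction, but vsha1h_u32 is small enough to state as a scalar model. A hedged sketch, assuming the architectural definition of SHA1H as a fixed rotate left by 30 (sha1h_ref is a hypothetical helper, not part of the test):

#include <stdint.h>

// SHA1H produces the SHA-1 rotated "e" word: a left rotate by 30,
// equivalently a right rotate by 2.
static uint32_t sha1h_ref(uint32_t hash_e) {
  return (hash_e << 30) | (hash_e >> 2);
}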
+uint32x4_t test_vsha1su0q_u32(uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) { + // CHECK: test_vsha1su0q_u32 + return vsha1su0q_u32(w0_3, w4_7, w8_11); + // CHECK: sha1su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint32x4_t test_vsha256hq_u32(uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) { + // CHECK: test_vsha256hq_u32 + return vsha256hq_u32(hash_abcd, hash_efgh, wk); + // CHECK: sha256h {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s +} + +uint32x4_t test_vsha256h2q_u32(uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) { + // CHECK: test_vsha256h2q_u32 + return vsha256h2q_u32(hash_efgh, hash_abcd, wk); + // CHECK: sha256h2 {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s +} + +uint32x4_t test_vsha256su1q_u32(uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) { + // CHECK: test_vsha256su1q_u32 + return vsha256su1q_u32(tw0_3, w8_11, w12_15); + // CHECK: sha256su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} diff --git a/test/CodeGen/aarch64-neon-extract.c b/test/CodeGen/aarch64-neon-extract.c new file mode 100644 index 0000000..faf35af --- /dev/null +++ b/test/CodeGen/aarch64-neon-extract.c @@ -0,0 +1,148 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vext_s8 + return vext_s8(a, b, 2); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2 +} + +int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vext_s16 + return vext_s16(a, b, 3); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6 +} + +int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vext_s32 + return vext_s32(a, b, 1); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4 +} + +int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) { + // CHECK: test_vext_s64 + return vext_s64(a, b, 0); +} + +int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vextq_s8 + return vextq_s8(a, b, 2); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2 +} + +int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vextq_s16 + return vextq_s16(a, b, 3); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6 +} + +int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vextq_s32 + return vextq_s32(a, b, 1); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4 +} + +int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vextq_s64 + return vextq_s64(a, b, 1); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8 +} + +uint8x8_t test_vext_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vext_u8 + return vext_u8(a, b, 2); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2 +} + +uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vext_u16 + return vext_u16(a, b, 3); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6 +} + +uint32x2_t test_vext_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vext_u32 + return vext_u32(a, b, 1); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4 +} + +uint64x1_t test_vext_u64(uint64x1_t a, uint64x1_t b) { + // CHECK: test_vext_u64 + return vext_u64(a, b, 0); +} + +uint8x16_t test_vextq_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vextq_u8 + return 
vextq_u8(a, b, 2); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2 +} + +uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vextq_u16 + return vextq_u16(a, b, 3); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6 +} + +uint32x4_t test_vextq_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vextq_u32 + return vextq_u32(a, b, 1); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4 +} + +uint64x2_t test_vextq_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vextq_u64 + return vextq_u64(a, b, 1); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8 +} + +float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vext_f32 + return vext_f32(a, b, 1); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4 +} + +float64x1_t test_vext_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vext_f64 + return vext_f64(a, b, 0); +} + +float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vextq_f32 + return vextq_f32(a, b, 1); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4 +} + +float64x2_t test_vextq_f64(float64x2_t a, float64x2_t b) { + // CHECK: test_vextq_f64 + return vextq_f64(a, b, 1); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8 +} + +poly8x8_t test_vext_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vext_p8 + return vext_p8(a, b, 2); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2 +} + +poly16x4_t test_vext_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vext_p16 + return vext_p16(a, b, 3); + // CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6 +} + +poly8x16_t test_vextq_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vextq_p8 + return vextq_p8(a, b, 2); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2 +} + +poly16x8_t test_vextq_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vextq_p16 + return vextq_p16(a, b, 3); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6 +} diff --git a/test/CodeGen/aarch64-neon-fcvt-intrinsics.c b/test/CodeGen/aarch64-neon-fcvt-intrinsics.c new file mode 100644 index 0000000..98f1389 --- /dev/null +++ b/test/CodeGen/aarch64-neon-fcvt-intrinsics.c @@ -0,0 +1,133 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +float32_t test_vcvtxd_f32_f64(float64_t a) { +// CHECK: test_vcvtxd_f32_f64 +// CHECK: fcvtxn {{s[0-9]+}}, {{d[0-9]+}} + return (float32_t)vcvtxd_f32_f64(a); +} + +int32_t test_vcvtas_s32_f32(float32_t a) { +// CHECK: test_vcvtas_s32_f32 +// CHECK: fcvtas {{s[0-9]+}}, {{s[0-9]+}} + return (int32_t)vcvtas_s32_f32(a); +} + +int64_t test_test_vcvtad_s64_f64(float64_t a) { +// CHECK: test_test_vcvtad_s64_f64 +// CHECK: fcvtas {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vcvtad_s64_f64(a); +} + +uint32_t test_vcvtas_u32_f32(float32_t a) { +// CHECK: test_vcvtas_u32_f32 +// CHECK: fcvtau {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcvtas_u32_f32(a); +} + +uint64_t test_vcvtad_u64_f64(float64_t a) { +// CHECK: test_vcvtad_u64_f64 +// CHECK: fcvtau {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcvtad_u64_f64(a); +} + +int32_t test_vcvtms_s32_f32(float32_t a) { +// CHECK: test_vcvtms_s32_f32 +// CHECK: fcvtms {{s[0-9]+}}, {{s[0-9]+}} + return (int32_t)vcvtms_s32_f32(a); +} + +int64_t 
test_vcvtmd_s64_f64(float64_t a) { +// CHECK: test_vcvtmd_s64_f64 +// CHECK: fcvtms {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vcvtmd_s64_f64(a); +} + +uint32_t test_vcvtms_u32_f32(float32_t a) { +// CHECK: test_vcvtms_u32_f32 +// CHECK: fcvtmu {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcvtms_u32_f32(a); +} + +uint64_t test_vcvtmd_u64_f64(float64_t a) { +// CHECK: test_vcvtmd_u64_f64 +// CHECK: fcvtmu {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcvtmd_u64_f64(a); +} + +int32_t test_vcvtns_s32_f32(float32_t a) { +// CHECK: test_vcvtns_s32_f32 +// CHECK: fcvtns {{s[0-9]+}}, {{s[0-9]+}} + return (int32_t)vcvtns_s32_f32(a); +} + +int64_t test_vcvtnd_s64_f64(float64_t a) { +// CHECK: test_vcvtnd_s64_f64 +// CHECK: fcvtns {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vcvtnd_s64_f64(a); +} + +uint32_t test_vcvtns_u32_f32(float32_t a) { +// CHECK: test_vcvtns_u32_f32 +// CHECK: fcvtnu {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcvtns_u32_f32(a); +} + +uint64_t test_vcvtnd_u64_f64(float64_t a) { +// CHECK: test_vcvtnd_u64_f64 +// CHECK: fcvtnu {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcvtnd_u64_f64(a); +} + +int32_t test_vcvtps_s32_f32(float32_t a) { +// CHECK: test_vcvtps_s32_f32 +// CHECK: fcvtps {{s[0-9]+}}, {{s[0-9]+}} + return (int32_t)vcvtps_s32_f32(a); +} + +int64_t test_vcvtpd_s64_f64(float64_t a) { +// CHECK: test_vcvtpd_s64_f64 +// CHECK: fcvtps {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vcvtpd_s64_f64(a); +} + +uint32_t test_vcvtps_u32_f32(float32_t a) { +// CHECK: test_vcvtps_u32_f32 +// CHECK: fcvtpu {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcvtps_u32_f32(a); +} + +uint64_t test_vcvtpd_u64_f64(float64_t a) { +// CHECK: test_vcvtpd_u64_f64 +// CHECK: fcvtpu {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcvtpd_u64_f64(a); +} + +int32_t test_vcvts_s32_f32(float32_t a) { +// CHECK: test_vcvts_s32_f32 +// CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}} + return (int32_t)vcvts_s32_f32(a); +} + +int64_t test_vcvtd_s64_f64(float64_t a) { +// CHECK: test_vcvtd_s64_f64 +// CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vcvtd_s64_f64(a); +} + +uint32_t test_vcvts_u32_f32(float32_t a) { +// CHECK: test_vcvts_u32_f32 +// CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcvts_u32_f32(a); +} + +uint64_t test_vcvtd_u64_f64(float64_t a) { +// CHECK: test_vcvtd_u64_f64 +// CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcvtd_u64_f64(a); +} diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c new file mode 100644 index 0000000..6e9b7f1 --- /dev/null +++ b/test/CodeGen/aarch64-neon-intrinsics.c @@ -0,0 +1,11725 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vadd_s8 + return vadd_s8(v1, v2); + // CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vadd_s16 + return vadd_s16(v1, v2); + // CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vadd_s32 + return vadd_s32(v1, v2); + // CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) { + // CHECK: test_vadd_s64 + return vadd_s64(v1, v2); + // CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +float32x2_t 
test_vadd_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vadd_f32 + return vadd_f32(v1, v2); + // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vadd_u8 + return vadd_u8(v1, v2); + // CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vadd_u16 + return vadd_u16(v1, v2); + // CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vadd_u32 + return vadd_u32(v1, v2); + // CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) { + // CHECK: test_vadd_u64 + return vadd_u64(v1, v2); + // CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vaddq_s8 + return vaddq_s8(v1, v2); + // CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vaddq_s16 + return vaddq_s16(v1, v2); + // CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vaddq_s32(int32x4_t v1,int32x4_t v2) { + // CHECK: test_vaddq_s32 + return vaddq_s32(v1, v2); + // CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) { + // CHECK: test_vaddq_s64 + return vaddq_s64(v1, v2); + // CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vaddq_f32 + return vaddq_f32(v1, v2); + // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vaddq_f64 + return vaddq_f64(v1, v2); + // CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vaddq_u8 + return vaddq_u8(v1, v2); + // CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vaddq_u16 + return vaddq_u16(v1, v2); + // CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: vaddq_u32 + return vaddq_u32(v1, v2); + // CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) { + // CHECK: test_vaddq_u64 + return vaddq_u64(v1, v2); + // CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vsub_s8 + return vsub_s8(v1, v2); + // CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vsub_s16 + return vsub_s16(v1, v2); + // CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vsub_s32 + return vsub_s32(v1, v2); + // CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) { + // CHECK: test_vsub_s64 + return vsub_s64(v1, v2); + // CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vsub_f32 + return vsub_f32(v1, v2); + // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + 
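Note that the 64x1 variants above (e.g. vadd_u64, vsub_s64) are checked against the scalar d-register form of add/sub rather than a vector arrangement, since a 64x1 vector carries a single lane. A hedged sketch of that equivalence through the lane accessors (add_s64_via_lanes is a hypothetical helper, not part of the test):

#include <arm_neon.h>

// Same result as vadd_s64(a, b): pull out the single lane of each input,
// add as ordinary 64-bit integers, and rebuild the one-lane vector.
int64x1_t add_s64_via_lanes(int64x1_t a, int64x1_t b) {
  return vcreate_s64((uint64_t)(vget_lane_s64(a, 0) + vget_lane_s64(b, 0)));
}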
+uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vsub_u8 + return vsub_u8(v1, v2); + // CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vsub_u16 + return vsub_u16(v1, v2); + // CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vsub_u32 + return vsub_u32(v1, v2); + // CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) { + // CHECK: test_vsub_u64 + return vsub_u64(v1, v2); + // CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vsubq_s8 + return vsubq_s8(v1, v2); + // CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vsubq_s16 + return vsubq_s16(v1, v2); + // CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vsubq_s32(int32x4_t v1,int32x4_t v2) { + // CHECK: test_vsubq_s32 + return vsubq_s32(v1, v2); + // CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) { + // CHECK: test_vsubq_s64 + return vsubq_s64(v1, v2); + // CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vsubq_f32 + return vsubq_f32(v1, v2); + // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vsubq_f64 + return vsubq_f64(v1, v2); + // CHECK: fsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vsubq_u8 + return vsubq_u8(v1, v2); + // CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vsubq_u16 + return vsubq_u16(v1, v2); + // CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: vsubq_u32 + return vsubq_u32(v1, v2); + // CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) { + // CHECK: test_vsubq_u64 + return vsubq_u64(v1, v2); + // CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vmul_s8 + return vmul_s8(v1, v2); + // CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vmul_s16 + return vmul_s16(v1, v2); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vmul_s32 + return vmul_s32(v1, v2); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vmul_f32 + return vmul_f32(v1, v2); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + + +uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vmul_u8 + return vmul_u8(v1, v2); + // CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vmul_u16 + return vmul_u16(v1, v2); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, 
{{v[0-9]+}}.4h +} + +uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vmul_u32 + return vmul_u32(v1, v2); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vmulq_s8 + return vmulq_s8(v1, v2); + // CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vmulq_s16 + return vmulq_s16(v1, v2); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) { + // CHECK: test_vmulq_s32 + return vmulq_s32(v1, v2); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vmulq_u8 + return vmulq_u8(v1, v2); + // CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vmulq_u16 + return vmulq_u16(v1, v2); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: test_vmulq_u32 + return vmulq_u32(v1, v2); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vmulq_f32 + return vmulq_f32(v1, v2); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vmulq_f64 + return vmulq_f64(v1, v2); + // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) { + // test_vmul_p8 + return vmul_p8(v1, v2); + // pmul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) { + // test_vmulq_p8 + return vmulq_p8(v1, v2); + // pmul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + + +int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { + // CHECK: test_vmla_s8 + return vmla_s8(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { + // CHECK: test_vmla_s16 + return vmla_s16(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { + // CHECK: test_vmla_s32 + return vmla_s32(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { + // CHECK: test_vmla_f32 + return vmla_f32(v1, v2, v3); + // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { + // CHECK: test_vmla_u8 + return vmla_u8(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { + // CHECK: test_vmla_u16 + return vmla_u16(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { + // CHECK: test_vmla_u32 + return vmla_u32(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { + // CHECK: test_vmlaq_s8 + return vmlaq_s8(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + 
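The integer mla checks above match dedicated multiply-accumulate instructions, but vmla_f32/vmlaq_f32 are defined as a separate multiply and add, so the fmla expected here relies on the -ffp-contract=fast flag in the RUN line allowing the backend to fuse the pair. A hedged sketch of the unfused form (mla_unfused_ref is a hypothetical helper, not part of the test):

#include <arm_neon.h>

// The expansion of vmla_f32(acc, a, b): an fmul followed by an fadd.
// With contraction enabled the two may be combined into a single fmla,
// which is what the CHECK lines above expect under -ffp-contract=fast.
float32x2_t mla_unfused_ref(float32x2_t acc, float32x2_t a, float32x2_t b) {
  return vadd_f32(acc, vmul_f32(a, b));
}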
+int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { + // CHECK: test_vmlaq_s16 + return vmlaq_s16(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { + // CHECK: test_vmlaq_s32 + return vmlaq_s32(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { + // CHECK: test_vmlaq_f32 + return vmlaq_f32(v1, v2, v3); + // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { + // CHECK: test_vmlaq_u8 + return vmlaq_u8(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { + // CHECK: test_vmlaq_u16 + return vmlaq_u16(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { + // CHECK: test_vmlaq_u32 + return vmlaq_u32(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { + // CHECK: test_vmlaq_f64 + return vmlaq_f64(v1, v2, v3); + // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { + // CHECK: test_vmls_s8 + return vmls_s8(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { + // CHECK: test_vmls_s16 + return vmls_s16(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { + // CHECK: test_vmls_s32 + return vmls_s32(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { + // CHECK: test_vmls_f32 + return vmls_f32(v1, v2, v3); + // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { + // CHECK: test_vmls_u8 + return vmls_u8(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { + // CHECK: test_vmls_u16 + return vmls_u16(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { + // CHECK: test_vmls_u32 + return vmls_u32(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} +int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { + // CHECK: test_vmlsq_s8 + return vmlsq_s8(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { + // CHECK: test_vmlsq_s16 + return vmlsq_s16(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { + // CHECK: test_vmlsq_s32 + return vmlsq_s32(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { + // CHECK: test_vmlsq_f32 + return vmlsq_f32(v1, v2, v3); + // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4s +} +uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { + // CHECK: test_vmlsq_u8 + return vmlsq_u8(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { + // CHECK: test_vmlsq_u16 + return vmlsq_u16(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { + // CHECK: test_vmlsq_u32 + return vmlsq_u32(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { + // CHECK: test_vmlsq_f64 + return vmlsq_f64(v1, v2, v3); + // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} +float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { + // CHECK: test_vfma_f32 + return vfma_f32(v1, v2, v3); + // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { + // CHECK: test_vfmaq_f32 + return vfmaq_f32(v1, v2, v3); + // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { + // CHECK: test_vfmaq_f64 + return vfmaq_f64(v1, v2, v3); + // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} +float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { + // CHECK: test_vfms_f32 + return vfms_f32(v1, v2, v3); + // CHECK: fmls v0.2s, v1.2s, v2.2s +} + +float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { + // CHECK: test_vfmsq_f32 + return vfmsq_f32(v1, v2, v3); + // CHECK: fmls v0.4s, v1.4s, v2.4s +} + +float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { + // CHECK: vfmsq_f64 + return vfmsq_f64(v1, v2, v3); + // CHECK: fmls v0.2d, v1.2d, v2.2d +} + +float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vdivq_f64 + return vdivq_f64(v1, v2); + // CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vdivq_f32 + return vdivq_f32(v1, v2); + // CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vdiv_f32 + return vdiv_f32(v1, v2); + // CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { + // CHECK: test_vaba_s8 + return vaba_s8(v1, v2, v3); + // CHECK: saba {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { + // CHECK: test_vaba_s16 + return vaba_s16(v1, v2, v3); + // CHECK: saba {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { + // CHECK: test_vaba_s32 + return vaba_s32(v1, v2, v3); + // CHECK: saba {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { + // CHECK: test_vaba_u8 + return vaba_u8(v1, v2, v3); + // CHECK: uaba {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { + // CHECK: test_vaba_u16 + return vaba_u16(v1, v2, v3); + // CHECK: uaba {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, 
uint32x2_t v3) { + // CHECK: test_vaba_u32 + return vaba_u32(v1, v2, v3); + // CHECK: uaba {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { + // CHECK: test_vabaq_s8 + return vabaq_s8(v1, v2, v3); + // CHECK: saba {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { + // CHECK: test_vabaq_s16 + return vabaq_s16(v1, v2, v3); + // CHECK: saba {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { + // CHECK: test_vabaq_s32 + return vabaq_s32(v1, v2, v3); + // CHECK: saba {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { + // CHECK: test_vabaq_u8 + return vabaq_u8(v1, v2, v3); + // CHECK: uaba {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { + // CHECK: test_vabaq_u16 + return vabaq_u16(v1, v2, v3); + // CHECK: uaba {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { + // CHECK: test_vabaq_u32 + return vabaq_u32(v1, v2, v3); + // CHECK: uaba {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vabd_s8 + return vabd_s8(v1, v2); + // CHECK: sabd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vabd_s16 + return vabd_s16(v1, v2); + // CHECK: sabd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vabd_s32 + return vabd_s32(v1, v2); + // CHECK: sabd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vabd_u8 + return vabd_u8(v1, v2); + // CHECK: uabd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vabd_u16 + return vabd_u16(v1, v2); + // CHECK: uabd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vabd_u32 + return vabd_u32(v1, v2); + // CHECK: uabd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vabd_f32 + return vabd_f32(v1, v2); + // CHECK: fabd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vabdq_s8 + return vabdq_s8(v1, v2); + // CHECK: sabd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vabdq_s16 + return vabdq_s16(v1, v2); + // CHECK: sabd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) { + // CHECK: test_vabdq_s32 + return vabdq_s32(v1, v2); + // CHECK: sabd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vabdq_u8 + return vabdq_u8(v1, v2); + // CHECK: uabd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vabdq_u16 + return vabdq_u16(v1, v2); + // CHECK: uabd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) { + // 
CHECK: test_vabdq_u32 + return vabdq_u32(v1, v2); + // CHECK: uabd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vabdq_f32 + return vabdq_f32(v1, v2); + // CHECK: fabd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vabdq_f64 + return vabdq_f64(v1, v2); + // CHECK: fabd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + + +int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) { + // CHECK: test_vbsl_s8 + return vbsl_s8(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) { + // CHECK: test_vbsl_s16 + return vbsl_s16(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) { + // CHECK: test_vbsl_s32 + return vbsl_s32(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint64x1_t test_vbsl_s64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) { + // CHECK: test_vbsl_s64 + return vbsl_s64(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { + // CHECK: test_vbsl_u8 + return vbsl_u8(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { + // CHECK: test_vbsl_u16 + return vbsl_u16(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { + // CHECK: test_vbsl_u32 + return vbsl_u32(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) { + // CHECK: test_vbsl_u64 + return vbsl_u64(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +float32x2_t test_vbsl_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { + // CHECK: test_vbsl_f32 + return vbsl_f32(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) { + // CHECK: test_vbsl_f64 + return vbsl_f64(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) { + // CHECK: test_vbsl_p8 + return vbsl_p8(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) { + // CHECK: test_vbsl_p16 + return vbsl_p16(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) { + // CHECK: test_vbslq_s8 + return vbslq_s8(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) { + // CHECK: test_vbslq_s16 + return vbslq_s16(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { + // CHECK: test_vbslq_s32 + return vbslq_s32(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) { + // CHECK: test_vbslq_s64 + return 
vbslq_s64(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { + // CHECK: test_vbslq_u8 + return vbslq_u8(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { + // CHECK: test_vbslq_u16 + return vbslq_u16(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int32x4_t test_vbslq_u32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { + // CHECK: test_vbslq_u32 + return vbslq_s32(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) { + // CHECK: test_vbslq_u64 + return vbslq_u64(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) { + // CHECK: test_vbslq_f32 + return vbslq_f32(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) { + // CHECK: test_vbslq_p8 + return vbslq_p8(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) { + // CHECK: test_vbslq_p16 + return vbslq_p16(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) { + // CHECK: test_vbslq_f64 + return vbslq_f64(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vrecps_f32 + return vrecps_f32(v1, v2); + // CHECK: frecps {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vrecpsq_f32 + return vrecpsq_f32(v1, v2); + // CHECK: frecps {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vrecpsq_f64 + return vrecpsq_f64(v1, v2); + // CHECK: frecps {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vrsqrts_f32 + return vrsqrts_f32(v1, v2); + // CHECK: frsqrts {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vrsqrtsq_f32 + return vrsqrtsq_f32(v1, v2); + // CHECK: frsqrts {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vrsqrtsq_f64 + return vrsqrtsq_f64(v1, v2); + // CHECK: frsqrts {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vcage_f32 + return vcage_f32(v1, v2); + // CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcage_f64 + return vcage_f64(a, b); + // CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vcageq_f32 + return vcageq_f32(v1, v2); + // CHECK: facge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vcageq_f64 + return vcageq_f64(v1, 
v2); + // CHECK: facge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vcagt_f32 + return vcagt_f32(v1, v2); + // CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcagt_f64 + return vcagt_f64(a, b); + // CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vcagtq_f32 + return vcagtq_f32(v1, v2); + // CHECK: facgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vcagtq_f64 + return vcagtq_f64(v1, v2); + // CHECK: facgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vcale_f32 + return vcale_f32(v1, v2); + // Using registers other than v0, v1 are possible, but would be odd. + // CHECK: facge {{v[0-9]+}}.2s, v1.2s, v0.2s +} + +uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcale_f64 + return vcale_f64(a, b); + // CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vcaleq_f32 + return vcaleq_f32(v1, v2); + // Using registers other than v0, v1 are possible, but would be odd. + // CHECK: facge {{v[0-9]+}}.4s, v1.4s, v0.4s +} + +uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vcaleq_f64 + return vcaleq_f64(v1, v2); + // Using registers other than v0, v1 are possible, but would be odd. + // CHECK: facge {{v[0-9]+}}.2d, v1.2d, v0.2d +} + +uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vcalt_f32 + return vcalt_f32(v1, v2); + // Using registers other than v0, v1 are possible, but would be odd. + // CHECK: facgt {{v[0-9]+}}.2s, v1.2s, v0.2s +} + +uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcalt_f64 + return vcalt_f64(a, b); + // CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vcaltq_f32 + return vcaltq_f32(v1, v2); + // Using registers other than v0, v1 are possible, but would be odd. + // CHECK: facgt {{v[0-9]+}}.4s, v1.4s, v0.4s +} + +uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vcaltq_f64 + return vcaltq_f64(v1, v2); + // Using registers other than v0, v1 are possible, but would be odd. 
+ // CHECK: facgt {{v[0-9]+}}.2d, v1.2d, v0.2d +} + +uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vtst_s8 + return vtst_s8(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vtst_s16 + return vtst_s16(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vtst_s32 + return vtst_s32(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vtst_u8 + return vtst_u8(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vtst_u16 + return vtst_u16(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vtst_u32 + return vtst_u32(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vtstq_s8 + return vtstq_s8(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vtstq_s16 + return vtstq_s16(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) { + // CHECK: test_vtstq_s32 + return vtstq_s32(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vtstq_u8 + return vtstq_u8(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vtstq_u16 + return vtstq_u16(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: test_vtstq_u32 + return vtstq_u32(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) { + // CHECK: test_vtstq_s64 + return vtstq_s64(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) { + // CHECK: test_vtstq_u64 + return vtstq_u64(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) { + // CHECK: test_vtst_p8 + return vtst_p8(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) { + // CHECK: test_vtst_p16 + return vtst_p16(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) { + // CHECK: test_vtstq_p8 + return vtstq_p8(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) { + // CHECK: test_vtstq_p16 + return vtstq_p16(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + + +uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vceq_s8 + return vceq_s8(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: 
test_vceq_s16 + return vceq_s16(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vceq_s32 + return vceq_s32(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) { + // CHECK: test_vceq_s64 + return vceq_s64(a, b); + // CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) { + // CHECK: test_vceq_u64 + return vceq_u64(a, b); + // CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vceq_f32 + return vceq_f32(v1, v2); + // CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vceq_f64 + return vceq_f64(a, b); + // CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vceq_u8 + return vceq_u8(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vceq_u16 + return vceq_u16(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vceq_u32 + return vceq_u32(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) { + // CHECK: test_vceq_p8 + return vceq_p8(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vceqq_s8 + return vceqq_s8(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vceqq_s16 + return vceqq_s16(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) { + // CHECK: test_vceqq_s32 + return vceqq_s32(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vceqq_f32 + return vceqq_f32(v1, v2); + // CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vceqq_u8 + return vceqq_u8(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vceqq_u16 + return vceqq_u16(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: test_vceqq_u32 + return vceqq_u32(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) { + // CHECK: test_vceqq_p8 + return vceqq_p8(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + + +uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) { + // CHECK: test_vceqq_s64 + return vceqq_s64(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) { + // CHECK: test_vceqq_u64 + return vceqq_u64(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint64x2_t test_vceqq_f64(float64x2_t v1, 
float64x2_t v2) { + // CHECK: test_vceqq_f64 + return vceqq_f64(v1, v2); + // CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} +uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) { +// CHECK: test_vcge_s8 + return vcge_s8(v1, v2); +// CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) { +// CHECK: test_vcge_s16 + return vcge_s16(v1, v2); +// CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) { +// CHECK: test_vcge_s32 + return vcge_s32(v1, v2); +// CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) { + // CHECK: test_vcge_s64 + return vcge_s64(a, b); + // CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) { + // CHECK: test_vcge_u64 + return vcge_u64(a, b); + // CHECK: cmhs {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) { +// CHECK: test_vcge_f32 + return vcge_f32(v1, v2); +// CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcge_f64 + return vcge_f64(a, b); + // CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) { +// CHECK: test_vcge_u8 + return vcge_u8(v1, v2); +// CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) { +// CHECK: test_vcge_u16 + return vcge_u16(v1, v2); +// CHECK: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) { +// CHECK: test_vcge_u32 + return vcge_u32(v1, v2); +// CHECK: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) { +// CHECK: test_vcgeq_s8 + return vcgeq_s8(v1, v2); +// CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) { +// CHECK: test_vcgeq_s16 + return vcgeq_s16(v1, v2); +// CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) { +// CHECK: test_vcgeq_s32 + return vcgeq_s32(v1, v2); +// CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) { +// CHECK: test_vcgeq_f32 + return vcgeq_f32(v1, v2); +// CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) { +// CHECK: test_vcgeq_u8 + return vcgeq_u8(v1, v2); +// CHECK: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) { +// CHECK: test_vcgeq_u16 + return vcgeq_u16(v1, v2); +// CHECK: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) { +// CHECK: test_vcgeq_u32 + return vcgeq_u32(v1, v2); +// CHECK: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) { +// CHECK: test_vcgeq_s64 + return vcgeq_s64(v1, v2); +// CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) { +// CHECK: test_vcgeq_u64 + return vcgeq_u64(v1, v2); +// CHECK: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t 
v2) {
+// CHECK: test_vcgeq_f64
+ return vcgeq_f64(v1, v2);
+// CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+// Notes about vcle:
+// LE condition predicate implemented as GE, so check reversed operands.
+// Using registers other than v0, v1 is possible, but would be odd.
+uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
+ // CHECK: test_vcle_s8
+ return vcle_s8(v1, v2);
+ // CHECK: cmge {{v[0-9]+}}.8b, v1.8b, v0.8b
+}
+
+uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
+ // CHECK: test_vcle_s16
+ return vcle_s16(v1, v2);
+ // CHECK: cmge {{v[0-9]+}}.4h, v1.4h, v0.4h
+}
+
+uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
+ // CHECK: test_vcle_s32
+ return vcle_s32(v1, v2);
+ // CHECK: cmge {{v[0-9]+}}.2s, v1.2s, v0.2s
+}
+
+uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
+ // CHECK: test_vcle_s64
+ return vcle_s64(a, b);
+ // CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
+ // CHECK: test_vcle_u64
+ return vcle_u64(a, b);
+ // CHECK: cmhs {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vcle_f32
+ return vcle_f32(v1, v2);
+ // CHECK: fcmge {{v[0-9]+}}.2s, v1.2s, v0.2s
+}
+
+uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
+ // CHECK: test_vcle_f64
+ return vcle_f64(a, b);
+ // CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
+ // CHECK: test_vcle_u8
+ return vcle_u8(v1, v2);
+ // CHECK: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b
+}
+
+uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
+ // CHECK: test_vcle_u16
+ return vcle_u16(v1, v2);
+ // CHECK: cmhs {{v[0-9]+}}.4h, v1.4h, v0.4h
+}
+
+uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
+ // CHECK: test_vcle_u32
+ return vcle_u32(v1, v2);
+ // CHECK: cmhs {{v[0-9]+}}.2s, v1.2s, v0.2s
+}
+
+uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
+ // CHECK: test_vcleq_s8
+ return vcleq_s8(v1, v2);
+ // CHECK: cmge {{v[0-9]+}}.16b, v1.16b, v0.16b
+}
+
+uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
+ // CHECK: test_vcleq_s16
+ return vcleq_s16(v1, v2);
+ // CHECK: cmge {{v[0-9]+}}.8h, v1.8h, v0.8h
+}
+
+uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
+ // CHECK: test_vcleq_s32
+ return vcleq_s32(v1, v2);
+ // CHECK: cmge {{v[0-9]+}}.4s, v1.4s, v0.4s
+}
+
+uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vcleq_f32
+ return vcleq_f32(v1, v2);
+ // CHECK: fcmge {{v[0-9]+}}.4s, v1.4s, v0.4s
+}
+
+uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
+ // CHECK: test_vcleq_u8
+ return vcleq_u8(v1, v2);
+ // CHECK: cmhs {{v[0-9]+}}.16b, v1.16b, v0.16b
+}
+
+uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
+ // CHECK: test_vcleq_u16
+ return vcleq_u16(v1, v2);
+ // CHECK: cmhs {{v[0-9]+}}.8h, v1.8h, v0.8h
+}
+
+uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
+ // CHECK: test_vcleq_u32
+ return vcleq_u32(v1, v2);
+ // CHECK: cmhs {{v[0-9]+}}.4s, v1.4s, v0.4s
+}
+
+uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
+ // CHECK: test_vcleq_s64
+ return vcleq_s64(v1, v2);
+ // CHECK: cmge {{v[0-9]+}}.2d, v1.2d, v0.2d
+}
+
+uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
+ // CHECK: test_vcleq_u64
+ return vcleq_u64(v1, v2);
+ // CHECK: cmhs {{v[0-9]+}}.2d, v1.2d, v0.2d
+}
+
+uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vcleq_f64
+ return vcleq_f64(v1, v2);
+ // CHECK: fcmge {{v[0-9]+}}.2d, v1.2d,
v0.2d +} + + +uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vcgt_s8 + return vcgt_s8(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vcgt_s16 + return vcgt_s16(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vcgt_s32 + return vcgt_s32(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) { + // CHECK: test_vcgt_s64 + return vcgt_s64(a, b); + // CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) { + // CHECK: test_vcgt_u64 + return vcgt_u64(a, b); + // CHECK: cmhi {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vcgt_f32 + return vcgt_f32(v1, v2); + // CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcgt_f64 + return vcgt_f64(a, b); + // CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vcgt_u8 + return vcgt_u8(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vcgt_u16 + return vcgt_u16(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vcgt_u32 + return vcgt_u32(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vcgtq_s8 + return vcgtq_s8(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vcgtq_s16 + return vcgtq_s16(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) { + // CHECK: test_vcgtq_s32 + return vcgtq_s32(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vcgtq_f32 + return vcgtq_f32(v1, v2); + // CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vcgtq_u8 + return vcgtq_u8(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vcgtq_u16 + return vcgtq_u16(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: test_vcgtq_u32 + return vcgtq_u32(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) { + // CHECK: test_vcgtq_s64 + return vcgtq_s64(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) { + // CHECK: test_vcgtq_u64 + return vcgtq_u64(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vcgtq_f64 + return vcgtq_f64(v1, v2); + // CHECK: fcmgt {{v[0-9]+}}.2d, 
{{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+
+// Notes about vclt:
+// LT condition predicate implemented as GT, so check reversed operands.
+// Using registers other than v0, v1 is possible, but would be odd.
+
+uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
+ // CHECK: test_vclt_s8
+ return vclt_s8(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.8b, v1.8b, v0.8b
+}
+
+uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
+ // CHECK: test_vclt_s16
+ return vclt_s16(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.4h, v1.4h, v0.4h
+}
+
+uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
+ // CHECK: test_vclt_s32
+ return vclt_s32(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+}
+
+uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
+ // CHECK: test_vclt_s64
+ return vclt_s64(a, b);
+ // CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
+ // CHECK: test_vclt_u64
+ return vclt_u64(a, b);
+ // CHECK: cmhi {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vclt_f32
+ return vclt_f32(v1, v2);
+ // CHECK: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+}
+
+uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
+ // CHECK: test_vclt_f64
+ return vclt_f64(a, b);
+ // CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
+ // CHECK: test_vclt_u8
+ return vclt_u8(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.8b, v1.8b, v0.8b
+}
+
+uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
+ // CHECK: test_vclt_u16
+ return vclt_u16(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.4h, v1.4h, v0.4h
+}
+
+uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
+ // CHECK: test_vclt_u32
+ return vclt_u32(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.2s, v1.2s, v0.2s
+}
+
+uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
+ // CHECK: test_vcltq_s8
+ return vcltq_s8(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.16b, v1.16b, v0.16b
+}
+
+uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
+ // CHECK: test_vcltq_s16
+ return vcltq_s16(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.8h, v1.8h, v0.8h
+}
+
+uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
+ // CHECK: test_vcltq_s32
+ return vcltq_s32(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+}
+
+uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vcltq_f32
+ return vcltq_f32(v1, v2);
+ // CHECK: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+}
+
+uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
+ // CHECK: test_vcltq_u8
+ return vcltq_u8(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.16b, v1.16b, v0.16b
+}
+
+uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
+ // CHECK: test_vcltq_u16
+ return vcltq_u16(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.8h, v1.8h, v0.8h
+}
+
+uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
+ // CHECK: test_vcltq_u32
+ return vcltq_u32(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.4s, v1.4s, v0.4s
+}
+
+uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
+ // CHECK: test_vcltq_s64
+ return vcltq_s64(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+}
+
+uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
+ // CHECK: test_vcltq_u64
+ return vcltq_u64(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.2d, v1.2d, v0.2d
+}
+
+uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vcltq_f64
+ return vcltq_f64(v1, v2);
+ // CHECK: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+}
+
+
+int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
+// CHECK: test_vhadd_s8
+ return vhadd_s8(v1, v2); + // CHECK: shadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) { +// CHECK: test_vhadd_s16 + return vhadd_s16(v1, v2); + // CHECK: shadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) { +// CHECK: test_vhadd_s32 + return vhadd_s32(v1, v2); + // CHECK: shadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) { +// CHECK: test_vhadd_u8 + return vhadd_u8(v1, v2); + // CHECK: uhadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) { +// CHECK: test_vhadd_u16 + return vhadd_u16(v1, v2); + // CHECK: uhadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) { +// CHECK: test_vhadd_u32 + return vhadd_u32(v1, v2); + // CHECK: uhadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) { +// CHECK: test_vhaddq_s8 + return vhaddq_s8(v1, v2); + // CHECK: shadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) { +// CHECK: test_vhaddq_s16 + return vhaddq_s16(v1, v2); + // CHECK: shadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) { +// CHECK: test_vhaddq_s32 + return vhaddq_s32(v1, v2); + // CHECK: shadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) { +// CHECK: test_vhaddq_u8 + return vhaddq_u8(v1, v2); + // CHECK: uhadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) { +// CHECK: test_vhaddq_u16 + return vhaddq_u16(v1, v2); + // CHECK: uhadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) { +// CHECK: test_vhaddq_u32 + return vhaddq_u32(v1, v2); + // CHECK: uhadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + + +int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) { +// CHECK: test_vhsub_s8 + return vhsub_s8(v1, v2); + // CHECK: shsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) { +// CHECK: test_vhsub_s16 + return vhsub_s16(v1, v2); + // CHECK: shsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) { +// CHECK: test_vhsub_s32 + return vhsub_s32(v1, v2); + // CHECK: shsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) { +// CHECK: test_vhsub_u8 + return vhsub_u8(v1, v2); + // CHECK: uhsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) { +// CHECK: test_vhsub_u16 + return vhsub_u16(v1, v2); + // CHECK: uhsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) { +// CHECK: test_vhsub_u32 + return vhsub_u32(v1, v2); + // CHECK: uhsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) { +// CHECK: test_vhsubq_s8 + return vhsubq_s8(v1, v2); + // CHECK: shsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) { +// CHECK: test_vhsubq_s16 + return vhsubq_s16(v1, v2); + // CHECK: shsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + 
+int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) { +// CHECK: test_vhsubq_s32 + return vhsubq_s32(v1, v2); + // CHECK: shsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) { +// CHECK: test_vhsubq_u8 + return vhsubq_u8(v1, v2); + // CHECK: uhsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) { +// CHECK: test_vhsubq_u16 + return vhsubq_u16(v1, v2); + // CHECK: uhsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) { +// CHECK: test_vhsubq_u32 + return vhsubq_u32(v1, v2); + // CHECK: uhsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + + +int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) { +// CHECK: test_vrhadd_s8 + return vrhadd_s8(v1, v2); +// CHECK: srhadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) { +// CHECK: test_vrhadd_s16 + return vrhadd_s16(v1, v2); +// CHECK: srhadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) { +// CHECK: test_vrhadd_s32 + return vrhadd_s32(v1, v2); +// CHECK: srhadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) { +// CHECK: test_vrhadd_u8 + return vrhadd_u8(v1, v2); +// CHECK: urhadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) { +// CHECK: test_vrhadd_u16 + return vrhadd_u16(v1, v2); +// CHECK: urhadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) { +// CHECK: test_vrhadd_u32 + return vrhadd_u32(v1, v2); +// CHECK: urhadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) { +// CHECK: test_vrhaddq_s8 + return vrhaddq_s8(v1, v2); +// CHECK: srhadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) { +// CHECK: test_vrhaddq_s16 + return vrhaddq_s16(v1, v2); +// CHECK: srhadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) { +// CHECK: test_vrhaddq_s32 + return vrhaddq_s32(v1, v2); +// CHECK: srhadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) { +// CHECK: test_vrhaddq_u8 + return vrhaddq_u8(v1, v2); +// CHECK: urhadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) { +// CHECK: test_vrhaddq_u16 + return vrhaddq_u16(v1, v2); +// CHECK: urhadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) { +// CHECK: test_vrhaddq_u32 + return vrhaddq_u32(v1, v2); +// CHECK: urhadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vqadd_s8 + return vqadd_s8(a, b); + // CHECK: sqadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vqadd_s16 + return vqadd_s16(a, b); + // CHECK: sqadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vqadd_s32 + return vqadd_s32(a, b); + // CHECK: sqadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { +// CHECK: 
test_vqadd_s64 + return vqadd_s64(a, b); +// CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { +// CHECK: test_vqadd_u8 + return vqadd_u8(a, b); + // CHECK: uqadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { +// CHECK: test_vqadd_u16 + return vqadd_u16(a, b); + // CHECK: uqadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { +// CHECK: test_vqadd_u32 + return vqadd_u32(a, b); + // CHECK: uqadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { +// CHECK: test_vqadd_u64 + return vqadd_u64(a, b); +// CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vqaddq_s8 + return vqaddq_s8(a, b); + // CHECK: sqadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vqaddq_s16 + return vqaddq_s16(a, b); + // CHECK: sqadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vqaddq_s32 + return vqaddq_s32(a, b); + // CHECK: sqadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { +// CHECK: test_vqaddq_s64 + return vqaddq_s64(a, b); +// CHECK: sqadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { +// CHECK: test_vqaddq_u8 + return vqaddq_u8(a, b); + // CHECK: uqadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { +// CHECK: test_vqaddq_u16 + return vqaddq_u16(a, b); + // CHECK: uqadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { +// CHECK: test_vqaddq_u32 + return vqaddq_u32(a, b); + // CHECK: uqadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { +// CHECK: test_vqaddq_u64 + return vqaddq_u64(a, b); +// CHECK: uqadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + + +int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vqsub_s8 + return vqsub_s8(a, b); + // CHECK: sqsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vqsub_s16 + return vqsub_s16(a, b); + // CHECK: sqsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vqsub_s32 + return vqsub_s32(a, b); + // CHECK: sqsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { +// CHECK: test_vqsub_s64 + return vqsub_s64(a, b); +// CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { +// CHECK: test_vqsub_u8 + return vqsub_u8(a, b); + // CHECK: uqsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { +// CHECK: test_vqsub_u16 + return vqsub_u16(a, b); + // CHECK: uqsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { +// CHECK: test_vqsub_u32 + return vqsub_u32(a, b); + // CHECK: uqsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { +// CHECK: test_vqsub_u64 + 
return vqsub_u64(a, b); +// CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vqsubq_s8 + return vqsubq_s8(a, b); + // CHECK: sqsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vqsubq_s16 + return vqsubq_s16(a, b); + // CHECK: sqsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vqsubq_s32 + return vqsubq_s32(a, b); + // CHECK: sqsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { +// CHECK: test_vqsubq_s64 + return vqsubq_s64(a, b); +// CHECK: sqsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { +// CHECK: test_vqsubq_u8 + return vqsubq_u8(a, b); + // CHECK: uqsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { +// CHECK: test_vqsubq_u16 + return vqsubq_u16(a, b); + // CHECK: uqsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { +// CHECK: test_vqsubq_u32 + return vqsubq_u32(a, b); + // CHECK: uqsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { +// CHECK: test_vqsubq_u64 + return vqsubq_u64(a, b); + // CHECK: uqsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + + +int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vshl_s8 + return vshl_s8(a, b); +// CHECK: sshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vshl_s16 + return vshl_s16(a, b); +// CHECK: sshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vshl_s32 + return vshl_s32(a, b); +// CHECK: sshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) { +// CHECK: test_vshl_s64 + return vshl_s64(a, b); +// CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) { +// CHECK: test_vshl_u8 + return vshl_u8(a, b); +// CHECK: ushl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) { +// CHECK: test_vshl_u16 + return vshl_u16(a, b); +// CHECK: ushl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) { +// CHECK: test_vshl_u32 + return vshl_u32(a, b); +// CHECK: ushl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) { +// CHECK: test_vshl_u64 + return vshl_u64(a, b); +// CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vshlq_s8 + return vshlq_s8(a, b); +// CHECK: sshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vshlq_s16 + return vshlq_s16(a, b); +// CHECK: sshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vshlq_s32 + return vshlq_s32(a, b); +// CHECK: sshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) { +// CHECK: test_vshlq_s64 + return vshlq_s64(a, b); +// CHECK: sshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, 
{{v[0-9]+}}.2d
+}
+
+uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
+// CHECK: test_vshlq_u8
+ return vshlq_u8(a, b);
+// CHECK: ushl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
+// CHECK: test_vshlq_u16
+ return vshlq_u16(a, b);
+// CHECK: ushl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
+// CHECK: test_vshlq_u32
+ return vshlq_u32(a, b);
+// CHECK: ushl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
+// CHECK: test_vshlq_u64
+ return vshlq_u64(a, b);
+// CHECK: ushl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+
+int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
+// CHECK: test_vqshl_s8
+ return vqshl_s8(a, b);
+// CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
+// CHECK: test_vqshl_s16
+ return vqshl_s16(a, b);
+// CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
+// CHECK: test_vqshl_s32
+ return vqshl_s32(a, b);
+// CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
+// CHECK: test_vqshl_s64
+ return vqshl_s64(a, b);
+// CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
+// CHECK: test_vqshl_u8
+ return vqshl_u8(a, b);
+// CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
+// CHECK: test_vqshl_u16
+ return vqshl_u16(a, b);
+// CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
+// CHECK: test_vqshl_u32
+ return vqshl_u32(a, b);
+// CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
+// CHECK: test_vqshl_u64
+ return vqshl_u64(a, b);
+// CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
+// CHECK: test_vqshlq_s8
+ return vqshlq_s8(a, b);
+// CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
+// CHECK: test_vqshlq_s16
+ return vqshlq_s16(a, b);
+// CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
+// CHECK: test_vqshlq_s32
+ return vqshlq_s32(a, b);
+// CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
+// CHECK: test_vqshlq_s64
+ return vqshlq_s64(a, b);
+// CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
+// CHECK: test_vqshlq_u8
+ return vqshlq_u8(a, b);
+// CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
+// CHECK: test_vqshlq_u16
+ return vqshlq_u16(a, b);
+// CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
+// CHECK: test_vqshlq_u32
+ return vqshlq_u32(a, b);
+// CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
+// CHECK: test_vqshlq_u64
+ return vqshlq_u64(a, b);
+// CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+int8x8_t
test_vrshl_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vrshl_s8 + return vrshl_s8(a, b); +// CHECK: srshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vrshl_s16 + return vrshl_s16(a, b); +// CHECK: srshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vrshl_s32 + return vrshl_s32(a, b); +// CHECK: srshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) { +// CHECK: test_vrshl_s64 + return vrshl_s64(a, b); +// CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) { +// CHECK: test_vrshl_u8 + return vrshl_u8(a, b); +// CHECK: urshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) { +// CHECK: test_vrshl_u16 + return vrshl_u16(a, b); +// CHECK: urshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) { +// CHECK: test_vrshl_u32 + return vrshl_u32(a, b); +// CHECK: urshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) { +// CHECK: test_vrshl_u64 + return vrshl_u64(a, b); +// CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vrshlq_s8 + return vrshlq_s8(a, b); +// CHECK: srshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vrshlq_s16 + return vrshlq_s16(a, b); +// CHECK: srshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vrshlq_s32 + return vrshlq_s32(a, b); +// CHECK: srshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) { +// CHECK: test_vrshlq_s64 + return vrshlq_s64(a, b); +// CHECK: srshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) { +// CHECK: test_vrshlq_u8 + return vrshlq_u8(a, b); +// CHECK: urshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) { +// CHECK: test_vrshlq_u16 + return vrshlq_u16(a, b); +// CHECK: urshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) { +// CHECK: test_vrshlq_u32 + return vrshlq_u32(a, b); +// CHECK: urshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) { +// CHECK: test_vrshlq_u64 + return vrshlq_u64(a, b); +// CHECK: urshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + + +int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vqrshl_s8 + return vqrshl_s8(a, b); +// CHECK: sqrshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vqrshl_s16 + return vqrshl_s16(a, b); +// CHECK: sqrshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vqrshl_s32 + return vqrshl_s32(a, b); +// CHECK: sqrshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) { +// CHECK: test_vqrshl_s64 + return vqrshl_s64(a, b); +// CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) 
{ +// CHECK: test_vqrshl_u8 + return vqrshl_u8(a, b); +// CHECK: uqrshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) { +// CHECK: test_vqrshl_u16 + return vqrshl_u16(a, b); +// CHECK: uqrshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) { +// CHECK: test_vqrshl_u32 + return vqrshl_u32(a, b); +// CHECK: uqrshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) { +// CHECK: test_vqrshl_u64 + return vqrshl_u64(a, b); +// CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vqrshlq_s8 + return vqrshlq_s8(a, b); +// CHECK: sqrshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vqrshlq_s16 + return vqrshlq_s16(a, b); +// CHECK: sqrshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vqrshlq_s32 + return vqrshlq_s32(a, b); +// CHECK: sqrshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) { +// CHECK: test_vqrshlq_s64 + return vqrshlq_s64(a, b); +// CHECK: sqrshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +// CHECK: test_vqrshlq_u8 +uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) { + return vqrshlq_u8(a, b); +// CHECK: uqrshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) { +// CHECK: test_vqrshlq_u16 + return vqrshlq_u16(a, b); +// CHECK: uqrshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) { +// CHECK: test_vqrshlq_u32 + return vqrshlq_u32(a, b); +// CHECK: uqrshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) { +// CHECK: test_vqrshlq_u64 + return vqrshlq_u64(a, b); +// CHECK: uqrshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vmax_s8 + return vmax_s8(a, b); +// CHECK: smax {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vmax_s16 + return vmax_s16(a, b); +// CHECK: smax {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vmax_s32 + return vmax_s32(a, b); +// CHECK: smax {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) { +// CHECK: test_vmax_u8 + return vmax_u8(a, b); +// CHECK: umax {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) { +// CHECK: test_vmax_u16 + return vmax_u16(a, b); +// CHECK: umax {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) { +// CHECK: test_vmax_u32 + return vmax_u32(a, b); +// CHECK: umax {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vmax_f32 + return vmax_f32(a, b); +// CHECK: fmax {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vmaxq_s8 + return vmaxq_s8(a, b); +// CHECK: smax {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) { +// 
CHECK: test_vmaxq_s16 + return vmaxq_s16(a, b); +// CHECK: smax {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vmaxq_s32 + return vmaxq_s32(a, b); +// CHECK: smax {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) { +// CHECK: test_vmaxq_u8 + return vmaxq_u8(a, b); +// CHECK: umax {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) { +// CHECK: test_vmaxq_u16 + return vmaxq_u16(a, b); +// CHECK: umax {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) { +// CHECK: test_vmaxq_u32 + return vmaxq_u32(a, b); +// CHECK: umax {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vmaxq_f32 + return vmaxq_f32(a, b); +// CHECK: fmax {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vmaxq_f64 + return vmaxq_f64(a, b); +// CHECK: fmax {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + + +int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vmin_s8 + return vmin_s8(a, b); +// CHECK: smin {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vmin_s16 + return vmin_s16(a, b); +// CHECK: smin {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vmin_s32 + return vmin_s32(a, b); +// CHECK: smin {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) { +// CHECK: test_vmin_u8 + return vmin_u8(a, b); +// CHECK: umin {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) { +// CHECK: test_vmin_u16 + return vmin_u16(a, b); +// CHECK: umin {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) { +// CHECK: test_vmin_u32 + return vmin_u32(a, b); +// CHECK: umin {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vmin_f32 + return vmin_f32(a, b); +// CHECK: fmin {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vminq_s8 + return vminq_s8(a, b); +// CHECK: smin {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vminq_s16 + return vminq_s16(a, b); +// CHECK: smin {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vminq_s32 + return vminq_s32(a, b); +// CHECK: smin {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) { +// CHECK: test_vminq_u8 + return vminq_u8(a, b); +// CHECK: umin {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) { +// CHECK: test_vminq_u16 + return vminq_u16(a, b); +// CHECK: umin {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) { +// CHECK: test_vminq_u32 + return vminq_u32(a, b); +// CHECK: umin {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vminq_f32 + return vminq_f32(a, b); +// 
CHECK: fmin {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vminq_f64 + return vminq_f64(a, b); +// CHECK: fmin {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vmaxnm_f32 + return vmaxnm_f32(a, b); +// CHECK: fmaxnm {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vmaxnmq_f32 + return vmaxnmq_f32(a, b); +// CHECK: fmaxnm {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vmaxnmq_f64 + return vmaxnmq_f64(a, b); +// CHECK: fmaxnm {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vminnm_f32 + return vminnm_f32(a, b); +// CHECK: fminnm {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vminnmq_f32 + return vminnmq_f32(a, b); +// CHECK: fminnm {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vminnmq_f64 + return vminnmq_f64(a, b); +// CHECK: fminnm {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vpmax_s8 + return vpmax_s8(a, b); +// CHECK: smaxp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vpmax_s16 + return vpmax_s16(a, b); +// CHECK: smaxp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vpmax_s32 + return vpmax_s32(a, b); +// CHECK: smaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) { +// CHECK: test_vpmax_u8 + return vpmax_u8(a, b); +// CHECK: umaxp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) { +// CHECK: test_vpmax_u16 + return vpmax_u16(a, b); +// CHECK: umaxp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) { +// CHECK: test_vpmax_u32 + return vpmax_u32(a, b); +// CHECK: umaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vpmax_f32 + return vpmax_f32(a, b); +// CHECK: fmaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vpmaxq_s8 + return vpmaxq_s8(a, b); +// CHECK: smaxp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vpmaxq_s16 + return vpmaxq_s16(a, b); +// CHECK: smaxp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vpmaxq_s32 + return vpmaxq_s32(a, b); +// CHECK: smaxp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) { +// CHECK: test_vpmaxq_u8 + return vpmaxq_u8(a, b); +// CHECK: umaxp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) { +// CHECK: test_vpmaxq_u16 + return vpmaxq_u16(a, b); +// CHECK: umaxp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) { +// 
CHECK: test_vpmaxq_u32 + return vpmaxq_u32(a, b); +// CHECK: umaxp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vpmaxq_f32 + return vpmaxq_f32(a, b); +// CHECK: fmaxp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vpmaxq_f64 + return vpmaxq_f64(a, b); +// CHECK: fmaxp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vpmin_s8 + return vpmin_s8(a, b); +// CHECK: sminp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vpmin_s16 + return vpmin_s16(a, b); +// CHECK: sminp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vpmin_s32 + return vpmin_s32(a, b); +// CHECK: sminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) { +// CHECK: test_vpmin_u8 + return vpmin_u8(a, b); +// CHECK: uminp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) { +// CHECK: test_vpmin_u16 + return vpmin_u16(a, b); +// CHECK: uminp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) { +// CHECK: test_vpmin_u32 + return vpmin_u32(a, b); +// CHECK: uminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vpmin_f32 + return vpmin_f32(a, b); +// CHECK: fminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vpminq_s8 + return vpminq_s8(a, b); +// CHECK: sminp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vpminq_s16 + return vpminq_s16(a, b); +// CHECK: sminp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vpminq_s32 + return vpminq_s32(a, b); +// CHECK: sminp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) { +// CHECK: test_vpminq_u8 + return vpminq_u8(a, b); +// CHECK: uminp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) { +// CHECK: test_vpminq_u16 + return vpminq_u16(a, b); +// CHECK: uminp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) { +// CHECK: test_vpminq_u32 + return vpminq_u32(a, b); +// CHECK: uminp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vpminq_f32 + return vpminq_f32(a, b); +// CHECK: fminp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vpminq_f64 + return vpminq_f64(a, b); +// CHECK: fminp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vpmaxnm_f32 + return vpmaxnm_f32(a, b); +// CHECK: fmaxnmp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vpmaxnmq_f32 + return vpmaxnmq_f32(a, b); +// CHECK: fmaxnmp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t 
test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vpmaxnmq_f64 + return vpmaxnmq_f64(a, b); +// CHECK: fmaxnmp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vpminnm_f32 + return vpminnm_f32(a, b); +// CHECK: fminnmp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vpminnmq_f32 + return vpminnmq_f32(a, b); +// CHECK: fminnmp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vpminnmq_f64 + return vpminnmq_f64(a, b); +// CHECK: fminnmp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vpadd_s8 + return vpadd_s8(a, b); +// CHECK: addp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vpadd_s16 + return vpadd_s16(a, b); +// CHECK: addp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vpadd_s32 + return vpadd_s32(a, b); +// CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) { +// CHECK: test_vpadd_u8 + return vpadd_u8(a, b); +// CHECK: addp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) { +// CHECK: test_vpadd_u16 + return vpadd_u16(a, b); +// CHECK: addp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) { +// CHECK: test_vpadd_u32 + return vpadd_u32(a, b); +// CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vpadd_f32 + return vpadd_f32(a, b); +// CHECK: faddp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vpaddq_s8 + return vpaddq_s8(a, b); +// CHECK: addp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vpaddq_s16 + return vpaddq_s16(a, b); +// CHECK: addp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vpaddq_s32 + return vpaddq_s32(a, b); +// CHECK: addp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) { +// CHECK: test_vpaddq_u8 + return vpaddq_u8(a, b); +// CHECK: addp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) { +// CHECK: test_vpaddq_u16 + return vpaddq_u16(a, b); +// CHECK: addp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) { +// CHECK: test_vpaddq_u32 + return vpaddq_u32(a, b); +// CHECK: addp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vpaddq_f32 + return vpaddq_f32(a, b); +// CHECK: faddp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vpaddq_f64 + return vpaddq_f64(a, b); +// CHECK: faddp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vqdmulh_s16 + return vqdmulh_s16(a, b); +// CHECK: sqdmulh 
{{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vqdmulh_s32 + return vqdmulh_s32(a, b); +// CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vqdmulhq_s16 + return vqdmulhq_s16(a, b); +// CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vqdmulhq_s32 + return vqdmulhq_s32(a, b); +// CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vqrdmulh_s16 + return vqrdmulh_s16(a, b); +// CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vqrdmulh_s32 + return vqrdmulh_s32(a, b); +// CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vqrdmulhq_s16 + return vqrdmulhq_s16(a, b); +// CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vqrdmulhq_s32 + return vqrdmulhq_s32(a, b); +// CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + + +float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vmulx_f32 + return vmulx_f32(a, b); +// CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vmulxq_f32 + return vmulxq_f32(a, b); +// CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vmulxq_f64 + return vmulxq_f64(a, b); +// CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int8x8_t test_vshl_n_s8(int8x8_t a) { +// CHECK: test_vshl_n_s8 + return vshl_n_s8(a, 3); +// CHECK: shl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vshl_n_s16(int16x4_t a) { +// CHECK: test_vshl_n_s16 + return vshl_n_s16(a, 3); +// CHECK: shl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vshl_n_s32(int32x2_t a) { +// CHECK: test_vshl_n_s32 + return vshl_n_s32(a, 3); +// CHECK: shl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vshlq_n_s8(int8x16_t a) { +// CHECK: test_vshlq_n_s8 + return vshlq_n_s8(a, 3); +// CHECK: shl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vshlq_n_s16(int16x8_t a) { +// CHECK: test_vshlq_n_s16 + return vshlq_n_s16(a, 3); +// CHECK: shl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vshlq_n_s32(int32x4_t a) { +// CHECK: test_vshlq_n_s32 + return vshlq_n_s32(a, 3); +// CHECK: shl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vshlq_n_s64(int64x2_t a) { +// CHECK: test_vshlq_n_s64 + return vshlq_n_s64(a, 3); +// CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vshl_n_u8(int8x8_t a) { +// CHECK: test_vshl_n_u8 + return vshl_n_u8(a, 3); +// CHECK: shl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vshl_n_u16(int16x4_t a) { +// CHECK: test_vshl_n_u16 + return vshl_n_u16(a, 3); +// CHECK: shl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vshl_n_u32(int32x2_t a) { +// CHECK: test_vshl_n_u32 + return vshl_n_u32(a, 3); +// CHECK: shl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vshlq_n_u8(int8x16_t a) { +// CHECK: test_vshlq_n_u8 + return vshlq_n_u8(a, 3); +// CHECK: shl {{v[0-9]+}}.16b, 
{{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vshlq_n_u16(int16x8_t a) { +// CHECK: test_vshlq_n_u16 + return vshlq_n_u16(a, 3); +// CHECK: shl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vshlq_n_u32(int32x4_t a) { +// CHECK: test_vshlq_n_u32 + return vshlq_n_u32(a, 3); +// CHECK: shl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vshlq_n_u64(int64x2_t a) { +// CHECK: test_vshlq_n_u64 + return vshlq_n_u64(a, 3); +// CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vshr_n_s8(int8x8_t a) { + // CHECK: test_vshr_n_s8 + return vshr_n_s8(a, 3); + // CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vshr_n_s16(int16x4_t a) { + // CHECK: test_vshr_n_s16 + return vshr_n_s16(a, 3); + // CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vshr_n_s32(int32x2_t a) { + // CHECK: test_vshr_n_s32 + return vshr_n_s32(a, 3); + // CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vshrq_n_s8(int8x16_t a) { + // CHECK: test_vshrq_n_s8 + return vshrq_n_s8(a, 3); + // CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vshrq_n_s16(int16x8_t a) { + // CHECK: test_vshrq_n_s16 + return vshrq_n_s16(a, 3); + // CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vshrq_n_s32(int32x4_t a) { + // CHECK: test_vshrq_n_s32 + return vshrq_n_s32(a, 3); + // CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vshrq_n_s64(int64x2_t a) { + // CHECK: test_vshrq_n_s64 + return vshrq_n_s64(a, 3); + // CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vshr_n_u8(int8x8_t a) { + // CHECK: test_vshr_n_u8 + return vshr_n_u8(a, 3); + // CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vshr_n_u16(int16x4_t a) { + // CHECK: test_vshr_n_u16 + return vshr_n_u16(a, 3); + // CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vshr_n_u32(int32x2_t a) { + // CHECK: test_vshr_n_u32 + return vshr_n_u32(a, 3); + // CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vshrq_n_u8(int8x16_t a) { + // CHECK: test_vshrq_n_u8 + return vshrq_n_u8(a, 3); + // CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vshrq_n_u16(int16x8_t a) { + // CHECK: test_vshrq_n_u16 + return vshrq_n_u16(a, 3); + // CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vshrq_n_u32(int32x4_t a) { + // CHECK: test_vshrq_n_u32 + return vshrq_n_u32(a, 3); + // CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vshrq_n_u64(int64x2_t a) { + // CHECK: test_vshrq_n_u64 + return vshrq_n_u64(a, 3); + // CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vsra_n_s8 + return vsra_n_s8(a, b, 3); + // CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vsra_n_s16 + return vsra_n_s16(a, b, 3); + // CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vsra_n_s32 + return vsra_n_s32(a, b, 3); + // CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vsraq_n_s8 + return vsraq_n_s8(a, b, 3); + // CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vsraq_n_s16 + return vsraq_n_s16(a, b, 3); + // CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t 
b) { + // CHECK: test_vsraq_n_s32 + return vsraq_n_s32(a, b, 3); + // CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vsraq_n_s64 + return vsraq_n_s64(a, b, 3); + // CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vsra_n_u8(int8x8_t a, int8x8_t b) { + // CHECK: test_vsra_n_u8 + return vsra_n_u8(a, b, 3); + // CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vsra_n_u16(int16x4_t a, int16x4_t b) { + // CHECK: test_vsra_n_u16 + return vsra_n_u16(a, b, 3); + // CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vsra_n_u32(int32x2_t a, int32x2_t b) { + // CHECK: test_vsra_n_u32 + return vsra_n_u32(a, b, 3); + // CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vsraq_n_u8(int8x16_t a, int8x16_t b) { + // CHECK: test_vsraq_n_u8 + return vsraq_n_u8(a, b, 3); + // CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vsraq_n_u16(int16x8_t a, int16x8_t b) { + // CHECK: test_vsraq_n_u16 + return vsraq_n_u16(a, b, 3); + // CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vsraq_n_u32(int32x4_t a, int32x4_t b) { + // CHECK: test_vsraq_n_u32 + return vsraq_n_u32(a, b, 3); + // CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vsraq_n_u64(int64x2_t a, int64x2_t b) { + // CHECK: test_vsraq_n_u64 + return vsraq_n_u64(a, b, 3); + // CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vrshr_n_s8(int8x8_t a) { + // CHECK: test_vrshr_n_s8 + return vrshr_n_s8(a, 3); + // CHECK: srshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vrshr_n_s16(int16x4_t a) { + // CHECK: test_vrshr_n_s16 + return vrshr_n_s16(a, 3); + // CHECK: srshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vrshr_n_s32(int32x2_t a) { + // CHECK: test_vrshr_n_s32 + return vrshr_n_s32(a, 3); + // CHECK: srshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vrshrq_n_s8(int8x16_t a) { + // CHECK: test_vrshrq_n_s8 + return vrshrq_n_s8(a, 3); + // CHECK: srshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vrshrq_n_s16(int16x8_t a) { + // CHECK: test_vrshrq_n_s16 + return vrshrq_n_s16(a, 3); + // CHECK: srshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vrshrq_n_s32(int32x4_t a) { + // CHECK: test_vrshrq_n_s32 + return vrshrq_n_s32(a, 3); + // CHECK: srshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vrshrq_n_s64(int64x2_t a) { + // CHECK: test_vrshrq_n_s64 + return vrshrq_n_s64(a, 3); + // CHECK: srshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vrshr_n_u8(int8x8_t a) { + // CHECK: test_vrshr_n_u8 + return vrshr_n_u8(a, 3); + // CHECK: urshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vrshr_n_u16(int16x4_t a) { + // CHECK: test_vrshr_n_u16 + return vrshr_n_u16(a, 3); + // CHECK: urshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vrshr_n_u32(int32x2_t a) { + // CHECK: test_vrshr_n_u32 + return vrshr_n_u32(a, 3); + // CHECK: urshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vrshrq_n_u8(int8x16_t a) { + // CHECK: test_vrshrq_n_u8 + return vrshrq_n_u8(a, 3); + // CHECK: urshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vrshrq_n_u16(int16x8_t a) { + // CHECK: test_vrshrq_n_u16 + return vrshrq_n_u16(a, 3); + // CHECK: urshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vrshrq_n_u32(int32x4_t a) { + // CHECK: test_vrshrq_n_u32 + return vrshrq_n_u32(a, 3); + // CHECK: urshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + 
+int64x2_t test_vrshrq_n_u64(int64x2_t a) { + // CHECK: test_vrshrq_n_u64 + return vrshrq_n_u64(a, 3); + // CHECK: urshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vrsra_n_s8 + return vrsra_n_s8(a, b, 3); + // CHECK: srsra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vrsra_n_s16 + return vrsra_n_s16(a, b, 3); + // CHECK: srsra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vrsra_n_s32 + return vrsra_n_s32(a, b, 3); + // CHECK: srsra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vrsraq_n_s8 + return vrsraq_n_s8(a, b, 3); + // CHECK: srsra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vrsraq_n_s16 + return vrsraq_n_s16(a, b, 3); + // CHECK: srsra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vrsraq_n_s32 + return vrsraq_n_s32(a, b, 3); + // CHECK: srsra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vrsraq_n_s64 + return vrsraq_n_s64(a, b, 3); + // CHECK: srsra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vrsra_n_u8(int8x8_t a, int8x8_t b) { + // CHECK: test_vrsra_n_u8 + return vrsra_n_u8(a, b, 3); + // CHECK: ursra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vrsra_n_u16(int16x4_t a, int16x4_t b) { + // CHECK: test_vrsra_n_u16 + return vrsra_n_u16(a, b, 3); + // CHECK: ursra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vrsra_n_u32(int32x2_t a, int32x2_t b) { + // CHECK: test_vrsra_n_u32 + return vrsra_n_u32(a, b, 3); + // CHECK: ursra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vrsraq_n_u8(int8x16_t a, int8x16_t b) { + // CHECK: test_vrsraq_n_u8 + return vrsraq_n_u8(a, b, 3); + // CHECK: ursra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vrsraq_n_u16(int16x8_t a, int16x8_t b) { + // CHECK: test_vrsraq_n_u16 + return vrsraq_n_u16(a, b, 3); + // CHECK: ursra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vrsraq_n_u32(int32x4_t a, int32x4_t b) { + // CHECK: test_vrsraq_n_u32 + return vrsraq_n_u32(a, b, 3); + // CHECK: ursra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vrsraq_n_u64(int64x2_t a, int64x2_t b) { + // CHECK: test_vrsraq_n_u64 + return vrsraq_n_u64(a, b, 3); + // CHECK: ursra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vsri_n_s8 + return vsri_n_s8(a, b, 3); + // CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vsri_n_s16 + return vsri_n_s16(a, b, 3); + // CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vsri_n_s32 + return vsri_n_s32(a, b, 3); + // CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vsriq_n_s8 + return vsriq_n_s8(a, b, 3); + // CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vsriq_n_s16 + return vsriq_n_s16(a, b, 3); + // CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vsriq_n_s32 + return 
vsriq_n_s32(a, b, 3); + // CHECK: sri {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vsriq_n_s64 + return vsriq_n_s64(a, b, 3); + // CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vsri_n_u8(int8x8_t a, int8x8_t b) { + // CHECK: test_vsri_n_u8 + return vsri_n_u8(a, b, 3); + // CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vsri_n_u16(int16x4_t a, int16x4_t b) { + // CHECK: test_vsri_n_u16 + return vsri_n_u16(a, b, 3); + // CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vsri_n_u32(int32x2_t a, int32x2_t b) { + // CHECK: test_vsri_n_u32 + return vsri_n_u32(a, b, 3); + // CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vsriq_n_u8(int8x16_t a, int8x16_t b) { + // CHECK: test_vsriq_n_u8 + return vsriq_n_u8(a, b, 3); + // CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vsriq_n_u16(int16x8_t a, int16x8_t b) { + // CHECK: test_vsriq_n_u16 + return vsriq_n_u16(a, b, 3); + // CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vsriq_n_u32(int32x4_t a, int32x4_t b) { + // CHECK: test_vsriq_n_u32 + return vsriq_n_u32(a, b, 3); + // CHECK: sri {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vsriq_n_u64(int64x2_t a, int64x2_t b) { + // CHECK: test_vsriq_n_u64 + return vsriq_n_u64(a, b, 3); + // CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vsri_n_p8 + return vsri_n_p8(a, b, 3); + // CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vsri_n_p16 + return vsri_n_p16(a, b, 15); + // CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15 +} + +poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vsriq_n_p8 + return vsriq_n_p8(a, b, 3); + // CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vsriq_n_p16 + return vsriq_n_p16(a, b, 15); + // CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15 +} + +int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vsli_n_s8 + return vsli_n_s8(a, b, 3); + // CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vsli_n_s16 + return vsli_n_s16(a, b, 3); + // CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vsli_n_s32 + return vsli_n_s32(a, b, 3); + // CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vsliq_n_s8 + return vsliq_n_s8(a, b, 3); + // CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vsliq_n_s16 + return vsliq_n_s16(a, b, 3); + // CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vsliq_n_s32 + return vsliq_n_s32(a, b, 3); + // CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vsliq_n_s64 + return vsliq_n_s64(a, b, 3); + // CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vsli_n_u8 + return vsli_n_u8(a, b, 3); + // CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: 
test_vsli_n_u16 + return vsli_n_u16(a, b, 3); + // CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vsli_n_u32 + return vsli_n_u32(a, b, 3); + // CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vsliq_n_u8 + return vsliq_n_u8(a, b, 3); + // CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vsliq_n_u16 + return vsliq_n_u16(a, b, 3); + // CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vsliq_n_u32 + return vsliq_n_u32(a, b, 3); + // CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vsliq_n_u64 + return vsliq_n_u64(a, b, 3); + // CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vsli_n_p8 + return vsli_n_p8(a, b, 3); + // CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vsli_n_p16 + return vsli_n_p16(a, b, 15); + // CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15 +} + +poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vsliq_n_p8 + return vsliq_n_p8(a, b, 3); + // CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vsliq_n_p16 + return vsliq_n_p16(a, b, 15); + // CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15 +} + +int8x8_t test_vqshlu_n_s8(int8x8_t a) { + // CHECK: test_vqshlu_n_s8 + return vqshlu_n_s8(a, 3); + // CHECK: sqshlu {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 +} + +int16x4_t test_vqshlu_n_s16(int16x4_t a) { + // CHECK: test_vqshlu_n_s16 + return vqshlu_n_s16(a, 3); + // CHECK: sqshlu {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 +} + +int32x2_t test_vqshlu_n_s32(int32x2_t a) { + // CHECK: test_vqshlu_n_s32 + return vqshlu_n_s32(a, 3); + // CHECK: sqshlu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 +} + +int8x16_t test_vqshluq_n_s8(int8x16_t a) { + // CHECK: test_vqshluq_n_s8 + return vqshluq_n_s8(a, 3); + // CHECK: sqshlu {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 +} + +int16x8_t test_vqshluq_n_s16(int16x8_t a) { + // CHECK: test_vqshluq_n_s16 + return vqshluq_n_s16(a, 3); + // CHECK: sqshlu {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 +} + +int32x4_t test_vqshluq_n_s32(int32x4_t a) { + // CHECK: test_vqshluq_n_s32 + return vqshluq_n_s32(a, 3); + // CHECK: sqshlu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 +} + +int64x2_t test_vqshluq_n_s64(int64x2_t a) { + // CHECK: test_vqshluq_n_s64 + return vqshluq_n_s64(a, 3); + // CHECK: sqshlu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 +} + +int8x8_t test_vshrn_n_s16(int16x8_t a) { + // CHECK: test_vshrn_n_s16 + return vshrn_n_s16(a, 3); + // CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +int16x4_t test_vshrn_n_s32(int32x4_t a) { + // CHECK: test_vshrn_n_s32 + return vshrn_n_s32(a, 9); + // CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +int32x2_t test_vshrn_n_s64(int64x2_t a) { + // CHECK: test_vshrn_n_s64 + return vshrn_n_s64(a, 19); + // CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +uint8x8_t test_vshrn_n_u16(uint16x8_t a) { + // CHECK: test_vshrn_n_u16 + return vshrn_n_u16(a, 3); + // CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +uint16x4_t test_vshrn_n_u32(uint32x4_t a) { + // CHECK: test_vshrn_n_u32 + return vshrn_n_u32(a, 9); + // CHECK: shrn 
{{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +uint32x2_t test_vshrn_n_u64(uint64x2_t a) { + // CHECK: test_vshrn_n_u64 + return vshrn_n_u64(a, 19); + // CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) { + // CHECK: test_vshrn_high_n_s16 + return vshrn_high_n_s16(a, b, 3); + // CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) { + // CHECK: test_vshrn_high_n_s32 + return vshrn_high_n_s32(a, b, 9); + // CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) { + // CHECK: test_vshrn_high_n_s64 + return vshrn_high_n_s64(a, b, 19); + // CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { + // CHECK: test_vshrn_high_n_u16 + return vshrn_high_n_u16(a, b, 3); + // CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { + // CHECK: test_vshrn_high_n_u32 + return vshrn_high_n_u32(a, b, 9); + // CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { + // CHECK: test_vshrn_high_n_u64 + return vshrn_high_n_u64(a, b, 19); + // CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +int8x8_t test_vqshrun_n_s16(int16x8_t a) { + // CHECK: test_vqshrun_n_s16 + return vqshrun_n_s16(a, 3); + // CHECK: sqshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +int16x4_t test_vqshrun_n_s32(int32x4_t a) { + // CHECK: test_vqshrun_n_s32 + return vqshrun_n_s32(a, 9); + // CHECK: sqshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +int32x2_t test_vqshrun_n_s64(int64x2_t a) { + // CHECK: test_vqshrun_n_s64 + return vqshrun_n_s64(a, 19); + // CHECK: sqshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) { + // CHECK: test_vqshrun_high_n_s16 + return vqshrun_high_n_s16(a, b, 3); + // CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) { + // CHECK: test_vqshrun_high_n_s32 + return vqshrun_high_n_s32(a, b, 9); + // CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) { + // CHECK: test_vqshrun_high_n_s64 + return vqshrun_high_n_s64(a, b, 19); + // CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +int8x8_t test_vrshrn_n_s16(int16x8_t a) { + // CHECK: test_vrshrn_n_s16 + return vrshrn_n_s16(a, 3); + // CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +int16x4_t test_vrshrn_n_s32(int32x4_t a) { + // CHECK: test_vrshrn_n_s32 + return vrshrn_n_s32(a, 9); + // CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +int32x2_t test_vrshrn_n_s64(int64x2_t a) { + // CHECK: test_vrshrn_n_s64 + return vrshrn_n_s64(a, 19); + // CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +uint8x8_t test_vrshrn_n_u16(uint16x8_t a) { + // CHECK: test_vrshrn_n_u16 + return vrshrn_n_u16(a, 3); + // CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +uint16x4_t test_vrshrn_n_u32(uint32x4_t a) { + // CHECK: test_vrshrn_n_u32 + return vrshrn_n_u32(a, 9); + // CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +uint32x2_t test_vrshrn_n_u64(uint64x2_t a) { + // CHECK: test_vrshrn_n_u64 + return vrshrn_n_u64(a, 19); + // CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) { + // CHECK: test_vrshrn_high_n_s16 + return vrshrn_high_n_s16(a, b, 3); + // 
CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) { + // CHECK: test_vrshrn_high_n_s32 + return vrshrn_high_n_s32(a, b, 9); + // CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) { + // CHECK: test_vrshrn_high_n_s64 + return vrshrn_high_n_s64(a, b, 19); + // CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { + // CHECK: test_vrshrn_high_n_u16 + return vrshrn_high_n_u16(a, b, 3); + // CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { + // CHECK: test_vrshrn_high_n_u32 + return vrshrn_high_n_u32(a, b, 9); + // CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { + // CHECK: test_vrshrn_high_n_u64 + return vrshrn_high_n_u64(a, b, 19); + // CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +int8x8_t test_vqrshrun_n_s16(int16x8_t a) { + // CHECK: test_vqrshrun_n_s16 + return vqrshrun_n_s16(a, 3); + // CHECK: sqrshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +int16x4_t test_vqrshrun_n_s32(int32x4_t a) { + // CHECK: test_vqrshrun_n_s32 + return vqrshrun_n_s32(a, 9); + // CHECK: sqrshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +int32x2_t test_vqrshrun_n_s64(int64x2_t a) { + // CHECK: test_vqrshrun_n_s64 + return vqrshrun_n_s64(a, 19); + // CHECK: sqrshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) { + // CHECK: test_vqrshrun_high_n_s16 + return vqrshrun_high_n_s16(a, b, 3); + // CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) { + // CHECK: test_vqrshrun_high_n_s32 + return vqrshrun_high_n_s32(a, b, 9); + // CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) { + // CHECK: test_vqrshrun_high_n_s64 + return vqrshrun_high_n_s64(a, b, 19); + // CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +int8x8_t test_vqshrn_n_s16(int16x8_t a) { + // CHECK: test_vqshrn_n_s16 + return vqshrn_n_s16(a, 3); + // CHECK: sqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +int16x4_t test_vqshrn_n_s32(int32x4_t a) { + // CHECK: test_vqshrn_n_s32 + return vqshrn_n_s32(a, 9); + // CHECK: sqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +int32x2_t test_vqshrn_n_s64(int64x2_t a) { + // CHECK: test_vqshrn_n_s64 + return vqshrn_n_s64(a, 19); + // CHECK: sqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { + // CHECK: test_vqshrn_n_u16 + return vqshrn_n_u16(a, 3); + // CHECK: uqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { + // CHECK: test_vqshrn_n_u32 + return vqshrn_n_u32(a, 9); + // CHECK: uqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { + // CHECK: test_vqshrn_n_u64 + return vqshrn_n_u64(a, 19); + // CHECK: uqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) { + // CHECK: test_vqshrn_high_n_s16 + return vqshrn_high_n_s16(a, b, 3); + // CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) { + // CHECK: test_vqshrn_high_n_s32 + return vqshrn_high_n_s32(a, b, 9); + // CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +int32x4_t 
test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) { + // CHECK: test_vqshrn_high_n_s64 + return vqshrn_high_n_s64(a, b, 19); + // CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { + // CHECK: test_vqshrn_high_n_u16 + return vqshrn_high_n_u16(a, b, 3); + // CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { + // CHECK: test_vqshrn_high_n_u32 + return vqshrn_high_n_u32(a, b, 9); + // CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { + // CHECK: test_vqshrn_high_n_u64 + return vqshrn_high_n_u64(a, b, 19); + // CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +int8x8_t test_vqrshrn_n_s16(int16x8_t a) { + // CHECK: test_vqrshrn_n_s16 + return vqrshrn_n_s16(a, 3); + // CHECK: sqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +int16x4_t test_vqrshrn_n_s32(int32x4_t a) { + // CHECK: test_vqrshrn_n_s32 + return vqrshrn_n_s32(a, 9); + // CHECK: sqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +int32x2_t test_vqrshrn_n_s64(int64x2_t a) { + // CHECK: test_vqrshrn_n_s64 + return vqrshrn_n_s64(a, 19); + // CHECK: sqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) { + // CHECK: test_vqrshrn_n_u16 + return vqrshrn_n_u16(a, 3); + // CHECK: uqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 +} + +uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) { + // CHECK: test_vqrshrn_n_u32 + return vqrshrn_n_u32(a, 9); + // CHECK: uqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 +} + +uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) { + // CHECK: test_vqrshrn_n_u64 + return vqrshrn_n_u64(a, 19); + // CHECK: uqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 +} + +int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) { + // CHECK: test_vqrshrn_high_n_s16 + return vqrshrn_high_n_s16(a, b, 3); + // CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) { + // CHECK: test_vqrshrn_high_n_s32 + return vqrshrn_high_n_s32(a, b, 9); + // CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) { + // CHECK: test_vqrshrn_high_n_s64 + return vqrshrn_high_n_s64(a, b, 19); + // CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { + // CHECK: test_vqrshrn_high_n_u16 + return vqrshrn_high_n_u16(a, b, 3); + // CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 +} + +uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { + // CHECK: test_vqrshrn_high_n_u32 + return vqrshrn_high_n_u32(a, b, 9); + // CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 +} + +uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { + // CHECK: test_vqrshrn_high_n_u64 + return vqrshrn_high_n_u64(a, b, 19); + // CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 +} + +int16x8_t test_vshll_n_s8(int8x8_t a) { +// CHECK: test_vshll_n_s8 + return vshll_n_s8(a, 3); +// CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3 +} + +int32x4_t test_vshll_n_s16(int16x4_t a) { +// CHECK: test_vshll_n_s16 + return vshll_n_s16(a, 9); +// CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9 +} + +int64x2_t test_vshll_n_s32(int32x2_t a) { +// CHECK: test_vshll_n_s32 + return vshll_n_s32(a, 19); +// CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19 +} + +uint16x8_t test_vshll_n_u8(uint8x8_t a) { +// CHECK: test_vshll_n_u8 + return vshll_n_u8(a, 3); +// 
CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3 +} + +uint32x4_t test_vshll_n_u16(uint16x4_t a) { +// CHECK: test_vshll_n_u16 + return vshll_n_u16(a, 9); +// CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9 +} + +uint64x2_t test_vshll_n_u32(uint32x2_t a) { +// CHECK: test_vshll_n_u32 + return vshll_n_u32(a, 19); +// CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19 +} + +int16x8_t test_vshll_high_n_s8(int8x16_t a) { +// CHECK: test_vshll_high_n_s8 + return vshll_high_n_s8(a, 3); +// CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3 +} + +int32x4_t test_vshll_high_n_s16(int16x8_t a) { +// CHECK: test_vshll_high_n_s16 + return vshll_high_n_s16(a, 9); +// CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9 +} + +int64x2_t test_vshll_high_n_s32(int32x4_t a) { +// CHECK: test_vshll_high_n_s32 + return vshll_high_n_s32(a, 19); +// CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19 +} + +uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { +// CHECK: test_vshll_high_n_u8 + return vshll_high_n_u8(a, 3); +// CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3 +} + +uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { +// CHECK: test_vshll_high_n_u16 + return vshll_high_n_u16(a, 9); +// CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9 +} + +uint64x2_t test_vshll_high_n_u32(uint32x4_t a) { +// CHECK: test_vshll_high_n_u32 + return vshll_high_n_u32(a, 19); +// CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19 +} + +int16x8_t test_vmovl_s8(int8x8_t a) { +// CHECK: test_vmovl_s8 + return vmovl_s8(a); +// CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0 +} + +int32x4_t test_vmovl_s16(int16x4_t a) { +// CHECK: test_vmovl_s16 + return vmovl_s16(a); +// CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #0 +} + +int64x2_t test_vmovl_s32(int32x2_t a) { +// CHECK: test_vmovl_s32 + return vmovl_s32(a); +// CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0 +} + +uint16x8_t test_vmovl_u8(uint8x8_t a) { +// CHECK: test_vmovl_u8 + return vmovl_u8(a); +// CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0 +} + +uint32x4_t test_vmovl_u16(uint16x4_t a) { +// CHECK: test_vmovl_u16 + return vmovl_u16(a); +// CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #0 +} + +uint64x2_t test_vmovl_u32(uint32x2_t a) { +// CHECK: test_vmovl_u32 + return vmovl_u32(a); +// CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0 +} + +int16x8_t test_vmovl_high_s8(int8x16_t a) { +// CHECK: test_vmovl_high_s8 + return vmovl_high_s8(a); +// CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0 +} + +int32x4_t test_vmovl_high_s16(int16x8_t a) { +// CHECK: test_vmovl_high_s16 + return vmovl_high_s16(a); +// CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0 +} + +int64x2_t test_vmovl_high_s32(int32x4_t a) { +// CHECK: test_vmovl_high_s32 + return vmovl_high_s32(a); +// CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0 +} + +uint16x8_t test_vmovl_high_u8(uint8x16_t a) { +// CHECK: test_vmovl_high_u8 + return vmovl_high_u8(a); +// CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0 +} + +uint32x4_t test_vmovl_high_u16(uint16x8_t a) { +// CHECK: test_vmovl_high_u16 + return vmovl_high_u16(a); +// CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0 +} + +uint64x2_t test_vmovl_high_u32(uint32x4_t a) { +// CHECK: test_vmovl_high_u32 + return vmovl_high_u32(a); +// CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0 +} + +float32x2_t test_vcvt_n_f32_s32(int32x2_t a) { + // CHECK: test_vcvt_n_f32_s32 + return vcvt_n_f32_s32(a, 31); + // CHECK: scvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 +} + +float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) { + // CHECK: test_vcvtq_n_f32_s32 + return vcvtq_n_f32_s32(a, 31); 
+ // CHECK: scvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 +} + +float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) { + // CHECK: test_vcvtq_n_f64_s64 + return vcvtq_n_f64_s64(a, 50); + // CHECK: scvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 +} + +float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) { + // CHECK: test_vcvt_n_f32_u32 + return vcvt_n_f32_u32(a, 31); + // CHECK: ucvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 +} + +float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) { + // CHECK: test_vcvtq_n_f32_u32 + return vcvtq_n_f32_u32(a, 31); + // CHECK: ucvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 +} + +float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) { + // CHECK: test_vcvtq_n_f64_u64 + return vcvtq_n_f64_u64(a, 50); + // CHECK: ucvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 +} + +int32x2_t test_vcvt_n_s32_f32(float32x2_t a) { + // CHECK: test_vcvt_n_s32_f32 + return vcvt_n_s32_f32(a, 31); + // CHECK: fcvtzs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 +} + +int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) { + // CHECK: test_vcvtq_n_s32_f32 + return vcvtq_n_s32_f32(a, 31); + // CHECK: fcvtzs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 +} + +int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) { + // CHECK: test_vcvtq_n_s64_f64 + return vcvtq_n_s64_f64(a, 50); + // CHECK: fcvtzs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 +} + +uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) { + // CHECK: test_vcvt_n_u32_f32 + return vcvt_n_u32_f32(a, 31); + // CHECK: fcvtzu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 +} + +uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) { + // CHECK: test_vcvtq_n_u32_f32 + return vcvtq_n_u32_f32(a, 31); + // CHECK: fcvtzu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 +} + +uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) { + // CHECK: test_vcvtq_n_u64_f64 + return vcvtq_n_u64_f64(a, 50); + // CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 +} + +int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vaddl_s8 + return vaddl_s8(a, b); + // CHECK: saddl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} + +int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vaddl_s16 + return vaddl_s16(a, b); + // CHECK: saddl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} + +int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vaddl_s32 + return vaddl_s32(a, b); + // CHECK: saddl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vaddl_u8 + return vaddl_u8(a, b); + // CHECK: uaddl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} + +uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vaddl_u16 + return vaddl_u16(a, b); + // CHECK: uaddl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} + +uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vaddl_u32 + return vaddl_u32(a, b); + // CHECK: uaddl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vaddl_high_s8 + return vaddl_high_s8(a, b); + // CHECK: saddl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} + +int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vaddl_high_s16 + return vaddl_high_s16(a, b); + // CHECK: saddl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vaddl_high_s32 + return vaddl_high_s32(a, b); + // CHECK: saddl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: 
test_vaddl_high_u8 + return vaddl_high_u8(a, b); + // CHECK: uaddl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} + +uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vaddl_high_u16 + return vaddl_high_u16(a, b); + // CHECK: uaddl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vaddl_high_u32 + return vaddl_high_u32(a, b); + // CHECK: uaddl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) { + // CHECK: test_vaddw_s8 + return vaddw_s8(a, b); + // CHECK: saddw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b +} + +int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) { + // CHECK: test_vaddw_s16 + return vaddw_s16(a, b); + // CHECK: saddw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h +} + +int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) { + // CHECK: test_vaddw_s32 + return vaddw_s32(a, b); + // CHECK: saddw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s +} + +uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) { + // CHECK: test_vaddw_u8 + return vaddw_u8(a, b); + // CHECK: uaddw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b +} + +uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) { + // CHECK: test_vaddw_u16 + return vaddw_u16(a, b); + // CHECK: uaddw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h +} + +uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) { + // CHECK: test_vaddw_u32 + return vaddw_u32(a, b); + // CHECK: uaddw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s +} + +int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) { + // CHECK: test_vaddw_high_s8 + return vaddw_high_s8(a, b); + // CHECK: saddw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b +} + +int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) { + // CHECK: test_vaddw_high_s16 + return vaddw_high_s16(a, b); + // CHECK: saddw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h +} + +int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) { + // CHECK: test_vaddw_high_s32 + return vaddw_high_s32(a, b); + // CHECK: saddw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s +} + +uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) { + // CHECK: test_vaddw_high_u8 + return vaddw_high_u8(a, b); + // CHECK: uaddw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b +} + +uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) { + // CHECK: test_vaddw_high_u16 + return vaddw_high_u16(a, b); + // CHECK: uaddw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h +} + +uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) { + // CHECK: test_vaddw_high_u32 + return vaddw_high_u32(a, b); + // CHECK: uaddw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s +} + +int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vsubl_s8 + return vsubl_s8(a, b); + // CHECK: ssubl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} + +int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vsubl_s16 + return vsubl_s16(a, b); + // CHECK: ssubl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} + +int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vsubl_s32 + return vsubl_s32(a, b); + // CHECK: ssubl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vsubl_u8 + return vsubl_u8(a, b); + // CHECK: usubl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} + +uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) { 
+ // CHECK: test_vsubl_u16 + return vsubl_u16(a, b); + // CHECK: usubl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} + +uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vsubl_u32 + return vsubl_u32(a, b); + // CHECK: usubl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vsubl_high_s8 + return vsubl_high_s8(a, b); + // CHECK: ssubl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} + +int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vsubl_high_s16 + return vsubl_high_s16(a, b); + // CHECK: ssubl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vsubl_high_s32 + return vsubl_high_s32(a, b); + // CHECK: ssubl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vsubl_high_u8 + return vsubl_high_u8(a, b); + // CHECK: usubl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} + +uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vsubl_high_u16 + return vsubl_high_u16(a, b); + // CHECK: usubl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vsubl_high_u32 + return vsubl_high_u32(a, b); + // CHECK: usubl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) { + // CHECK: test_vsubw_s8 + return vsubw_s8(a, b); + // CHECK: ssubw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b +} + +int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) { + // CHECK: test_vsubw_s16 + return vsubw_s16(a, b); + // CHECK: ssubw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h +} + +int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) { + // CHECK: test_vsubw_s32 + return vsubw_s32(a, b); + // CHECK: ssubw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s +} + +uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) { + // CHECK: test_vsubw_u8 + return vsubw_u8(a, b); + // CHECK: usubw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b +} + +uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) { + // CHECK: test_vsubw_u16 + return vsubw_u16(a, b); + // CHECK: usubw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h +} + +uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) { + // CHECK: test_vsubw_u32 + return vsubw_u32(a, b); + // CHECK: usubw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s +} + +int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) { + // CHECK: test_vsubw_high_s8 + return vsubw_high_s8(a, b); + // CHECK: ssubw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b +} + +int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) { + // CHECK: test_vsubw_high_s16 + return vsubw_high_s16(a, b); + // CHECK: ssubw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h +} + +int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) { + // CHECK: test_vsubw_high_s32 + return vsubw_high_s32(a, b); + // CHECK: ssubw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s +} + +uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) { + // CHECK: test_vsubw_high_u8 + return vsubw_high_u8(a, b); + // CHECK: usubw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b +} + +uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) { + // CHECK: test_vsubw_high_u16 + return vsubw_high_u16(a, b); + // CHECK: usubw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h +} + 
+uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) { + // CHECK: test_vsubw_high_u32 + return vsubw_high_u32(a, b); + // CHECK: usubw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s +} + +int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vaddhn_s16 + return vaddhn_s16(a, b); + // CHECK: addhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vaddhn_s32 + return vaddhn_s32(a, b); + // CHECK: addhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vaddhn_s64 + return vaddhn_s64(a, b); + // CHECK: addhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vaddhn_u16 + return vaddhn_u16(a, b); + // CHECK: addhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vaddhn_u32 + return vaddhn_u32(a, b); + // CHECK: addhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vaddhn_u64 + return vaddhn_u64(a, b); + // CHECK: addhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { + // CHECK: test_vaddhn_high_s16 + return vaddhn_high_s16(r, a, b); + // CHECK: addhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { + // CHECK: test_vaddhn_high_s32 + return vaddhn_high_s32(r, a, b); + // CHECK: addhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { + // CHECK: test_vaddhn_high_s64 + return vaddhn_high_s64(r, a, b); + // CHECK: addhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { + // CHECK: test_vaddhn_high_u16 + return vaddhn_high_u16(r, a, b); + // CHECK: addhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { + // CHECK: test_vaddhn_high_u32 + return vaddhn_high_u32(r, a, b); + // CHECK: addhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { + // CHECK: test_vaddhn_high_u64 + return vaddhn_high_u64(r, a, b); + // CHECK: addhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vraddhn_s16 + return vraddhn_s16(a, b); + // CHECK: raddhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vraddhn_s32 + return vraddhn_s32(a, b); + // CHECK: raddhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vraddhn_s64 + return vraddhn_s64(a, b); + // CHECK: raddhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vraddhn_u16 + return vraddhn_u16(a, b); + // CHECK: raddhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vraddhn_u32 + return vraddhn_u32(a, b); + // CHECK: raddhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + 
+uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vraddhn_u64 + return vraddhn_u64(a, b); + // CHECK: raddhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { + // CHECK: test_vraddhn_high_s16 + return vraddhn_high_s16(r, a, b); + // CHECK: raddhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { + // CHECK: test_vraddhn_high_s32 + return vraddhn_high_s32(r, a, b); + // CHECK: raddhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { + // CHECK: test_vraddhn_high_s64 + return vraddhn_high_s64(r, a, b); + // CHECK: raddhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { + // CHECK: test_vraddhn_high_u16 + return vraddhn_high_u16(r, a, b); + // CHECK: raddhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { + // CHECK: test_vraddhn_high_u32 + return vraddhn_high_u32(r, a, b); + // CHECK: raddhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { + // CHECK: test_vraddhn_high_u64 + return vraddhn_high_u64(r, a, b); + // CHECK: raddhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vsubhn_s16 + return vsubhn_s16(a, b); + // CHECK: subhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vsubhn_s32 + return vsubhn_s32(a, b); + // CHECK: subhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vsubhn_s64 + return vsubhn_s64(a, b); + // CHECK: subhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vsubhn_u16 + return vsubhn_u16(a, b); + // CHECK: subhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vsubhn_u32 + return vsubhn_u32(a, b); + // CHECK: subhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vsubhn_u64 + return vsubhn_u64(a, b); + // CHECK: subhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { + // CHECK: test_vsubhn_high_s16 + return vsubhn_high_s16(r, a, b); + // CHECK: subhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { + // CHECK: test_vsubhn_high_s32 + return vsubhn_high_s32(r, a, b); + // CHECK: subhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { + // CHECK: test_vsubhn_high_s64 + return vsubhn_high_s64(r, a, b); + // CHECK: subhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { + // CHECK: test_vsubhn_high_u16 + return vsubhn_high_u16(r, a, b); + // CHECK: subhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, 
uint32x4_t b) { + // CHECK: test_vsubhn_high_u32 + return vsubhn_high_u32(r, a, b); + // CHECK: subhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { + // CHECK: test_vsubhn_high_u64 + return vsubhn_high_u64(r, a, b); + // CHECK: subhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vrsubhn_s16 + return vrsubhn_s16(a, b); + // CHECK: rsubhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vrsubhn_s32 + return vrsubhn_s32(a, b); + // CHECK: rsubhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vrsubhn_s64 + return vrsubhn_s64(a, b); + // CHECK: rsubhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vrsubhn_u16 + return vrsubhn_u16(a, b); + // CHECK: rsubhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vrsubhn_u32 + return vrsubhn_u32(a, b); + // CHECK: rsubhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vrsubhn_u64 + return vrsubhn_u64(a, b); + // CHECK: rsubhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { + // CHECK: test_vrsubhn_high_s16 + return vrsubhn_high_s16(r, a, b); + // CHECK: rsubhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { + // CHECK: test_vrsubhn_high_s32 + return vrsubhn_high_s32(r, a, b); + // CHECK: rsubhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { + // CHECK: test_vrsubhn_high_s64 + return vrsubhn_high_s64(r, a, b); + // CHECK: rsubhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { + // CHECK: test_vrsubhn_high_u16 + return vrsubhn_high_u16(r, a, b); + // CHECK: rsubhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} + +uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { + // CHECK: test_vrsubhn_high_u32 + return vrsubhn_high_u32(r, a, b); + // CHECK: rsubhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { + // CHECK: test_vrsubhn_high_u64 + return vrsubhn_high_u64(r, a, b); + // CHECK: rsubhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +} + +int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vabdl_s8 + return vabdl_s8(a, b); + // CHECK: sabdl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vabdl_s16 + return vabdl_s16(a, b); + // CHECK: sabdl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vabdl_s32 + return vabdl_s32(a, b); + // CHECK: sabdl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} +uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vabdl_u8 + return vabdl_u8(a, b); + // CHECK: uabdl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +uint32x4_t 
test_vabdl_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vabdl_u16 + return vabdl_u16(a, b); + // CHECK: uabdl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vabdl_u32 + return vabdl_u32(a, b); + // CHECK: uabdl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { + // CHECK: test_vabal_s8 + return vabal_s8(a, b, c); + // CHECK: sabal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + // CHECK: test_vabal_s16 + return vabal_s16(a, b, c); + // CHECK: sabal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + // CHECK: test_vabal_s32 + return vabal_s32(a, b, c); + // CHECK: sabal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} +uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { + // CHECK: test_vabal_u8 + return vabal_u8(a, b, c); + // CHECK: uabal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { + // CHECK: test_vabal_u16 + return vabal_u16(a, b, c); + // CHECK: uabal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { + // CHECK: test_vabal_u32 + return vabal_u32(a, b, c); + // CHECK: uabal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vabdl_high_s8 + return vabdl_high_s8(a, b); + // CHECK: sabdl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vabdl_high_s16 + return vabdl_high_s16(a, b); + // CHECK: sabdl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vabdl_high_s32 + return vabdl_high_s32(a, b); + // CHECK: sabdl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} +uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vabdl_high_u8 + return vabdl_high_u8(a, b); + // CHECK: uabdl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vabdl_high_u16 + return vabdl_high_u16(a, b); + // CHECK: uabdl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vabdl_high_u32 + return vabdl_high_u32(a, b); + // CHECK: uabdl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { + // CHECK: test_vabal_high_s8 + return vabal_high_s8(a, b, c); + // CHECK: sabal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { + // CHECK: test_vabal_high_s16 + return vabal_high_s16(a, b, c); + // CHECK: sabal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { + // CHECK: test_vabal_high_s32 + return vabal_high_s32(a, b, c); + // CHECK: sabal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} +uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { + // CHECK: test_vabal_high_u8 + return vabal_high_u8(a, b, c); + // CHECK: uabal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +uint32x4_t 
test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { + // CHECK: test_vabal_high_u16 + return vabal_high_u16(a, b, c); + // CHECK: uabal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { + // CHECK: test_vabal_high_u32 + return vabal_high_u32(a, b, c); + // CHECK: uabal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vmull_s8 + return vmull_s8(a, b); + // CHECK: smull {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vmull_s16 + return vmull_s16(a, b); + // CHECK: smull {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vmull_s32 + return vmull_s32(a, b); + // CHECK: smull {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} +uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vmull_u8 + return vmull_u8(a, b); + // CHECK: umull {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vmull_u16 + return vmull_u16(a, b); + // CHECK: umull {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vmull_u32 + return vmull_u32(a, b); + // CHECK: umull {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vmull_high_s8 + return vmull_high_s8(a, b); + // CHECK: smull2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vmull_high_s16 + return vmull_high_s16(a, b); + // CHECK: smull2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vmull_high_s32 + return vmull_high_s32(a, b); + // CHECK: smull2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} +uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vmull_high_u8 + return vmull_high_u8(a, b); + // CHECK: umull2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vmull_high_u16 + return vmull_high_u16(a, b); + // CHECK: umull2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vmull_high_u32 + return vmull_high_u32(a, b); + // CHECK: umull2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { + // CHECK: test_vmlal_s8 + return vmlal_s8(a, b, c); + // CHECK: smlal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + // CHECK: test_vmlal_s16 + return vmlal_s16(a, b, c); + // CHECK: smlal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + // CHECK: test_vmlal_s32 + return vmlal_s32(a, b, c); + // CHECK: smlal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} +uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { + // CHECK: test_vmlal_u8 + return vmlal_u8(a, b, c); + // CHECK: umlal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { + // CHECK: test_vmlal_u16 + return vmlal_u16(a, b, 
c); + // CHECK: umlal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { + // CHECK: test_vmlal_u32 + return vmlal_u32(a, b, c); + // CHECK: umlal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { + // CHECK: test_vmlal_high_s8 + return vmlal_high_s8(a, b, c); + // CHECK: smlal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { + // CHECK: test_vmlal_high_s16 + return vmlal_high_s16(a, b, c); + // CHECK: smlal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { + // CHECK: test_vmlal_high_s32 + return vmlal_high_s32(a, b, c); + // CHECK: smlal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} +uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { + // CHECK: test_vmlal_high_u8 + return vmlal_high_u8(a, b, c); + // CHECK: umlal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { + // CHECK: test_vmlal_high_u16 + return vmlal_high_u16(a, b, c); + // CHECK: umlal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { + // CHECK: test_vmlal_high_u32 + return vmlal_high_u32(a, b, c); + // CHECK: umlal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} + +int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) { + // CHECK: test_vmlsl_s8 + return vmlsl_s8(a, b, c); + // CHECK: smlsl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + // CHECK: test_vmlsl_s16 + return vmlsl_s16(a, b, c); + // CHECK: smlsl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + // CHECK: test_vmlsl_s32 + return vmlsl_s32(a, b, c); + // CHECK: smlsl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} +uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { + // CHECK: test_vmlsl_u8 + return vmlsl_u8(a, b, c); + // CHECK: umlsl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +} +uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { + // CHECK: test_vmlsl_u16 + return vmlsl_u16(a, b, c); + // CHECK: umlsl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +} +uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { + // CHECK: test_vmlsl_u32 + return vmlsl_u32(a, b, c); + // CHECK: umlsl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +} + +int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { + // CHECK: test_vmlsl_high_s8 + return vmlsl_high_s8(a, b, c); + // CHECK: smlsl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +} +int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { + // CHECK: test_vmlsl_high_s16 + return vmlsl_high_s16(a, b, c); + // CHECK: smlsl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +} +int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { + // CHECK: test_vmlsl_high_s32 + return vmlsl_high_s32(a, b, c); + // CHECK: smlsl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +} +uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { + // CHECK: test_vmlsl_high_u8 + return vmlsl_high_u8(a, b, c); + // CHECK: umlsl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b 
+}
+uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
+  // CHECK: test_vmlsl_high_u16
+  return vmlsl_high_u16(a, b, c);
+  // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
+  // CHECK: test_vmlsl_high_u32
+  return vmlsl_high_u32(a, b, c);
+  // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
+  // CHECK: test_vqdmull_s16
+  return vqdmull_s16(a, b);
+  // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
+  // CHECK: test_vqdmull_s32
+  return vqdmull_s32(a, b);
+  // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+  // CHECK: test_vqdmlal_s16
+  return vqdmlal_s16(a, b, c);
+  // CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+  // CHECK: test_vqdmlal_s32
+  return vqdmlal_s32(a, b, c);
+  // CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+  // CHECK: test_vqdmlsl_s16
+  return vqdmlsl_s16(a, b, c);
+  // CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+  // CHECK: test_vqdmlsl_s32
+  return vqdmlsl_s32(a, b, c);
+  // CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
+  // CHECK: test_vqdmull_high_s16
+  return vqdmull_high_s16(a, b);
+  // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
+  // CHECK: test_vqdmull_high_s32
+  return vqdmull_high_s32(a, b);
+  // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+  // CHECK: test_vqdmlal_high_s16
+  return vqdmlal_high_s16(a, b, c);
+  // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+  // CHECK: test_vqdmlal_high_s32
+  return vqdmlal_high_s32(a, b, c);
+  // CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+  // CHECK: test_vqdmlsl_high_s16
+  return vqdmlsl_high_s16(a, b, c);
+  // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+  // CHECK: test_vqdmlsl_high_s32
+  return vqdmlsl_high_s32(a, b, c);
+  // CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
+  // CHECK: test_vmull_p8
+  return vmull_p8(a, b);
+  // CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
+  // CHECK: test_vmull_high_p8
+  return vmull_high_p8(a, b);
+  // CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int64_t test_vaddd_s64(int64_t a, int64_t b) {
+// CHECK: test_vaddd_s64
+  return vaddd_s64(a, b);
+// CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
+// CHECK: test_vaddd_u64
+  return vaddd_u64(a, b);
+//
CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int64_t test_vsubd_s64(int64_t a, int64_t b) { +// CHECK: test_vsubd_s64 + return vsubd_s64(a, b); +// CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64_t test_vsubd_u64(uint64_t a, uint64_t b) { +// CHECK: test_vsubd_u64 + return vsubd_u64(a, b); +// CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8_t test_vqaddb_s8(int8_t a, int8_t b) { +// CHECK: test_vqaddb_s8 + return vqaddb_s8(a, b); +// CHECK: sqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} +} + +int16_t test_vqaddh_s16(int16_t a, int16_t b) { +// CHECK: test_vqaddh_s16 + return vqaddh_s16(a, b); +// CHECK: sqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} +} + +int32_t test_vqadds_s32(int32_t a, int32_t b) { +// CHECK: test_vqadds_s32 + return vqadds_s32(a, b); +// CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +int64_t test_vqaddd_s64(int64_t a, int64_t b) { +// CHECK: test_vqaddd_s64 + return vqaddd_s64(a, b); +// CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) { +// CHECK: test_vqaddb_u8 + return vqaddb_u8(a, b); +// CHECK: uqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} +} + +uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) { +// CHECK: test_vqaddh_u16 + return vqaddh_u16(a, b); +// CHECK: uqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} +} + +uint32_t test_vqadds_u32(uint32_t a, uint32_t b) { +// CHECK: test_vqadds_u32 + return vqadds_u32(a, b); +// CHECK: uqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) { +// CHECK: test_vqaddd_u64 + return vqaddd_u64(a, b); +// CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8_t test_vqsubb_s8(int8_t a, int8_t b) { +// CHECK: test_vqsubb_s8 + return vqsubb_s8(a, b); +// CHECK: sqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} +} + +int16_t test_vqsubh_s16(int16_t a, int16_t b) { +// CHECK: test_vqsubh_s16 + return vqsubh_s16(a, b); +// CHECK: sqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} +} + +int32_t test_vqsubs_s32(int32_t a, int32_t b) { + // CHECK: test_vqsubs_s32 + return vqsubs_s32(a, b); +// CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +int64_t test_vqsubd_s64(int64_t a, int64_t b) { +// CHECK: test_vqsubd_s64 + return vqsubd_s64(a, b); +// CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) { +// CHECK: test_vqsubb_u8 + return vqsubb_u8(a, b); +// CHECK: uqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} +} + +uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) { +// CHECK: test_vqsubh_u16 + return vqsubh_u16(a, b); +// CHECK: uqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} +} + +uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) { +// CHECK: test_vqsubs_u32 + return vqsubs_u32(a, b); +// CHECK: uqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) { +// CHECK: test_vqsubd_u64 + return vqsubd_u64(a, b); +// CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int64_t test_vshld_s64(int64_t a, int64_t b) { +// CHECK: test_vshld_s64 + return vshld_s64(a, b); +// CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64_t test_vshld_u64(uint64_t a, uint64_t b) { +// CHECK: test_vshld_u64 + return vshld_u64(a, b); +// CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +// CHECK: test_vqshlb_s8 +int8_t test_vqshlb_s8(int8_t a, int8_t b) { + return vqshlb_s8(a, b); +// CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} +} + +// CHECK: test_vqshlh_s16 +int16_t test_vqshlh_s16(int16_t a, int16_t b) { + return 
vqshlh_s16(a, b); +// CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} +} + +// CHECK: test_vqshls_s32 +int32_t test_vqshls_s32(int32_t a, int32_t b) { + return vqshls_s32(a, b); +// CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +// CHECK: test_vqshld_s64 +int64_t test_vqshld_s64(int64_t a, int64_t b) { + return vqshld_s64(a, b); +// CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +// CHECK: test_vqshlb_u8 +uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) { + return vqshlb_u8(a, b); +// CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} +} + +// CHECK: test_vqshlh_u16 +uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) { + return vqshlh_u16(a, b); +// CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} +} + +// CHECK: test_vqshls_u32 +uint32_t test_vqshls_u32(uint32_t a, uint32_t b) { + return vqshls_u32(a, b); +// CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +// CHECK: test_vqshld_u64 +uint64_t test_vqshld_u64(uint64_t a, uint64_t b) { + return vqshld_u64(a, b); +// CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +// CHECK: test_vrshld_s64 +int64_t test_vrshld_s64(int64_t a, int64_t b) { + return vrshld_s64(a, b); +// CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + + +// CHECK: test_vrshld_u64 +uint64_t test_vrshld_u64(uint64_t a, uint64_t b) { + return vrshld_u64(a, b); +// CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +// CHECK: test_vqrshlb_s8 +int8_t test_vqrshlb_s8(int8_t a, int8_t b) { + return vqrshlb_s8(a, b); +// CHECK: sqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} +} + +// CHECK: test_vqrshlh_s16 +int16_t test_vqrshlh_s16(int16_t a, int16_t b) { + return vqrshlh_s16(a, b); +// CHECK: sqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} +} + +// CHECK: test_vqrshls_s32 +int32_t test_vqrshls_s32(int32_t a, int32_t b) { + return vqrshls_s32(a, b); +// CHECK: sqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +// CHECK: test_vqrshld_s64 +int64_t test_vqrshld_s64(int64_t a, int64_t b) { + return vqrshld_s64(a, b); +// CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +// CHECK: test_vqrshlb_u8 +uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) { + return vqrshlb_u8(a, b); +// CHECK: uqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} +} + +// CHECK: test_vqrshlh_u16 +uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) { + return vqrshlh_u16(a, b); +// CHECK: uqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} +} + +// CHECK: test_vqrshls_u32 +uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) { + return vqrshls_u32(a, b); +// CHECK: uqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +// CHECK: test_vqrshld_u64 +uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) { + return vqrshld_u64(a, b); +// CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +// CHECK: test_vpaddd_s64 +int64_t test_vpaddd_s64(int64x2_t a) { + return vpaddd_s64(a); +// CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d +} + +// CHECK: test_vpadds_f32 +float32_t test_vpadds_f32(float32x2_t a) { + return vpadds_f32(a); +// CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s +} + +// CHECK: test_vpaddd_f64 +float64_t test_vpaddd_f64(float64x2_t a) { + return vpaddd_f64(a); +// CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d +} + +// CHECK: test_vpmaxnms_f32 +float32_t test_vpmaxnms_f32(float32x2_t a) { + return vpmaxnms_f32(a); +// CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s +} + +// CHECK: test_vpmaxnmqd_f64 +float64_t test_vpmaxnmqd_f64(float64x2_t a) { + return vpmaxnmqd_f64(a); +// CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d +} + +// CHECK: test_vpmaxs_f32 +float32_t test_vpmaxs_f32(float32x2_t a) { + return 
vpmaxs_f32(a); +// CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s +} + +// CHECK: test_vpmaxqd_f64 +float64_t test_vpmaxqd_f64(float64x2_t a) { + return vpmaxqd_f64(a); +// CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d +} + +// CHECK: test_vpminnms_f32 +float32_t test_vpminnms_f32(float32x2_t a) { + return vpminnms_f32(a); +// CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s +} + +// CHECK: test_vpminnmqd_f64 +float64_t test_vpminnmqd_f64(float64x2_t a) { + return vpminnmqd_f64(a); +// CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d +} + +// CHECK: test_vpmins_f32 +float32_t test_vpmins_f32(float32x2_t a) { + return vpmins_f32(a); +// CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s +} + +// CHECK: test_vpminqd_f64 +float64_t test_vpminqd_f64(float64x2_t a) { + return vpminqd_f64(a); +// CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d +} + +int16_t test_vqdmulhh_s16(int16_t a, int16_t b) { +// CHECK: test_vqdmulhh_s16 + return vqdmulhh_s16(a, b); +// CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} +} + +int32_t test_vqdmulhs_s32(int32_t a, int32_t b) { +// CHECK: test_vqdmulhs_s32 + return vqdmulhs_s32(a, b); +// CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) { +// CHECK: test_vqrdmulhh_s16 + return vqrdmulhh_s16(a, b); +// CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} +} + +int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) { +// CHECK: test_vqrdmulhs_s32 + return vqrdmulhs_s32(a, b); +// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +float32_t test_vmulxs_f32(float32_t a, float32_t b) { +// CHECK: test_vmulxs_f32 + return vmulxs_f32(a, b); +// CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +float64_t test_vmulxd_f64(float64_t a, float64_t b) { +// CHECK: test_vmulxd_f64 + return vmulxd_f64(a, b); +// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +float32_t test_vrecpss_f32(float32_t a, float32_t b) { +// CHECK: test_vrecpss_f32 + return vrecpss_f32(a, b); +// CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +float64_t test_vrecpsd_f64(float64_t a, float64_t b) { +// CHECK: test_vrecpsd_f64 + return vrecpsd_f64(a, b); +// CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +float32_t test_vrsqrtss_f32(float32_t a, float32_t b) { +// CHECK: test_vrsqrtss_f32 + return vrsqrtss_f32(a, b); +// CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +} + +float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) { +// CHECK: test_vrsqrtsd_f64 + return vrsqrtsd_f64(a, b); +// CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +float32_t test_vcvts_f32_s32(int32_t a) { +// CHECK: test_vcvts_f32_s32 +// CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}} + return vcvts_f32_s32(a); +} + +float64_t test_vcvtd_f64_s64(int64_t a) { +// CHECK: test_vcvtd_f64_s64 +// CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} + return vcvtd_f64_s64(a); +} + +float32_t test_vcvts_f32_u32(uint32_t a) { +// CHECK: test_vcvts_f32_u32 +// CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}} + return vcvts_f32_u32(a); +} + +float64_t test_vcvtd_f64_u64(uint64_t a) { +// CHECK: test_vcvtd_f64_u64 +// CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}} + return vcvtd_f64_u64(a); +} + +float32_t test_vrecpes_f32(float32_t a) { +// CHECK: test_vrecpes_f32 +// CHECK: frecpe {{s[0-9]+}}, {{s[0-9]+}} + return vrecpes_f32(a); +} + +float64_t test_vrecped_f64(float64_t a) { +// CHECK: test_vrecped_f64 +// CHECK: frecpe {{d[0-9]+}}, {{d[0-9]+}} + return vrecped_f64(a); +} + +float32_t test_vrecpxs_f32(float32_t a) { +// CHECK: test_vrecpxs_f32 +// CHECK: frecpx {{s[0-9]+}}, {{s[0-9]+}} + return 
vrecpxs_f32(a); + } + +float64_t test_vrecpxd_f64(float64_t a) { +// CHECK: test_vrecpxd_f64 +// CHECK: frecpx {{d[0-9]+}}, {{d[0-9]+}} + return vrecpxd_f64(a); +} + +float32_t test_vrsqrtes_f32(float32_t a) { +// CHECK: vrsqrtes_f32 +// CHECK: frsqrte {{s[0-9]+}}, {{s[0-9]+}} + return vrsqrtes_f32(a); +} + +float64_t test_vrsqrted_f64(float64_t a) { +// CHECK: vrsqrted_f64 +// CHECK: frsqrte {{d[0-9]+}}, {{d[0-9]+}} + return vrsqrted_f64(a); +} + +uint8x16_t test_vld1q_u8(uint8_t const *a) { + // CHECK: test_vld1q_u8 + return vld1q_u8(a); + // CHECK: ld1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +uint16x8_t test_vld1q_u16(uint16_t const *a) { + // CHECK: test_vld1q_u16 + return vld1q_u16(a); + // CHECK: ld1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +uint32x4_t test_vld1q_u32(uint32_t const *a) { + // CHECK: test_vld1q_u32 + return vld1q_u32(a); + // CHECK: ld1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +uint64x2_t test_vld1q_u64(uint64_t const *a) { + // CHECK: test_vld1q_u64 + return vld1q_u64(a); + // CHECK: ld1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +int8x16_t test_vld1q_s8(int8_t const *a) { + // CHECK: test_vld1q_s8 + return vld1q_s8(a); + // CHECK: ld1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +int16x8_t test_vld1q_s16(int16_t const *a) { + // CHECK: test_vld1q_s16 + return vld1q_s16(a); + // CHECK: ld1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +int32x4_t test_vld1q_s32(int32_t const *a) { + // CHECK: test_vld1q_s32 + return vld1q_s32(a); + // CHECK: ld1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +int64x2_t test_vld1q_s64(int64_t const *a) { + // CHECK: test_vld1q_s64 + return vld1q_s64(a); + // CHECK: ld1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +float16x8_t test_vld1q_f16(float16_t const *a) { + // CHECK: test_vld1q_f16 + return vld1q_f16(a); + // CHECK: ld1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +float32x4_t test_vld1q_f32(float32_t const *a) { + // CHECK: test_vld1q_f32 + return vld1q_f32(a); + // CHECK: ld1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +float64x2_t test_vld1q_f64(float64_t const *a) { + // CHECK: test_vld1q_f64 + return vld1q_f64(a); + // CHECK: ld1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +poly8x16_t test_vld1q_p8(poly8_t const *a) { + // CHECK: test_vld1q_p8 + return vld1q_p8(a); + // CHECK: ld1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +poly16x8_t test_vld1q_p16(poly16_t const *a) { + // CHECK: test_vld1q_p16 + return vld1q_p16(a); + // CHECK: ld1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +uint8x8_t test_vld1_u8(uint8_t const *a) { + // CHECK: test_vld1_u8 + return vld1_u8(a); + // CHECK: ld1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +uint16x4_t test_vld1_u16(uint16_t const *a) { + // CHECK: test_vld1_u16 + return vld1_u16(a); + // CHECK: ld1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +uint32x2_t test_vld1_u32(uint32_t const *a) { + // CHECK: test_vld1_u32 + return vld1_u32(a); + // CHECK: ld1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +uint64x1_t test_vld1_u64(uint64_t const *a) { + // CHECK: test_vld1_u64 + return vld1_u64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +int8x8_t test_vld1_s8(int8_t const *a) { + // CHECK: test_vld1_s8 + return vld1_s8(a); + // CHECK: ld1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +int16x4_t test_vld1_s16(int16_t const *a) { + // CHECK: test_vld1_s16 + return vld1_s16(a); + // CHECK: ld1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +int32x2_t test_vld1_s32(int32_t const *a) { + // CHECK: test_vld1_s32 + return vld1_s32(a); + // CHECK: ld1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +int64x1_t test_vld1_s64(int64_t const *a) { + // CHECK: test_vld1_s64 
+ return vld1_s64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +float16x4_t test_vld1_f16(float16_t const *a) { + // CHECK: test_vld1_f16 + return vld1_f16(a); + // CHECK: ld1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +float32x2_t test_vld1_f32(float32_t const *a) { + // CHECK: test_vld1_f32 + return vld1_f32(a); + // CHECK: ld1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +float64x1_t test_vld1_f64(float64_t const *a) { + // CHECK: test_vld1_f64 + return vld1_f64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +poly8x8_t test_vld1_p8(poly8_t const *a) { + // CHECK: test_vld1_p8 + return vld1_p8(a); + // CHECK: ld1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +poly16x4_t test_vld1_p16(poly16_t const *a) { + // CHECK: test_vld1_p16 + return vld1_p16(a); + // CHECK: ld1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +uint8x16x2_t test_vld2q_u8(uint8_t const *a) { + // CHECK: test_vld2q_u8 + return vld2q_u8(a); + // CHECK: ld2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +uint16x8x2_t test_vld2q_u16(uint16_t const *a) { + // CHECK: test_vld2q_u16 + return vld2q_u16(a); + // CHECK: ld2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +uint32x4x2_t test_vld2q_u32(uint32_t const *a) { + // CHECK: test_vld2q_u32 + return vld2q_u32(a); + // CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +uint64x2x2_t test_vld2q_u64(uint64_t const *a) { + // CHECK: test_vld2q_u64 + return vld2q_u64(a); + // CHECK: ld2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +int8x16x2_t test_vld2q_s8(int8_t const *a) { + // CHECK: test_vld2q_s8 + return vld2q_s8(a); + // CHECK: ld2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +int16x8x2_t test_vld2q_s16(int16_t const *a) { + // CHECK: test_vld2q_s16 + return vld2q_s16(a); + // CHECK: ld2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +int32x4x2_t test_vld2q_s32(int32_t const *a) { + // CHECK: test_vld2q_s32 + return vld2q_s32(a); + // CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +int64x2x2_t test_vld2q_s64(int64_t const *a) { + // CHECK: test_vld2q_s64 + return vld2q_s64(a); + // CHECK: ld2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +float16x8x2_t test_vld2q_f16(float16_t const *a) { + // CHECK: test_vld2q_f16 + return vld2q_f16(a); + // CHECK: ld2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +float32x4x2_t test_vld2q_f32(float32_t const *a) { + // CHECK: test_vld2q_f32 + return vld2q_f32(a); + // CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +float64x2x2_t test_vld2q_f64(float64_t const *a) { + // CHECK: test_vld2q_f64 + return vld2q_f64(a); + // CHECK: ld2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +poly8x16x2_t test_vld2q_p8(poly8_t const *a) { + // CHECK: test_vld2q_p8 + return vld2q_p8(a); + // CHECK: ld2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +poly16x8x2_t test_vld2q_p16(poly16_t const *a) { + // CHECK: test_vld2q_p16 + return vld2q_p16(a); + // CHECK: ld2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +uint8x8x2_t test_vld2_u8(uint8_t const *a) { + // CHECK: test_vld2_u8 + return vld2_u8(a); + // CHECK: ld2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +uint16x4x2_t test_vld2_u16(uint16_t const *a) { + // CHECK: test_vld2_u16 + return vld2_u16(a); + // CHECK: ld2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +uint32x2x2_t test_vld2_u32(uint32_t const *a) { + // CHECK: test_vld2_u32 + return vld2_u32(a); + // CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, 
[{{x[0-9]+|sp}}] +} + +uint64x1x2_t test_vld2_u64(uint64_t const *a) { + // CHECK: test_vld2_u64 + return vld2_u64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +int8x8x2_t test_vld2_s8(int8_t const *a) { + // CHECK: test_vld2_s8 + return vld2_s8(a); + // CHECK: ld2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +int16x4x2_t test_vld2_s16(int16_t const *a) { + // CHECK: test_vld2_s16 + return vld2_s16(a); + // CHECK: ld2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +int32x2x2_t test_vld2_s32(int32_t const *a) { + // CHECK: test_vld2_s32 + return vld2_s32(a); + // CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +int64x1x2_t test_vld2_s64(int64_t const *a) { + // CHECK: test_vld2_s64 + return vld2_s64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +float16x4x2_t test_vld2_f16(float16_t const *a) { + // CHECK: test_vld2_f16 + return vld2_f16(a); + // CHECK: ld2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +float32x2x2_t test_vld2_f32(float32_t const *a) { + // CHECK: test_vld2_f32 + return vld2_f32(a); + // CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +float64x1x2_t test_vld2_f64(float64_t const *a) { + // CHECK: test_vld2_f64 + return vld2_f64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +poly8x8x2_t test_vld2_p8(poly8_t const *a) { + // CHECK: test_vld2_p8 + return vld2_p8(a); + // CHECK: ld2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +poly16x4x2_t test_vld2_p16(poly16_t const *a) { + // CHECK: test_vld2_p16 + return vld2_p16(a); + // CHECK: ld2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +uint8x16x3_t test_vld3q_u8(uint8_t const *a) { + // CHECK: test_vld3q_u8 + return vld3q_u8(a); + // CHECK: ld3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +uint16x8x3_t test_vld3q_u16(uint16_t const *a) { + // CHECK: test_vld3q_u16 + return vld3q_u16(a); + // CHECK: ld3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +uint32x4x3_t test_vld3q_u32(uint32_t const *a) { + // CHECK: test_vld3q_u32 + return vld3q_u32(a); + // CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +uint64x2x3_t test_vld3q_u64(uint64_t const *a) { + // CHECK: test_vld3q_u64 + return vld3q_u64(a); + // CHECK: ld3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +int8x16x3_t test_vld3q_s8(int8_t const *a) { + // CHECK: test_vld3q_s8 + return vld3q_s8(a); + // CHECK: ld3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +int16x8x3_t test_vld3q_s16(int16_t const *a) { + // CHECK: test_vld3q_s16 + return vld3q_s16(a); + // CHECK: ld3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +int32x4x3_t test_vld3q_s32(int32_t const *a) { + // CHECK: test_vld3q_s32 + return vld3q_s32(a); + // CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +int64x2x3_t test_vld3q_s64(int64_t const *a) { + // CHECK: test_vld3q_s64 + return vld3q_s64(a); + // CHECK: ld3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +float16x8x3_t test_vld3q_f16(float16_t const *a) { + // CHECK: test_vld3q_f16 + return vld3q_f16(a); + // CHECK: ld3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +float32x4x3_t test_vld3q_f32(float32_t const *a) { + // CHECK: test_vld3q_f32 + return vld3q_f32(a); + // CHECK: ld3 
{v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +float64x2x3_t test_vld3q_f64(float64_t const *a) { + // CHECK: test_vld3q_f64 + return vld3q_f64(a); + // CHECK: ld3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +poly8x16x3_t test_vld3q_p8(poly8_t const *a) { + // CHECK: test_vld3q_p8 + return vld3q_p8(a); + // CHECK: ld3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +poly16x8x3_t test_vld3q_p16(poly16_t const *a) { + // CHECK: test_vld3q_p16 + return vld3q_p16(a); + // CHECK: ld3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +uint8x8x3_t test_vld3_u8(uint8_t const *a) { + // CHECK: test_vld3_u8 + return vld3_u8(a); + // CHECK: ld3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +uint16x4x3_t test_vld3_u16(uint16_t const *a) { + // CHECK: test_vld3_u16 + return vld3_u16(a); + // CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +uint32x2x3_t test_vld3_u32(uint32_t const *a) { + // CHECK: test_vld3_u32 + return vld3_u32(a); + // CHECK: ld3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +uint64x1x3_t test_vld3_u64(uint64_t const *a) { + // CHECK: test_vld3_u64 + return vld3_u64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +int8x8x3_t test_vld3_s8(int8_t const *a) { + // CHECK: test_vld3_s8 + return vld3_s8(a); + // CHECK: ld3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +int16x4x3_t test_vld3_s16(int16_t const *a) { + // CHECK: test_vld3_s16 + return vld3_s16(a); + // CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +int32x2x3_t test_vld3_s32(int32_t const *a) { + // CHECK: test_vld3_s32 + return vld3_s32(a); + // CHECK: ld3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +int64x1x3_t test_vld3_s64(int64_t const *a) { + // CHECK: test_vld3_s64 + return vld3_s64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +float16x4x3_t test_vld3_f16(float16_t const *a) { + // CHECK: test_vld3_f16 + return vld3_f16(a); + // CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +float32x2x3_t test_vld3_f32(float32_t const *a) { + // CHECK: test_vld3_f32 + return vld3_f32(a); + // CHECK: ld3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +float64x1x3_t test_vld3_f64(float64_t const *a) { + // CHECK: test_vld3_f64 + return vld3_f64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +poly8x8x3_t test_vld3_p8(poly8_t const *a) { + // CHECK: test_vld3_p8 + return vld3_p8(a); + // CHECK: ld3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +poly16x4x3_t test_vld3_p16(poly16_t const *a) { + // CHECK: test_vld3_p16 + return vld3_p16(a); + // CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +uint8x16x4_t test_vld4q_u8(uint8_t const *a) { + // CHECK: test_vld4q_u8 + return vld4q_u8(a); + // CHECK: ld4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +uint16x8x4_t test_vld4q_u16(uint16_t const *a) { + // CHECK: test_vld4q_u16 + return vld4q_u16(a); + // CHECK: ld4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +uint32x4x4_t 
test_vld4q_u32(uint32_t const *a) {
+  // CHECK: test_vld4q_u32
+  return vld4q_u32(a);
+  // CHECK: ld4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
+  // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
+  // CHECK: test_vld4q_u64
+  return vld4q_u64(a);
+  // CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+int8x16x4_t test_vld4q_s8(int8_t const *a) {
+  // CHECK: test_vld4q_s8
+  return vld4q_s8(a);
+  // CHECK: ld4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b,
+  // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+int16x8x4_t test_vld4q_s16(int16_t const *a) {
+  // CHECK: test_vld4q_s16
+  return vld4q_s16(a);
+  // CHECK: ld4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+int32x4x4_t test_vld4q_s32(int32_t const *a) {
+  // CHECK: test_vld4q_s32
+  return vld4q_s32(a);
+  // CHECK: ld4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
+  // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+int64x2x4_t test_vld4q_s64(int64_t const *a) {
+  // CHECK: test_vld4q_s64
+  return vld4q_s64(a);
+  // CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+float16x8x4_t test_vld4q_f16(float16_t const *a) {
+  // CHECK: test_vld4q_f16
+  return vld4q_f16(a);
+  // CHECK: ld4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+float32x4x4_t test_vld4q_f32(float32_t const *a) {
+  // CHECK: test_vld4q_f32
+  return vld4q_f32(a);
+  // CHECK: ld4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
+  // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+float64x2x4_t test_vld4q_f64(float64_t const *a) {
+  // CHECK: test_vld4q_f64
+  return vld4q_f64(a);
+  // CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
+  // CHECK: test_vld4q_p8
+  return vld4q_p8(a);
+  // CHECK: ld4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b,
+  // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
+  // CHECK: test_vld4q_p16
+  return vld4q_p16(a);
+  // CHECK: ld4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+uint8x8x4_t test_vld4_u8(uint8_t const *a) {
+  // CHECK: test_vld4_u8
+  return vld4_u8(a);
+  // CHECK: ld4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b,
+  // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+uint16x4x4_t test_vld4_u16(uint16_t const *a) {
+  // CHECK: test_vld4_u16
+  return vld4_u16(a);
+  // CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
+  // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+uint32x2x4_t test_vld4_u32(uint32_t const *a) {
+  // CHECK: test_vld4_u32
+  return vld4_u32(a);
+  // CHECK: ld4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s,
+  // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+uint64x1x4_t test_vld4_u64(uint64_t const *a) {
+  // CHECK: test_vld4_u64
+  return vld4_u64(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d,
+  // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+int8x8x4_t test_vld4_s8(int8_t const *a) {
+  // CHECK: test_vld4_s8
+  return vld4_s8(a);
+  // CHECK: ld4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b,
+  // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+int16x4x4_t test_vld4_s16(int16_t const *a) {
+  // CHECK: test_vld4_s16
+  return vld4_s16(a);
+  // CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
+  // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+int32x2x4_t test_vld4_s32(int32_t const *a) {
+
// CHECK: test_vld4_s32 + return vld4_s32(a); + // CHECK: ld4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +int64x1x4_t test_vld4_s64(int64_t const *a) { + // CHECK: test_vld4_s64 + return vld4_s64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +float16x4x4_t test_vld4_f16(float16_t const *a) { + // CHECK: test_vld4_f16 + return vld4_f16(a); + // CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +float32x2x4_t test_vld4_f32(float32_t const *a) { + // CHECK: test_vld4_f32 + return vld4_f32(a); + // CHECK: ld4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +float64x1x4_t test_vld4_f64(float64_t const *a) { + // CHECK: test_vld4_f64 + return vld4_f64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +poly8x8x4_t test_vld4_p8(poly8_t const *a) { + // CHECK: test_vld4_p8 + return vld4_p8(a); + // CHECK: ld4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +poly16x4x4_t test_vld4_p16(poly16_t const *a) { + // CHECK: test_vld4_p16 + return vld4_p16(a); + // CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u8(uint8_t *a, uint8x16_t b) { + // CHECK: test_vst1q_u8 + vst1q_u8(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u16(uint16_t *a, uint16x8_t b) { + // CHECK: test_vst1q_u16 + vst1q_u16(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u32(uint32_t *a, uint32x4_t b) { + // CHECK: test_vst1q_u32 + vst1q_u32(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u64(uint64_t *a, uint64x2_t b) { + // CHECK: test_vst1q_u64 + vst1q_u64(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s8(int8_t *a, int8x16_t b) { + // CHECK: test_vst1q_s8 + vst1q_s8(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s16(int16_t *a, int16x8_t b) { + // CHECK: test_vst1q_s16 + vst1q_s16(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s32(int32_t *a, int32x4_t b) { + // CHECK: test_vst1q_s32 + vst1q_s32(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s64(int64_t *a, int64x2_t b) { + // CHECK: test_vst1q_s64 + vst1q_s64(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_f16(float16_t *a, float16x8_t b) { + // CHECK: test_vst1q_f16 + vst1q_f16(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_f32(float32_t *a, float32x4_t b) { + // CHECK: test_vst1q_f32 + vst1q_f32(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_f64(float64_t *a, float64x2_t b) { + // CHECK: test_vst1q_f64 + vst1q_f64(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_p8(poly8_t *a, poly8x16_t b) { + // CHECK: test_vst1q_p8 + vst1q_p8(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_p16(poly16_t *a, poly16x8_t b) { + // CHECK: test_vst1q_p16 + vst1q_p16(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u8(uint8_t *a, uint8x8_t b) { + // CHECK: test_vst1_u8 + vst1_u8(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u16(uint16_t *a, uint16x4_t b) { + // 
CHECK: test_vst1_u16 + vst1_u16(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u32(uint32_t *a, uint32x2_t b) { + // CHECK: test_vst1_u32 + vst1_u32(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u64(uint64_t *a, uint64x1_t b) { + // CHECK: test_vst1_u64 + vst1_u64(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s8(int8_t *a, int8x8_t b) { + // CHECK: test_vst1_s8 + vst1_s8(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s16(int16_t *a, int16x4_t b) { + // CHECK: test_vst1_s16 + vst1_s16(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s32(int32_t *a, int32x2_t b) { + // CHECK: test_vst1_s32 + vst1_s32(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s64(int64_t *a, int64x1_t b) { + // CHECK: test_vst1_s64 + vst1_s64(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_f16(float16_t *a, float16x4_t b) { + // CHECK: test_vst1_f16 + vst1_f16(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_f32(float32_t *a, float32x2_t b) { + // CHECK: test_vst1_f32 + vst1_f32(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst1_f64(float64_t *a, float64x1_t b) { + // CHECK: test_vst1_f64 + vst1_f64(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_p8(poly8_t *a, poly8x8_t b) { + // CHECK: test_vst1_p8 + vst1_p8(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst1_p16(poly16_t *a, poly16x4_t b) { + // CHECK: test_vst1_p16 + vst1_p16(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) { + // CHECK: test_vst2q_u8 + vst2q_u8(a, b); + // CHECK: st2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) { + // CHECK: test_vst2q_u16 + vst2q_u16(a, b); + // CHECK: st2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) { + // CHECK: test_vst2q_u32 + vst2q_u32(a, b); + // CHECK: st2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) { + // CHECK: test_vst2q_u64 + vst2q_u64(a, b); + // CHECK: st2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst2q_s8(int8_t *a, int8x16x2_t b) { + // CHECK: test_vst2q_s8 + vst2q_s8(a, b); + // CHECK: st2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst2q_s16(int16_t *a, int16x8x2_t b) { + // CHECK: test_vst2q_s16 + vst2q_s16(a, b); + // CHECK: st2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst2q_s32(int32_t *a, int32x4x2_t b) { + // CHECK: test_vst2q_s32 + vst2q_s32(a, b); + // CHECK: st2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst2q_s64(int64_t *a, int64x2x2_t b) { + // CHECK: test_vst2q_s64 + vst2q_s64(a, b); + // CHECK: st2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst2q_f16(float16_t *a, float16x8x2_t b) { + // CHECK: test_vst2q_f16 + vst2q_f16(a, b); + // CHECK: st2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst2q_f32(float32_t *a, float32x4x2_t b) { + // CHECK: test_vst2q_f32 + vst2q_f32(a, b); + // CHECK: st2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst2q_f64(float64_t *a, float64x2x2_t b) { + // CHECK: test_vst2q_f64 + vst2q_f64(a, b); + // CHECK: st2 {v{{[0-9]+}}.2d, 
v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) { + // CHECK: test_vst2q_p8 + vst2q_p8(a, b); + // CHECK: st2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) { + // CHECK: test_vst2q_p16 + vst2q_p16(a, b); + // CHECK: st2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst2_u8(uint8_t *a, uint8x8x2_t b) { + // CHECK: test_vst2_u8 + vst2_u8(a, b); + // CHECK: st2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst2_u16(uint16_t *a, uint16x4x2_t b) { + // CHECK: test_vst2_u16 + vst2_u16(a, b); + // CHECK: st2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst2_u32(uint32_t *a, uint32x2x2_t b) { + // CHECK: test_vst2_u32 + vst2_u32(a, b); + // CHECK: st2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst2_u64(uint64_t *a, uint64x1x2_t b) { + // CHECK: test_vst2_u64 + vst2_u64(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst2_s8(int8_t *a, int8x8x2_t b) { + // CHECK: test_vst2_s8 + vst2_s8(a, b); + // CHECK: st2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst2_s16(int16_t *a, int16x4x2_t b) { + // CHECK: test_vst2_s16 + vst2_s16(a, b); + // CHECK: st2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst2_s32(int32_t *a, int32x2x2_t b) { + // CHECK: test_vst2_s32 + vst2_s32(a, b); + // CHECK: st2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst2_s64(int64_t *a, int64x1x2_t b) { + // CHECK: test_vst2_s64 + vst2_s64(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst2_f16(float16_t *a, float16x4x2_t b) { + // CHECK: test_vst2_f16 + vst2_f16(a, b); + // CHECK: st2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst2_f32(float32_t *a, float32x2x2_t b) { + // CHECK: test_vst2_f32 + vst2_f32(a, b); + // CHECK: st2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst2_f64(float64_t *a, float64x1x2_t b) { + // CHECK: test_vst2_f64 + vst2_f64(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst2_p8(poly8_t *a, poly8x8x2_t b) { + // CHECK: test_vst2_p8 + vst2_p8(a, b); + // CHECK: st2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst2_p16(poly16_t *a, poly16x4x2_t b) { + // CHECK: test_vst2_p16 + vst2_p16(a, b); + // CHECK: st2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) { + // CHECK: test_vst3q_u8 + vst3q_u8(a, b); + // CHECK: st3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) { + // CHECK: test_vst3q_u16 + vst3q_u16(a, b); + // CHECK: st3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) { + // CHECK: test_vst3q_u32 + vst3q_u32(a, b); + // CHECK: st3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) { + // CHECK: test_vst3q_u64 + vst3q_u64(a, b); + // CHECK: st3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3q_s8(int8_t *a, int8x16x3_t b) { + // CHECK: test_vst3q_s8 + vst3q_s8(a, b); + // CHECK: st3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3q_s16(int16_t *a, int16x8x3_t b) { + // CHECK: 
test_vst3q_s16 + vst3q_s16(a, b); + // CHECK: st3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3q_s32(int32_t *a, int32x4x3_t b) { + // CHECK: test_vst3q_s32 + vst3q_s32(a, b); + // CHECK: st3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3q_s64(int64_t *a, int64x2x3_t b) { + // CHECK: test_vst3q_s64 + vst3q_s64(a, b); + // CHECK: st3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3q_f16(float16_t *a, float16x8x3_t b) { + // CHECK: test_vst3q_f16 + vst3q_f16(a, b); + // CHECK: st3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3q_f32(float32_t *a, float32x4x3_t b) { + // CHECK: test_vst3q_f32 + vst3q_f32(a, b); + // CHECK: st3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3q_f64(float64_t *a, float64x2x3_t b) { + // CHECK: test_vst3q_f64 + vst3q_f64(a, b); + // CHECK: st3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) { + // CHECK: test_vst3q_p8 + vst3q_p8(a, b); + // CHECK: st3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) { + // CHECK: test_vst3q_p16 + vst3q_p16(a, b); + // CHECK: st3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3_u8(uint8_t *a, uint8x8x3_t b) { + // CHECK: test_vst3_u8 + vst3_u8(a, b); + // CHECK: st3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3_u16(uint16_t *a, uint16x4x3_t b) { + // CHECK: test_vst3_u16 + vst3_u16(a, b); + // CHECK: st3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3_u32(uint32_t *a, uint32x2x3_t b) { + // CHECK: test_vst3_u32 + vst3_u32(a, b); + // CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3_u64(uint64_t *a, uint64x1x3_t b) { + // CHECK: test_vst3_u64 + vst3_u64(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3_s8(int8_t *a, int8x8x3_t b) { + // CHECK: test_vst3_s8 + vst3_s8(a, b); + // CHECK: st3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3_s16(int16_t *a, int16x4x3_t b) { + // CHECK: test_vst3_s16 + vst3_s16(a, b); + // CHECK: st3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3_s32(int32_t *a, int32x2x3_t b) { + // CHECK: test_vst3_s32 + vst3_s32(a, b); + // CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3_s64(int64_t *a, int64x1x3_t b) { + // CHECK: test_vst3_s64 + vst3_s64(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3_f16(float16_t *a, float16x4x3_t b) { + // CHECK: test_vst3_f16 + vst3_f16(a, b); + // CHECK: st3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3_f32(float32_t *a, float32x2x3_t b) { + // CHECK: test_vst3_f32 + vst3_f32(a, b); + // CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3_f64(float64_t *a, float64x1x3_t b) { + // CHECK: test_vst3_f64 + vst3_f64(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3_p8(poly8_t *a, 
poly8x8x3_t b) { + // CHECK: test_vst3_p8 + vst3_p8(a, b); + // CHECK: st3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst3_p16(poly16_t *a, poly16x4x3_t b) { + // CHECK: test_vst3_p16 + vst3_p16(a, b); + // CHECK: st3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) { + // CHECK: test_vst4q_u8 + vst4q_u8(a, b); + // CHECK: st4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) { + // CHECK: test_vst4q_u16 + vst4q_u16(a, b); + // CHECK: st4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) { + // CHECK: test_vst4q_u32 + vst4q_u32(a, b); + // CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) { + // CHECK: test_vst4q_u64 + vst4q_u64(a, b); + // CHECK: st4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst4q_s8(int8_t *a, int8x16x4_t b) { + // CHECK: test_vst4q_s8 + vst4q_s8(a, b); + // CHECK: st4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst4q_s16(int16_t *a, int16x8x4_t b) { + // CHECK: test_vst4q_s16 + vst4q_s16(a, b); + // CHECK: st4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst4q_s32(int32_t *a, int32x4x4_t b) { + // CHECK: test_vst4q_s32 + vst4q_s32(a, b); + // CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst4q_s64(int64_t *a, int64x2x4_t b) { + // CHECK: test_vst4q_s64 + vst4q_s64(a, b); + // CHECK: st4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst4q_f16(float16_t *a, float16x8x4_t b) { + // CHECK: test_vst4q_f16 + vst4q_f16(a, b); + // CHECK: st4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst4q_f32(float32_t *a, float32x4x4_t b) { + // CHECK: test_vst4q_f32 + vst4q_f32(a, b); + // CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst4q_f64(float64_t *a, float64x2x4_t b) { + // CHECK: test_vst4q_f64 + vst4q_f64(a, b); + // CHECK: st4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) { + // CHECK: test_vst4q_p8 + vst4q_p8(a, b); + // CHECK: st4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) { + // CHECK: test_vst4q_p16 + vst4q_p16(a, b); + // CHECK: st4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst4_u8(uint8_t *a, uint8x8x4_t b) { + // CHECK: test_vst4_u8 + vst4_u8(a, b); + // CHECK: st4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst4_u16(uint16_t *a, uint16x4x4_t b) { + // CHECK: test_vst4_u16 + vst4_u16(a, b); + // CHECK: st4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst4_u32(uint32_t *a, uint32x2x4_t b) { + // CHECK: test_vst4_u32 + vst4_u32(a, b); + // CHECK: st4 
{v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst4_u64(uint64_t *a, uint64x1x4_t b) { + // CHECK: test_vst4_u64 + vst4_u64(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst4_s8(int8_t *a, int8x8x4_t b) { + // CHECK: test_vst4_s8 + vst4_s8(a, b); + // CHECK: st4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst4_s16(int16_t *a, int16x4x4_t b) { + // CHECK: test_vst4_s16 + vst4_s16(a, b); + // CHECK: st4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst4_s32(int32_t *a, int32x2x4_t b) { + // CHECK: test_vst4_s32 + vst4_s32(a, b); + // CHECK: st4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst4_s64(int64_t *a, int64x1x4_t b) { + // CHECK: test_vst4_s64 + vst4_s64(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst4_f16(float16_t *a, float16x4x4_t b) { + // CHECK: test_vst4_f16 + vst4_f16(a, b); + // CHECK: st4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst4_f32(float32_t *a, float32x2x4_t b) { + // CHECK: test_vst4_f32 + vst4_f32(a, b); + // CHECK: st4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst4_f64(float64_t *a, float64x1x4_t b) { + // CHECK: test_vst4_f64 + vst4_f64(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst4_p8(poly8_t *a, poly8x8x4_t b) { + // CHECK: test_vst4_p8 + vst4_p8(a, b); + // CHECK: st4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst4_p16(poly16_t *a, poly16x4x4_t b) { + // CHECK: test_vst4_p16 + vst4_p16(a, b); + // CHECK: st4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) { + // CHECK-LABEL: test_vld1q_u8_x2 + return vld1q_u8_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) { + // CHECK-LABEL: test_vld1q_u16_x2 + return vld1q_u16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) { + // CHECK-LABEL: test_vld1q_u32_x2 + return vld1q_u32_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) { + // CHECK-LABEL: test_vld1q_u64_x2 + return vld1q_u64_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +int8x16x2_t test_vld1q_s8_x2(int8_t const *a) { + // CHECK-LABEL: test_vld1q_s8_x2 + return vld1q_s8_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +int16x8x2_t test_vld1q_s16_x2(int16_t const *a) { + // CHECK-LABEL: test_vld1q_s16_x2 + return vld1q_s16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +int32x4x2_t test_vld1q_s32_x2(int32_t const *a) { + // CHECK-LABEL: test_vld1q_s32_x2 + return vld1q_s32_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +int64x2x2_t test_vld1q_s64_x2(int64_t const *a) { + // CHECK-LABEL: test_vld1q_s64_x2 + return vld1q_s64_x2(a); + // CHECK: ld1 
{v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +float16x8x2_t test_vld1q_f16_x2(float16_t const *a) { + // CHECK-LABEL: test_vld1q_f16_x2 + return vld1q_f16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +float32x4x2_t test_vld1q_f32_x2(float32_t const *a) { + // CHECK-LABEL: test_vld1q_f32_x2 + return vld1q_f32_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +float64x2x2_t test_vld1q_f64_x2(float64_t const *a) { + // CHECK-LABEL: test_vld1q_f64_x2 + return vld1q_f64_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) { + // CHECK-LABEL: test_vld1q_p8_x2 + return vld1q_p8_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) { + // CHECK-LABEL: test_vld1q_p16_x2 + return vld1q_p16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) { + // CHECK-LABEL: test_vld1q_p64_x2 + return vld1q_p64_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) { + // CHECK-LABEL: test_vld1_u8_x2 + return vld1_u8_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) { + // CHECK-LABEL: test_vld1_u16_x2 + return vld1_u16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) { + // CHECK-LABEL: test_vld1_u32_x2 + return vld1_u32_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) { + // CHECK-LABEL: test_vld1_u64_x2 + return vld1_u64_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +int8x8x2_t test_vld1_s8_x2(int8_t const *a) { + // CHECK-LABEL: test_vld1_s8_x2 + return vld1_s8_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +int16x4x2_t test_vld1_s16_x2(int16_t const *a) { + // CHECK-LABEL: test_vld1_s16_x2 + return vld1_s16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +int32x2x2_t test_vld1_s32_x2(int32_t const *a) { + // CHECK-LABEL: test_vld1_s32_x2 + return vld1_s32_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +int64x1x2_t test_vld1_s64_x2(int64_t const *a) { + // CHECK-LABEL: test_vld1_s64_x2 + return vld1_s64_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +float16x4x2_t test_vld1_f16_x2(float16_t const *a) { + // CHECK-LABEL: test_vld1_f16_x2 + return vld1_f16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +float32x2x2_t test_vld1_f32_x2(float32_t const *a) { + // CHECK-LABEL: test_vld1_f32_x2 + return vld1_f32_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +float64x1x2_t test_vld1_f64_x2(float64_t const *a) { + // CHECK-LABEL: test_vld1_f64_x2 + return vld1_f64_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) { + // CHECK-LABEL: test_vld1_p8_x2 + return vld1_p8_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) { + // CHECK-LABEL: test_vld1_p16_x2 + return vld1_p16_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} 
+ +poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) { + // CHECK-LABEL: test_vld1_p64_x2 + return vld1_p64_x2(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) { + // CHECK-LABEL: test_vld1q_u8_x3 + return vld1q_u8_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) { + // CHECK-LABEL: test_vld1q_u16_x3 + return vld1q_u16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) { + // CHECK-LABEL: test_vld1q_u32_x3 + return vld1q_u32_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) { + // CHECK-LABEL: test_vld1q_u64_x3 + return vld1q_u64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +int8x16x3_t test_vld1q_s8_x3(int8_t const *a) { + // CHECK-LABEL: test_vld1q_s8_x3 + return vld1q_s8_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +int16x8x3_t test_vld1q_s16_x3(int16_t const *a) { + // CHECK-LABEL: test_vld1q_s16_x3 + return vld1q_s16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +int32x4x3_t test_vld1q_s32_x3(int32_t const *a) { + // CHECK-LABEL: test_vld1q_s32_x3 + return vld1q_s32_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +int64x2x3_t test_vld1q_s64_x3(int64_t const *a) { + // CHECK-LABEL: test_vld1q_s64_x3 + return vld1q_s64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +float16x8x3_t test_vld1q_f16_x3(float16_t const *a) { + // CHECK-LABEL: test_vld1q_f16_x3 + return vld1q_f16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +float32x4x3_t test_vld1q_f32_x3(float32_t const *a) { + // CHECK-LABEL: test_vld1q_f32_x3 + return vld1q_f32_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +float64x2x3_t test_vld1q_f64_x3(float64_t const *a) { + // CHECK-LABEL: test_vld1q_f64_x3 + return vld1q_f64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) { + // CHECK-LABEL: test_vld1q_p8_x3 + return vld1q_p8_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) { + // CHECK-LABEL: test_vld1q_p16_x3 + return vld1q_p16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) { + // CHECK-LABEL: test_vld1q_p64_x3 + return vld1q_p64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) { + // CHECK-LABEL: test_vld1_u8_x3 + return vld1_u8_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) { + // CHECK-LABEL: test_vld1_u16_x3 + return vld1_u16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) { + // 
CHECK-LABEL: test_vld1_u32_x3 + return vld1_u32_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) { + // CHECK-LABEL: test_vld1_u64_x3 + return vld1_u64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +int8x8x3_t test_vld1_s8_x3(int8_t const *a) { + // CHECK-LABEL: test_vld1_s8_x3 + return vld1_s8_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +int16x4x3_t test_vld1_s16_x3(int16_t const *a) { + // CHECK-LABEL: test_vld1_s16_x3 + return vld1_s16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +int32x2x3_t test_vld1_s32_x3(int32_t const *a) { + // CHECK-LABEL: test_vld1_s32_x3 + return vld1_s32_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +int64x1x3_t test_vld1_s64_x3(int64_t const *a) { + // CHECK-LABEL: test_vld1_s64_x3 + return vld1_s64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +float16x4x3_t test_vld1_f16_x3(float16_t const *a) { + // CHECK-LABEL: test_vld1_f16_x3 + return vld1_f16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +float32x2x3_t test_vld1_f32_x3(float32_t const *a) { + // CHECK-LABEL: test_vld1_f32_x3 + return vld1_f32_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +float64x1x3_t test_vld1_f64_x3(float64_t const *a) { + // CHECK-LABEL: test_vld1_f64_x3 + return vld1_f64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +poly8x8x3_t test_vld1_p8_x3(poly8_t const *a) { + // CHECK-LABEL: test_vld1_p8_x3 + return vld1_p8_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +poly16x4x3_t test_vld1_p16_x3(poly16_t const *a) { + // CHECK-LABEL: test_vld1_p16_x3 + return vld1_p16_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) { + // CHECK-LABEL: test_vld1_p64_x3 + return vld1_p64_x3(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +uint8x16x4_t test_vld1q_u8_x4(uint8_t const *a) { + // CHECK-LABEL: test_vld1q_u8_x4 + return vld1q_u8_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +uint16x8x4_t test_vld1q_u16_x4(uint16_t const *a) { + // CHECK-LABEL: test_vld1q_u16_x4 + return vld1q_u16_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +uint32x4x4_t test_vld1q_u32_x4(uint32_t const *a) { + // CHECK-LABEL: test_vld1q_u32_x4 + return vld1q_u32_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +uint64x2x4_t test_vld1q_u64_x4(uint64_t const *a) { + // CHECK-LABEL: test_vld1q_u64_x4 + return vld1q_u64_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +int8x16x4_t test_vld1q_s8_x4(int8_t const *a) { + // CHECK-LABEL: test_vld1q_s8_x4 + return vld1q_s8_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +int16x8x4_t test_vld1q_s16_x4(int16_t const 
*a) {
+  // CHECK-LABEL: test_vld1q_s16_x4
+  return vld1q_s16_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+int32x4x4_t test_vld1q_s32_x4(int32_t const *a) {
+  // CHECK-LABEL: test_vld1q_s32_x4
+  return vld1q_s32_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
+  // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+int64x2x4_t test_vld1q_s64_x4(int64_t const *a) {
+  // CHECK-LABEL: test_vld1q_s64_x4
+  return vld1q_s64_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+float16x8x4_t test_vld1q_f16_x4(float16_t const *a) {
+  // CHECK-LABEL: test_vld1q_f16_x4
+  return vld1q_f16_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+float32x4x4_t test_vld1q_f32_x4(float32_t const *a) {
+  // CHECK-LABEL: test_vld1q_f32_x4
+  return vld1q_f32_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
+  // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+}
+
+float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
+  // CHECK-LABEL: test_vld1q_f64_x4
+  return vld1q_f64_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+poly8x16x4_t test_vld1q_p8_x4(poly8_t const *a) {
+  // CHECK-LABEL: test_vld1q_p8_x4
+  return vld1q_p8_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b,
+  // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+}
+
+poly16x8x4_t test_vld1q_p16_x4(poly16_t const *a) {
+  // CHECK-LABEL: test_vld1q_p16_x4
+  return vld1q_p16_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
+  // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+}
+
+poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
+  // CHECK-LABEL: test_vld1q_p64_x4
+  return vld1q_p64_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
+  // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+uint8x8x4_t test_vld1_u8_x4(uint8_t const *a) {
+  // CHECK-LABEL: test_vld1_u8_x4
+  return vld1_u8_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b,
+  // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+uint16x4x4_t test_vld1_u16_x4(uint16_t const *a) {
+  // CHECK-LABEL: test_vld1_u16_x4
+  return vld1_u16_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
+  // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+uint32x2x4_t test_vld1_u32_x4(uint32_t const *a) {
+  // CHECK-LABEL: test_vld1_u32_x4
+  return vld1_u32_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s,
+  // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+uint64x1x4_t test_vld1_u64_x4(uint64_t const *a) {
+  // CHECK-LABEL: test_vld1_u64_x4
+  return vld1_u64_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d,
+  // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+int8x8x4_t test_vld1_s8_x4(int8_t const *a) {
+  // CHECK-LABEL: test_vld1_s8_x4
+  return vld1_s8_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b,
+  // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+}
+
+int16x4x4_t test_vld1_s16_x4(int16_t const *a) {
+  // CHECK-LABEL: test_vld1_s16_x4
+  return vld1_s16_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
+  // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+}
+
+int32x2x4_t test_vld1_s32_x4(int32_t const *a) {
+  // CHECK-LABEL: test_vld1_s32_x4
+  return vld1_s32_x4(a);
+  // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s,
+  // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+}
+
+int64x1x4_t test_vld1_s64_x4(int64_t const *a) { +
// CHECK-LABEL: test_vld1_s64_x4 + return vld1_s64_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +float16x4x4_t test_vld1_f16_x4(float16_t const *a) { + // CHECK-LABEL: test_vld1_f16_x4 + return vld1_f16_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +float32x2x4_t test_vld1_f32_x4(float32_t const *a) { + // CHECK-LABEL: test_vld1_f32_x4 + return vld1_f32_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +float64x1x4_t test_vld1_f64_x4(float64_t const *a) { + // CHECK-LABEL: test_vld1_f64_x4 + return vld1_f64_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +poly8x8x4_t test_vld1_p8_x4(poly8_t const *a) { + // CHECK-LABEL: test_vld1_p8_x4 + return vld1_p8_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +poly16x4x4_t test_vld1_p16_x4(poly16_t const *a) { + // CHECK-LABEL: test_vld1_p16_x4 + return vld1_p16_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) { + // CHECK-LABEL: test_vld1_p64_x4 + return vld1_p64_x4(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u8_x2(uint8_t *a, uint8x16x2_t b) { + // CHECK: test_vst1q_u8_x2 + vst1q_u8_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u16_x2(uint16_t *a, uint16x8x2_t b) { + // CHECK: test_vst1q_u16_x2 + vst1q_u16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u32_x2(uint32_t *a, uint32x4x2_t b) { + // CHECK: test_vst1q_u32_x2 + vst1q_u32_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u64_x2(uint64_t *a, uint64x2x2_t b) { + // CHECK: test_vst1q_u64_x2 + vst1q_u64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s8_x2(int8_t *a, int8x16x2_t b) { + // CHECK: test_vst1q_s8_x2 + vst1q_s8_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s16_x2(int16_t *a, int16x8x2_t b) { + // CHECK: test_vst1q_s16_x2 + vst1q_s16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s32_x2(int32_t *a, int32x4x2_t b) { + // CHECK: test_vst1q_s32_x2 + vst1q_s32_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s64_x2(int64_t *a, int64x2x2_t b) { + // CHECK: test_vst1q_s64_x2 + vst1q_s64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_f16_x2(float16_t *a, float16x8x2_t b) { + // CHECK: test_vst1q_f16_x2 + vst1q_f16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_f32_x2(float32_t *a, float32x4x2_t b) { + // CHECK: test_vst1q_f32_x2 + vst1q_f32_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) { + // CHECK: test_vst1q_f64_x2 + vst1q_f64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_p8_x2(poly8_t *a, poly8x16x2_t b) { + // CHECK: test_vst1q_p8_x2 + 
vst1q_p8_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_p16_x2(poly16_t *a, poly16x8x2_t b) { + // CHECK: test_vst1q_p16_x2 + vst1q_p16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) { + // CHECK: test_vst1q_p64_x2 + vst1q_p64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u8_x2(uint8_t *a, uint8x8x2_t b) { + // CHECK: test_vst1_u8_x2 + vst1_u8_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u16_x2(uint16_t *a, uint16x4x2_t b) { + // CHECK: test_vst1_u16_x2 + vst1_u16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u32_x2(uint32_t *a, uint32x2x2_t b) { + // CHECK: test_vst1_u32_x2 + vst1_u32_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u64_x2(uint64_t *a, uint64x1x2_t b) { + // CHECK: test_vst1_u64_x2 + vst1_u64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s8_x2(int8_t *a, int8x8x2_t b) { + // CHECK: test_vst1_s8_x2 + vst1_s8_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s16_x2(int16_t *a, int16x4x2_t b) { + // CHECK: test_vst1_s16_x2 + vst1_s16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s32_x2(int32_t *a, int32x2x2_t b) { + // CHECK: test_vst1_s32_x2 + vst1_s32_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s64_x2(int64_t *a, int64x1x2_t b) { + // CHECK: test_vst1_s64_x2 + vst1_s64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_f16_x2(float16_t *a, float16x4x2_t b) { + // CHECK: test_vst1_f16_x2 + vst1_f16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_f32_x2(float32_t *a, float32x2x2_t b) { + // CHECK: test_vst1_f32_x2 + vst1_f32_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) { + // CHECK: test_vst1_f64_x2 + vst1_f64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_p8_x2(poly8_t *a, poly8x8x2_t b) { + // CHECK: test_vst1_p8_x2 + vst1_p8_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst1_p16_x2(poly16_t *a, poly16x4x2_t b) { + // CHECK: test_vst1_p16_x2 + vst1_p16_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) { + // CHECK: test_vst1_p64_x2 + vst1_p64_x2(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u8_x3(uint8_t *a, uint8x16x3_t b) { + // CHECK: test_vst1q_u8_x3 + vst1q_u8_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_u16_x3(uint16_t *a, uint16x8x3_t b) { + // CHECK: test_vst1q_u16_x3 + vst1q_u16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_u32_x3(uint32_t *a, uint32x4x3_t b) { + // CHECK: test_vst1q_u32_x3 + vst1q_u32_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_u64_x3(uint64_t *a, uint64x2x3_t b) { + 
// CHECK: test_vst1q_u64_x3 + vst1q_u64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_s8_x3(int8_t *a, int8x16x3_t b) { + // CHECK: test_vst1q_s8_x3 + vst1q_s8_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_s16_x3(int16_t *a, int16x8x3_t b) { + // CHECK: test_vst1q_s16_x3 + vst1q_s16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_s32_x3(int32_t *a, int32x4x3_t b) { + // CHECK: test_vst1q_s32_x3 + vst1q_s32_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_s64_x3(int64_t *a, int64x2x3_t b) { + // CHECK: test_vst1q_s64_x3 + vst1q_s64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_f16_x3(float16_t *a, float16x8x3_t b) { + // CHECK: test_vst1q_f16_x3 + vst1q_f16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_f32_x3(float32_t *a, float32x4x3_t b) { + // CHECK: test_vst1q_f32_x3 + vst1q_f32_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) { + // CHECK: test_vst1q_f64_x3 + vst1q_f64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_p8_x3(poly8_t *a, poly8x16x3_t b) { + // CHECK: test_vst1q_p8_x3 + vst1q_p8_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_p16_x3(poly16_t *a, poly16x8x3_t b) { + // CHECK: test_vst1q_p16_x3 + vst1q_p16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) { + // CHECK: test_vst1q_p64_x3 + vst1q_p64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_u8_x3(uint8_t *a, uint8x8x3_t b) { + // CHECK: test_vst1_u8_x3 + vst1_u8_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_u16_x3(uint16_t *a, uint16x4x3_t b) { + // CHECK: test_vst1_u16_x3 + vst1_u16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_u32_x3(uint32_t *a, uint32x2x3_t b) { + // CHECK: test_vst1_u32_x3 + vst1_u32_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_u64_x3(uint64_t *a, uint64x1x3_t b) { + // CHECK: test_vst1_u64_x3 + vst1_u64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_s8_x3(int8_t *a, int8x8x3_t b) { + // CHECK: test_vst1_s8_x3 + vst1_s8_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_s16_x3(int16_t *a, int16x4x3_t b) { + // CHECK: test_vst1_s16_x3 + vst1_s16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_s32_x3(int32_t *a, int32x2x3_t b) { + // CHECK: test_vst1_s32_x3 + vst1_s32_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_s64_x3(int64_t *a, int64x1x3_t b) { 
+ // CHECK: test_vst1_s64_x3 + vst1_s64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_f16_x3(float16_t *a, float16x4x3_t b) { + // CHECK: test_vst1_f16_x3 + vst1_f16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_f32_x3(float32_t *a, float32x2x3_t b) { + // CHECK: test_vst1_f32_x3 + vst1_f32_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) { + // CHECK: test_vst1_f64_x3 + vst1_f64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_p8_x3(poly8_t *a, poly8x8x3_t b) { + // CHECK: test_vst1_p8_x3 + vst1_p8_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_p16_x3(poly16_t *a, poly16x4x3_t b) { + // CHECK: test_vst1_p16_x3 + vst1_p16_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) { + // CHECK: test_vst1_p64_x3 + vst1_p64_x3(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +void test_vst1q_u8_x4(uint8_t *a, uint8x16x4_t b) { + // CHECK: test_vst1q_u8_x4 + vst1q_u8_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u16_x4(uint16_t *a, uint16x8x4_t b) { + // CHECK: test_vst1q_u16_x4 + vst1q_u16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u32_x4(uint32_t *a, uint32x4x4_t b) { + // CHECK: test_vst1q_u32_x4 + vst1q_u32_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_u64_x4(uint64_t *a, uint64x2x4_t b) { + // CHECK: test_vst1q_u64_x4 + vst1q_u64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s8_x4(int8_t *a, int8x16x4_t b) { + // CHECK: test_vst1q_s8_x4 + vst1q_s8_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s16_x4(int16_t *a, int16x8x4_t b) { + // CHECK: test_vst1q_s16_x4 + vst1q_s16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s32_x4(int32_t *a, int32x4x4_t b) { + // CHECK: test_vst1q_s32_x4 + vst1q_s32_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_s64_x4(int64_t *a, int64x2x4_t b) { + // CHECK: test_vst1q_s64_x4 + vst1q_s64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_f16_x4(float16_t *a, float16x8x4_t b) { + // CHECK: test_vst1q_f16_x4 + vst1q_f16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_f32_x4(float32_t *a, float32x4x4_t b) { + // CHECK: test_vst1q_f32_x4 + vst1q_f32_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) { + // CHECK: 
test_vst1q_f64_x4 + vst1q_f64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_p8_x4(poly8_t *a, poly8x16x4_t b) { + // CHECK: test_vst1q_p8_x4 + vst1q_p8_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_p16_x4(poly16_t *a, poly16x8x4_t b) { + // CHECK: test_vst1q_p16_x4 + vst1q_p16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) { + // CHECK: test_vst1q_p64_x4 + vst1q_p64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u8_x4(uint8_t *a, uint8x8x4_t b) { + // CHECK: test_vst1_u8_x4 + vst1_u8_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u16_x4(uint16_t *a, uint16x4x4_t b) { + // CHECK: test_vst1_u16_x4 + vst1_u16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u32_x4(uint32_t *a, uint32x2x4_t b) { + // CHECK: test_vst1_u32_x4 + vst1_u32_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst1_u64_x4(uint64_t *a, uint64x1x4_t b) { + // CHECK: test_vst1_u64_x4 + vst1_u64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s8_x4(int8_t *a, int8x8x4_t b) { + // CHECK: test_vst1_s8_x4 + vst1_s8_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s16_x4(int16_t *a, int16x4x4_t b) { + // CHECK: test_vst1_s16_x4 + vst1_s16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s32_x4(int32_t *a, int32x2x4_t b) { + // CHECK: test_vst1_s32_x4 + vst1_s32_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst1_s64_x4(int64_t *a, int64x1x4_t b) { + // CHECK: test_vst1_s64_x4 + vst1_s64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_f16_x4(float16_t *a, float16x4x4_t b) { + // CHECK: test_vst1_f16_x4 + vst1_f16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void test_vst1_f32_x4(float32_t *a, float32x2x4_t b) { + // CHECK: test_vst1_f32_x4 + vst1_f32_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) { + // CHECK: test_vst1_f64_x4 + vst1_f64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_p8_x4(poly8_t *a, poly8x8x4_t b) { + // CHECK: test_vst1_p8_x4 + vst1_p8_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +void test_vst1_p16_x4(poly16_t *a, poly16x4x4_t b) { + // CHECK: test_vst1_p16_x4 + vst1_p16_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +void 
test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) { + // CHECK: test_vst1_p64_x4 + vst1_p64_x4(a, b); + // CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +int64_t test_vceqd_s64(int64_t a, int64_t b) { +// CHECK: test_vceqd_s64 +// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vceqd_s64(a, b); +} + +uint64_t test_vceqd_u64(uint64_t a, uint64_t b) { +// CHECK: test_vceqd_u64 +// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vceqd_u64(a, b); +} + +int64_t test_vceqzd_s64(int64_t a) { +// CHECK: test_vceqzd_s64 +// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0 + return (int64_t)vceqzd_s64(a); +} + +int64_t test_vceqzd_u64(int64_t a) { +// CHECK: test_vceqzd_u64 +// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0 + return (int64_t)vceqzd_u64(a); +} + +int64_t test_vcged_s64(int64_t a, int64_t b) { +// CHECK: test_vcged_s64 +// CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vcged_s64(a, b); +} + +uint64_t test_vcged_u64(uint64_t a, uint64_t b) { +// CHECK: test_vcged_u64 +// CHECK: cmhs {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcged_u64(a, b); +} + +int64_t test_vcgezd_s64(int64_t a) { +// CHECK: test_vcgezd_s64 +// CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, #0x0 + return (int64_t)vcgezd_s64(a); +} + +int64_t test_vcgtd_s64(int64_t a, int64_t b) { +// CHECK: test_vcgtd_s64 +// CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vcgtd_s64(a, b); +} + +uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) { +// CHECK: test_vcgtd_u64 +// CHECK: cmhi {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcgtd_u64(a, b); +} + +int64_t test_vcgtzd_s64(int64_t a) { +// CHECK: test_vcgtzd_s64 +// CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, #0x0 + return (int64_t)vcgtzd_s64(a); +} + +int64_t test_vcled_s64(int64_t a, int64_t b) { +// CHECK: test_vcled_s64 +// CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vcled_s64(a, b); +} + +uint64_t test_vcled_u64(uint64_t a, uint64_t b) { +// CHECK: test_vcled_u64 +// CHECK: cmhs {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcled_u64(a, 0); +} + +int64_t test_vclezd_s64(int64_t a) { +// CHECK: test_vclezd_s64 +// CHECK: cmle {{d[0-9]+}}, {{d[0-9]+}}, #0x0 + return (int64_t)vclezd_s64(a); +} + +int64_t test_vcltd_s64(int64_t a, int64_t b) { +// CHECK: test_vcltd_s64 +// CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vcltd_s64(a, b); +} + +uint64_t test_vcltd_u64(uint64_t a, uint64_t b) { +// CHECK: test_vcltd_u64 +// CHECK: cmhi {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcltd_u64(a, b); +} + +int64_t test_vcltzd_s64(int64_t a) { +// CHECK: test_vcltzd_s64 +// CHECK: cmlt {{d[0-9]+}}, {{d[0-9]+}}, #0x0 + return (int64_t)vcltzd_s64(a); +} + +int64_t test_vtstd_s64(int64_t a, int64_t b) { +// CHECK: test_vtstd_s64 +// CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vtstd_s64(a, b); +} + +uint64_t test_vtstd_u64(uint64_t a, uint64_t b) { +// CHECK: test_vtstd_u64 +// CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vtstd_u64(a, b); +} + +int64_t test_vabsd_s64(int64_t a) { +// CHECK: test_vabsd_s64 +// CHECK: abs {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vabsd_s64(a); +} + +int8_t test_vqabsb_s8(int8_t a) { +// CHECK: test_vqabsb_s8 +// CHECK: sqabs {{b[0-9]+}}, {{b[0-9]+}} + return (int8_t)vqabsb_s8(a); +} + +int16_t test_vqabsh_s16(int16_t a) { +// CHECK: test_vqabsh_s16 +// CHECK: sqabs {{h[0-9]+}}, {{h[0-9]+}} + return 
(int16_t)vqabsh_s16(a); +} + +int32_t test_vqabss_s32(int32_t a) { +// CHECK: test_vqabss_s32 +// CHECK: sqabs {{s[0-9]+}}, {{s[0-9]+}} + return (int32_t)vqabss_s32(a); +} + +int64_t test_vqabsd_s64(int64_t a) { +// CHECK: test_vqabsd_s64 +// CHECK: sqabs {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vqabsd_s64(a); +} + +int64_t test_vnegd_s64(int64_t a) { +// CHECK: test_vnegd_s64 +// CHECK: neg {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vnegd_s64(a); +} + +int8_t test_vqnegb_s8(int8_t a) { +// CHECK: test_vqnegb_s8 +// CHECK: sqneg {{b[0-9]+}}, {{b[0-9]+}} + return (int8_t)vqnegb_s8(a); +} + +int16_t test_vqnegh_s16(int16_t a) { +// CHECK: test_vqnegh_s16 +// CHECK: sqneg {{h[0-9]+}}, {{h[0-9]+}} + return (int16_t)vqnegh_s16(a); +} + +int32_t test_vqnegs_s32(int32_t a) { +// CHECK: test_vqnegs_s32 +// CHECK: sqneg {{s[0-9]+}}, {{s[0-9]+}} + return (int32_t)vqnegs_s32(a); +} + +int64_t test_vqnegd_s64(int64_t a) { +// CHECK: test_vqnegd_s64 +// CHECK: sqneg {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vqnegd_s64(a); +} + +int8_t test_vuqaddb_s8(int8_t a, int8_t b) { +// CHECK: test_vuqaddb_s8 +// CHECK: suqadd {{b[0-9]+}}, {{b[0-9]+}} + return (int8_t)vuqaddb_s8(a, b); +} + +int16_t test_vuqaddh_s16(int16_t a, int16_t b) { +// CHECK: test_vuqaddh_s16 +// CHECK: suqadd {{h[0-9]+}}, {{h[0-9]+}} + return (int16_t)vuqaddh_s16(a, b); +} + +int32_t test_vuqadds_s32(int32_t a, int32_t b) { +// CHECK: test_vuqadds_s32 +// CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}} + return (int32_t)vuqadds_s32(a, b); +} + +int64_t test_vuqaddd_s64(int64_t a, int64_t b) { +// CHECK: test_vuqaddd_s64 +// CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}} + return (int64_t)vuqaddd_s64(a, b); +} + +uint8_t test_vsqaddb_u8(uint8_t a, uint8_t b) { +// CHECK: test_vsqaddb_u8 +// CHECK: usqadd {{b[0-9]+}}, {{b[0-9]+}} + return (uint8_t)vsqaddb_u8(a, b); +} + +uint16_t test_vsqaddh_u16(uint16_t a, uint16_t b) { +// CHECK: test_vsqaddh_u16 +// CHECK: usqadd {{h[0-9]+}}, {{h[0-9]+}} + return (uint16_t)vsqaddh_u16(a, b); +} + +uint32_t test_vsqadds_u32(uint32_t a, uint32_t b) { +// CHECK: test_vsqadds_u32 +// CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vsqadds_u32(a, b); +} + +uint64_t test_vsqaddd_u64(uint64_t a, uint64_t b) { +// CHECK: test_vsqaddd_u64 +// CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vsqaddd_u64(a, b); +} + +int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) { +// CHECK: test_vqdmlalh_s16 +// CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} + return (int32_t)vqdmlalh_s16(a, b, c); +} + +int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) { +// CHECK: test_vqdmlals_s32 +// CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (int64_t)vqdmlals_s32(a, b, c); +} + +int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) { +// CHECK: test_vqdmlslh_s16 +// CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} + return (int32_t)vqdmlslh_s16(a, b, c); +} + +int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) { +// CHECK: test_vqdmlsls_s32 +// CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (int64_t)vqdmlsls_s32(a, b, c); +} + +int32_t test_vqdmullh_s16(int16_t a, int16_t b) { +// CHECK: test_vqdmullh_s16 +// CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} + return (int32_t)vqdmullh_s16(a, b); +} + +int64_t test_vqdmulls_s32(int32_t a, int32_t b) { +// CHECK: test_vqdmulls_s32 +// CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (int64_t)vqdmulls_s32(a, b); +} + +int8_t test_vqmovunh_s16(int16_t a) { +// CHECK: test_vqmovunh_s16 +// CHECK: 
sqxtun {{b[0-9]+}}, {{h[0-9]+}} + return (int8_t)vqmovunh_s16(a); +} + +int16_t test_vqmovuns_s32(int32_t a) { +// CHECK: test_vqmovuns_s32 +// CHECK: sqxtun {{h[0-9]+}}, {{s[0-9]+}} + return (int16_t)vqmovuns_s32(a); +} + +int32_t test_vqmovund_s64(int64_t a) { +// CHECK: test_vqmovund_s64 +// CHECK: sqxtun {{s[0-9]+}}, {{d[0-9]+}} + return (int32_t)vqmovund_s64(a); +} + +int8_t test_vqmovnh_s16(int16_t a) { +// CHECK: test_vqmovnh_s16 +// CHECK: sqxtn {{b[0-9]+}}, {{h[0-9]+}} + return (int8_t)vqmovnh_s16(a); +} + +int16_t test_vqmovns_s32(int32_t a) { +// CHECK: test_vqmovns_s32 +// CHECK: sqxtn {{h[0-9]+}}, {{s[0-9]+}} + return (int16_t)vqmovns_s32(a); +} + +int32_t test_vqmovnd_s64(int64_t a) { +// CHECK: test_vqmovnd_s64 +// CHECK: sqxtn {{s[0-9]+}}, {{d[0-9]+}} + return (int32_t)vqmovnd_s64(a); +} + +int8_t test_vqmovnh_u16(int16_t a) { +// CHECK: test_vqmovnh_u16 +// CHECK: uqxtn {{b[0-9]+}}, {{h[0-9]+}} + return (int8_t)vqmovnh_u16(a); +} + +int16_t test_vqmovns_u32(int32_t a) { +// CHECK: test_vqmovns_u32 +// CHECK: uqxtn {{h[0-9]+}}, {{s[0-9]+}} + return (int16_t)vqmovns_u32(a); +} + +int32_t test_vqmovnd_u64(int64_t a) { +// CHECK: test_vqmovnd_u64 +// CHECK: uqxtn {{s[0-9]+}}, {{d[0-9]+}} + return (int32_t)vqmovnd_u64(a); +} + +uint32_t test_vceqs_f32(float32_t a, float32_t b) { +// CHECK: test_vceqs_f32 +// CHECK: fcmeq {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vceqs_f32(a, b); +} + +uint64_t test_vceqd_f64(float64_t a, float64_t b) { +// CHECK: test_vceqd_f64 +// CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vceqd_f64(a, b); +} + +uint32_t test_vceqzs_f32(float32_t a) { +// CHECK: test_vceqzs_f32 +// CHECK: fcmeq {{s[0-9]+}}, {{s[0-9]+}}, #0.0 + return (uint32_t)vceqzs_f32(a); +} + +uint64_t test_vceqzd_f64(float64_t a) { +// CHECK: test_vceqzd_f64 +// CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, #0.0 + return (uint64_t)vceqzd_f64(a); +} + +uint32_t test_vcges_f32(float32_t a, float32_t b) { +// CHECK: test_vcges_f32 +// CHECK: fcmge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcges_f32(a, b); +} + +uint64_t test_vcged_f64(float64_t a, float64_t b) { +// CHECK: test_vcged_f64 +// CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcged_f64(a, b); +} + +uint32_t test_vcgezs_f32(float32_t a) { +// CHECK: test_vcgezs_f32 +// CHECK: fcmge {{s[0-9]+}}, {{s[0-9]+}}, #0.0 + return (uint32_t)vcgezs_f32(a); +} + +uint64_t test_vcgezd_f64(float64_t a) { +// CHECK: test_vcgezd_f64 +// CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, #0.0 + return (uint64_t)vcgezd_f64(a); +} + +uint32_t test_vcgts_f32(float32_t a, float32_t b) { +// CHECK: test_vcgts_f32 +// CHECK: fcmgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcgts_f32(a, b); +} + +uint64_t test_vcgtd_f64(float64_t a, float64_t b) { +// CHECK: test_vcgtd_f64 +// CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcgtd_f64(a, b); +} + +uint32_t test_vcgtzs_f32(float32_t a) { +// CHECK: test_vcgtzs_f32 +// CHECK: fcmgt {{s[0-9]+}}, {{s[0-9]+}}, #0.0 + return (uint32_t)vcgtzs_f32(a); +} + +uint64_t test_vcgtzd_f64(float64_t a) { +// CHECK: test_vcgtzd_f64 +// CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, #0.0 + return (uint64_t)vcgtzd_f64(a); +} + +uint32_t test_vcles_f32(float32_t a, float32_t b) { +// CHECK: test_vcles_f32 +// CHECK: fcmge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcles_f32(a, b); +} + +uint64_t test_vcled_f64(float64_t a, float64_t b) { +// CHECK: test_vcled_f64 +// CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + 
return (uint64_t)vcled_f64(a, b); +} + +uint32_t test_vclezs_f32(float32_t a) { +// CHECK: test_vclezs_f32 +// CHECK: fcmle {{s[0-9]+}}, {{s[0-9]+}}, #0.0 + return (uint32_t)vclezs_f32(a); +} + +uint64_t test_vclezd_f64(float64_t a) { +// CHECK: test_vclezd_f64 +// CHECK: fcmle {{d[0-9]+}}, {{d[0-9]+}}, #0.0 + return (uint64_t)vclezd_f64(a); +} + +uint32_t test_vclts_f32(float32_t a, float32_t b) { +// CHECK: test_vclts_f32 +// CHECK: fcmgt {{s[0-9]+}}, s1, s0 + return (uint32_t)vclts_f32(a, b); +} + +uint64_t test_vcltd_f64(float64_t a, float64_t b) { +// CHECK: test_vcltd_f64 +// CHECK: fcmgt {{d[0-9]+}}, d1, d0 + return (uint64_t)vcltd_f64(a, b); +} + +uint32_t test_vcltzs_f32(float32_t a) { +// CHECK: test_vcltzs_f32 +// CHECK: fcmlt {{s[0-9]+}}, {{s[0-9]+}}, #0.0 + return (uint32_t)vcltzs_f32(a); +} + +uint64_t test_vcltzd_f64(float64_t a) { +// CHECK: test_vcltzd_f64 +// CHECK: fcmlt {{d[0-9]+}}, {{d[0-9]+}}, #0.0 + return (uint64_t)vcltzd_f64(a); +} + +uint32_t test_vcages_f32(float32_t a, float32_t b) { +// CHECK: test_vcages_f32 +// CHECK: facge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcages_f32(a, b); +} + +uint64_t test_vcaged_f64(float64_t a, float64_t b) { +// CHECK: test_vcaged_f64 +// CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcaged_f64(a, b); +} + +uint32_t test_vcagts_f32(float32_t a, float32_t b) { +// CHECK: test_vcagts_f32 +// CHECK: facgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcagts_f32(a, b); +} + +uint64_t test_vcagtd_f64(float64_t a, float64_t b) { +// CHECK: test_vcagtd_f64 +// CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcagtd_f64(a, b); +} + +uint32_t test_vcales_f32(float32_t a, float32_t b) { +// CHECK: test_vcales_f32 +// CHECK: facge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcales_f32(a, b); +} + +uint64_t test_vcaled_f64(float64_t a, float64_t b) { +// CHECK: test_vcaled_f64 +// CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcaled_f64(a, b); +} + +uint32_t test_vcalts_f32(float32_t a, float32_t b) { +// CHECK: test_vcalts_f32 +// CHECK: facgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return (uint32_t)vcalts_f32(a, b); +} + +uint64_t test_vcaltd_f64(float64_t a, float64_t b) { +// CHECK: test_vcaltd_f64 +// CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return (uint64_t)vcaltd_f64(a, b); +} + +int64_t test_vshrd_n_s64(int64_t a) { +// CHECK-LABEL: test_vshrd_n_s64 +// CHECK: sshr {{d[0-9]+}}, {{d[0-9]+}}, #1 + return (int64_t)vshrd_n_s64(a, 1); +} + +int64x1_t test_vshr_n_s64(int64x1_t a) { +// CHECK-LABEL: test_vshr_n_s64 +// CHECK: sshr {{d[0-9]+}}, {{d[0-9]+}}, #1 + return vshr_n_s64(a, 1); +} + +uint64_t test_vshrd_n_u64(uint64_t a) { +// CHECK-LABEL: test_vshrd_n_u64 +// CHECK: ushr {{d[0-9]+}}, {{d[0-9]+}}, #64 + return (uint64_t)vshrd_n_u64(a, 64); +} + +uint64x1_t test_vshr_n_u64(uint64x1_t a) { +// CHECK-LABEL: test_vshr_n_u64 +// CHECK: ushr {{d[0-9]+}}, {{d[0-9]+}}, #1 + return vshr_n_u64(a, 1); +} + +int64_t test_vrshrd_n_s64(int64_t a) { +// CHECK-LABEL: test_vrshrd_n_s64 +// CHECK: srshr {{d[0-9]+}}, {{d[0-9]+}}, #63 + return (int64_t)vrshrd_n_s64(a, 63); +} + +int64x1_t test_vrshr_n_s64(int64x1_t a) { +// CHECK: test_vrshr_n_s64 +// CHECK: srshr d{{[0-9]+}}, d{{[0-9]+}}, #1 + return vrshr_n_s64(a, 1); +} + +uint64_t test_vrshrd_n_u64(uint64_t a) { +// CHECK-LABEL: test_vrshrd_n_u64 +// CHECK: urshr {{d[0-9]+}}, {{d[0-9]+}}, #63 + return (uint64_t)vrshrd_n_u64(a, 63); +} + +uint64x1_t test_vrshr_n_u64(uint64x1_t 
a) { +// CHECK: test_vrshr_n_u64 +// CHECK: urshr d{{[0-9]+}}, d{{[0-9]+}}, #1 + return vrshr_n_u64(a, 1); +} + +int64_t test_vsrad_n_s64(int64_t a, int64_t b) { +// CHECK-LABEL: test_vsrad_n_s64 +// CHECK: ssra {{d[0-9]+}}, {{d[0-9]+}}, #63 + return (int64_t)vsrad_n_s64(a, b, 63); +} + +int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) { +// CHECK: test_vsra_n_s64 +// CHECK: ssra d{{[0-9]+}}, d{{[0-9]+}}, #1 + return vsra_n_s64(a, b, 1); +} + +uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) { +// CHECK-LABEL: test_vsrad_n_u64 +// CHECK: usra {{d[0-9]+}}, {{d[0-9]+}}, #63 + return (uint64_t)vsrad_n_u64(a, b, 63); +} + +uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) { +// CHECK: test_vsra_n_u64 +// CHECK: usra d{{[0-9]+}}, d{{[0-9]+}}, #1 + return vsra_n_u64(a, b, 1); +} + +int64_t test_vrsrad_n_s64(int64_t a, int64_t b) { +// CHECK-LABEL: test_vrsrad_n_s64 +// CHECK: srsra {{d[0-9]+}}, {{d[0-9]+}}, #63 + return (int64_t)vrsrad_n_s64(a, b, 63); +} + +int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) { +// CHECK: test_vrsra_n_s64 +// CHECK: srsra d{{[0-9]+}}, d{{[0-9]+}}, #1 + return vrsra_n_s64(a, b, 1); +} + +uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) { +// CHECK-LABEL: test_vrsrad_n_u64 +// CHECK: ursra {{d[0-9]+}}, {{d[0-9]+}}, #63 + return (uint64_t)vrsrad_n_u64(a, b, 63); +} + +uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) { +// CHECK: test_vrsra_n_u64 +// CHECK: ursra d{{[0-9]+}}, d{{[0-9]+}}, #1 + return vrsra_n_u64(a, b, 1); +} + +int64_t test_vshld_n_s64(int64_t a) { +// CHECK-LABEL: test_vshld_n_s64 +// CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #0 + return (int64_t)vshld_n_s64(a, 0); +} +int64x1_t test_vshl_n_s64(int64x1_t a) { +// CHECK: test_vshl_n_s64 +// CHECK: shl d{{[0-9]+}}, d{{[0-9]+}}, #1 + return vshl_n_s64(a, 1); +} + +uint64_t test_vshld_n_u64(uint64_t a) { +// CHECK-LABEL: test_vshld_n_u64 +// CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63 + return (uint64_t)vshld_n_u64(a, 63); +} + +uint64x1_t test_vshl_n_u64(uint64x1_t a) { +// CHECK: test_vshl_n_u64 +// CHECK: shl d{{[0-9]+}}, d{{[0-9]+}}, #1 + return vshl_n_u64(a, 1); +} + +int8_t test_vqshlb_n_s8(int8_t a) { +// CHECK-LABEL: test_vqshlb_n_s8 +// CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, #7 + return (int8_t)vqshlb_n_s8(a, 7); +} + +int16_t test_vqshlh_n_s16(int16_t a) { +// CHECK-LABEL: test_vqshlh_n_s16 +// CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, #15 + return (int16_t)vqshlh_n_s16(a, 15); +} + +int32_t test_vqshls_n_s32(int32_t a) { +// CHECK-LABEL: test_vqshls_n_s32 +// CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, #31 + return (int32_t)vqshls_n_s32(a, 31); +} + +int64_t test_vqshld_n_s64(int64_t a) { +// CHECK-LABEL: test_vqshld_n_s64 +// CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #63 + return (int64_t)vqshld_n_s64(a, 63); +} + +int64x1_t test_vqshl_n_s64(int64x1_t a) { +// CHECK: test_vqshl_n_s64 +// CHECK: sqshl d{{[0-9]+}}, d{{[0-9]+}}, #1 + return vqshl_n_s64(a, 1); +} + +uint8_t test_vqshlb_n_u8(uint8_t a) { +// CHECK-LABEL: test_vqshlb_n_u8 +// CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, #7 + return (uint8_t)vqshlb_n_u8(a, 7); +} + +uint16_t test_vqshlh_n_u16(uint16_t a) { +// CHECK-LABEL: test_vqshlh_n_u16 +// CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, #15 + return (uint16_t)vqshlh_n_u16(a, 15); +} + +uint32_t test_vqshls_n_u32(uint32_t a) { +// CHECK-LABEL: test_vqshls_n_u32 +// CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, #31 + return (uint32_t)vqshls_n_u32(a, 31); +} + +uint64_t test_vqshld_n_u64(uint64_t a) { +// CHECK-LABEL: test_vqshld_n_u64 +// CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #63 + return 
(uint64_t)vqshld_n_u64(a, 63); +} + +uint64x1_t test_vqshl_n_u64(uint64x1_t a) { +// CHECK: test_vqshl_n_u64 +// CHECK: uqshl d{{[0-9]+}}, d{{[0-9]+}}, #1 + return vqshl_n_u64(a, 1); +} + +int8_t test_vqshlub_n_s8(int8_t a) { +// CHECK-LABEL: test_vqshlub_n_s8 +// CHECK: sqshlu {{b[0-9]+}}, {{b[0-9]+}}, #7 + return (int8_t)vqshlub_n_s8(a, 7); +} + +int16_t test_vqshluh_n_s16(int16_t a) { +// CHECK-LABEL: test_vqshluh_n_s16 +// CHECK: sqshlu {{h[0-9]+}}, {{h[0-9]+}}, #15 + return (int16_t)vqshluh_n_s16(a, 15); +} + +int32_t test_vqshlus_n_s32(int32_t a) { +// CHECK-LABEL: test_vqshlus_n_s32 +// CHECK: sqshlu {{s[0-9]+}}, {{s[0-9]+}}, #31 + return (int32_t)vqshlus_n_s32(a, 31); +} + +int64_t test_vqshlud_n_s64(int64_t a) { +// CHECK-LABEL: test_vqshlud_n_s64 +// CHECK: sqshlu {{d[0-9]+}}, {{d[0-9]+}}, #63 + return (int64_t)vqshlud_n_s64(a, 63); +} + +uint64x1_t test_vqshlu_n_s64(int64x1_t a) { +// CHECK: test_vqshlu_n_s64 +// CHECK: sqshlu d{{[0-9]+}}, d{{[0-9]+}}, #1 + return vqshlu_n_s64(a, 1); +} + +int64_t test_vsrid_n_s64(int64_t a, int64_t b) { +// CHECK-LABEL: test_vsrid_n_s64 +// CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63 + return (int64_t)vsrid_n_s64(a, b, 63); +} + +int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) { +// CHECK: test_vsri_n_s64 +// CHECK: sri d{{[0-9]+}}, d{{[0-9]+}}, #1 + return vsri_n_s64(a, b, 1); +} + +uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) { +// CHECK-LABEL: test_vsrid_n_u64 +// CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63 + return (uint64_t)vsrid_n_u64(a, b, 63); +} + +uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) { +// CHECK: test_vsri_n_u64 +// CHECK: sri d{{[0-9]+}}, d{{[0-9]+}}, #1 + return vsri_n_u64(a, b, 1); +} + +int64_t test_vslid_n_s64(int64_t a, int64_t b) { +// CHECK-LABEL: test_vslid_n_s64 +// CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63 + return (int64_t)vslid_n_s64(a, b, 63); +} + +int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) { +// CHECK: test_vsli_n_s64 +// CHECK: sli d{{[0-9]+}}, d{{[0-9]+}}, #1 + return vsli_n_s64(a, b, 1); +} + +uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) { +// CHECK-LABEL: test_vslid_n_u64 +// CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63 + return (uint64_t)vslid_n_u64(a, b, 63); +} + +uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) { +// CHECK: test_vsli_n_u64 +// CHECK: sli d{{[0-9]+}}, d{{[0-9]+}}, #1 + return vsli_n_u64(a, b, 1); +} + +int8_t test_vqshrnh_n_s16(int16_t a) { +// CHECK-LABEL: test_vqshrnh_n_s16 +// CHECK: sqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 + return (int8_t)vqshrnh_n_s16(a, 8); +} + +int16_t test_vqshrns_n_s32(int32_t a) { +// CHECK-LABEL: test_vqshrns_n_s32 +// CHECK: sqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 + return (int16_t)vqshrns_n_s32(a, 16); +} + +int32_t test_vqshrnd_n_s64(int64_t a) { +// CHECK-LABEL: test_vqshrnd_n_s64 +// CHECK: sqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32 + return (int32_t)vqshrnd_n_s64(a, 32); +} + +uint8_t test_vqshrnh_n_u16(uint16_t a) { +// CHECK-LABEL: test_vqshrnh_n_u16 +// CHECK: uqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 + return (uint8_t)vqshrnh_n_u16(a, 8); +} + +uint16_t test_vqshrns_n_u32(uint32_t a) { +// CHECK-LABEL: test_vqshrns_n_u32 +// CHECK: uqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 + return (uint16_t)vqshrns_n_u32(a, 16); +} + +uint32_t test_vqshrnd_n_u64(uint64_t a) { +// CHECK-LABEL: test_vqshrnd_n_u64 +// CHECK: uqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32 + return (uint32_t)vqshrnd_n_u64(a, 32); +} + +int8_t test_vqrshrnh_n_s16(int16_t a) { +// CHECK-LABEL: test_vqrshrnh_n_s16 +// CHECK: sqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 + return (int8_t)vqrshrnh_n_s16(a, 
8); +} + +int16_t test_vqrshrns_n_s32(int32_t a) { +// CHECK-LABEL: test_vqrshrns_n_s32 +// CHECK: sqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 + return (int16_t)vqrshrns_n_s32(a, 16); +} + +int32_t test_vqrshrnd_n_s64(int64_t a) { +// CHECK-LABEL: test_vqrshrnd_n_s64 +// CHECK: sqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32 + return (int32_t)vqrshrnd_n_s64(a, 32); +} + +uint8_t test_vqrshrnh_n_u16(uint16_t a) { +// CHECK-LABEL: test_vqrshrnh_n_u16 +// CHECK: uqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 + return (uint8_t)vqrshrnh_n_u16(a, 8); +} + +uint16_t test_vqrshrns_n_u32(uint32_t a) { +// CHECK-LABEL: test_vqrshrns_n_u32 +// CHECK: uqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 + return (uint16_t)vqrshrns_n_u32(a, 16); +} + +uint32_t test_vqrshrnd_n_u64(uint64_t a) { +// CHECK-LABEL: test_vqrshrnd_n_u64 +// CHECK: uqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32 + return (uint32_t)vqrshrnd_n_u64(a, 32); +} + +int8_t test_vqshrunh_n_s16(int16_t a) { +// CHECK-LABEL: test_vqshrunh_n_s16 +// CHECK: sqshrun {{b[0-9]+}}, {{h[0-9]+}}, #8 + return (int8_t)vqshrunh_n_s16(a, 8); +} + +int16_t test_vqshruns_n_s32(int32_t a) { +// CHECK-LABEL: test_vqshruns_n_s32 +// CHECK: sqshrun {{h[0-9]+}}, {{s[0-9]+}}, #16 + return (int16_t)vqshruns_n_s32(a, 16); +} + +int32_t test_vqshrund_n_s64(int64_t a) { +// CHECK-LABEL: test_vqshrund_n_s64 +// CHECK: sqshrun {{s[0-9]+}}, {{d[0-9]+}}, #32 + return (int32_t)vqshrund_n_s64(a, 32); +} + +int8_t test_vqrshrunh_n_s16(int16_t a) { +// CHECK-LABEL: test_vqrshrunh_n_s16 +// CHECK: sqrshrun {{b[0-9]+}}, {{h[0-9]+}}, #8 + return (int8_t)vqrshrunh_n_s16(a, 8); +} + +int16_t test_vqrshruns_n_s32(int32_t a) { +// CHECK-LABEL: test_vqrshruns_n_s32 +// CHECK: sqrshrun {{h[0-9]+}}, {{s[0-9]+}}, #16 + return (int16_t)vqrshruns_n_s32(a, 16); +} + +int32_t test_vqrshrund_n_s64(int64_t a) { +// CHECK-LABEL: test_vqrshrund_n_s64 +// CHECK: sqrshrun {{s[0-9]+}}, {{d[0-9]+}}, #32 + return (int32_t)vqrshrund_n_s64(a, 32); +} + +float32_t test_vcvts_n_f32_s32(int32_t a) { +// CHECK: test_vcvts_n_f32_s32 +// CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}}, #1 + return vcvts_n_f32_s32(a, 1); +} + +float64_t test_vcvtd_n_f64_s64(int64_t a) { +// CHECK: test_vcvtd_n_f64_s64 +// CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}, #1 + return vcvtd_n_f64_s64(a, 1); +} + +float32_t test_vcvts_n_f32_u32(uint32_t a) { +// CHECK: test_vcvts_n_f32_u32 +// CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}}, #32 + return vcvts_n_f32_u32(a, 32); +} + +float64_t test_vcvtd_n_f64_u64(uint64_t a) { +// CHECK: test_vcvtd_n_f64_u64 +// CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}}, #64 + return vcvtd_n_f64_u64(a, 64); +} + +int32_t test_vcvts_n_s32_f32(float32_t a) { +// CHECK: test_vcvts_n_s32_f32 +// CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}}, #1 + return (int32_t)vcvts_n_s32_f32(a, 1); +} + +int64_t test_vcvtd_n_s64_f64(float64_t a) { +// CHECK: test_vcvtd_n_s64_f64 +// CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}, #1 + return (int64_t)vcvtd_n_s64_f64(a, 1); +} + +uint32_t test_vcvts_n_u32_f32(float32_t a) { +// CHECK: test_vcvts_n_u32_f32 +// CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}}, #32 + return (uint32_t)vcvts_n_u32_f32(a, 32); +} + +uint64_t test_vcvtd_n_u64_f64(float64_t a) { +// CHECK: test_vcvtd_n_u64_f64 +// CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}}, #64 + return (uint64_t)vcvtd_n_u64_f64(a, 64); +} + +// CHECK-LABEL: test_vreinterpret_s8_s16 +// CHECK-NEXT: ret +int8x8_t test_vreinterpret_s8_s16(int16x4_t a) { + return vreinterpret_s8_s16(a); +} + +// CHECK-LABEL: test_vreinterpret_s8_s32 +// CHECK-NEXT: ret +int8x8_t test_vreinterpret_s8_s32(int32x2_t a) { + return 
vreinterpret_s8_s32(a); +} + +// CHECK-LABEL: test_vreinterpret_s8_s64 +// CHECK-NEXT: ret +int8x8_t test_vreinterpret_s8_s64(int64x1_t a) { + return vreinterpret_s8_s64(a); +} + +// CHECK-LABEL: test_vreinterpret_s8_u8 +// CHECK-NEXT: ret +int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) { + return vreinterpret_s8_u8(a); +} + +// CHECK-LABEL: test_vreinterpret_s8_u16 +// CHECK-NEXT: ret +int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) { + return vreinterpret_s8_u16(a); +} + +// CHECK-LABEL: test_vreinterpret_s8_u32 +// CHECK-NEXT: ret +int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) { + return vreinterpret_s8_u32(a); +} + +// CHECK-LABEL: test_vreinterpret_s8_u64 +// CHECK-NEXT: ret +int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) { + return vreinterpret_s8_u64(a); +} + +// CHECK-LABEL: test_vreinterpret_s8_f16 +// CHECK-NEXT: ret +int8x8_t test_vreinterpret_s8_f16(float16x4_t a) { + return vreinterpret_s8_f16(a); +} + +// CHECK-LABEL: test_vreinterpret_s8_f32 +// CHECK-NEXT: ret +int8x8_t test_vreinterpret_s8_f32(float32x2_t a) { + return vreinterpret_s8_f32(a); +} + +// CHECK-LABEL: test_vreinterpret_s8_f64 +// CHECK-NEXT: ret +int8x8_t test_vreinterpret_s8_f64(float64x1_t a) { + return vreinterpret_s8_f64(a); +} + +// CHECK-LABEL: test_vreinterpret_s8_p8 +// CHECK-NEXT: ret +int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) { + return vreinterpret_s8_p8(a); +} + +// CHECK-LABEL: test_vreinterpret_s8_p16 +// CHECK-NEXT: ret +int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) { + return vreinterpret_s8_p16(a); +} + +// CHECK-LABEL: test_vreinterpret_s8_p64 +// CHECK-NEXT: ret +int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) { + return vreinterpret_s8_p64(a); +} + +// CHECK-LABEL: test_vreinterpret_s16_s8 +// CHECK-NEXT: ret +int16x4_t test_vreinterpret_s16_s8(int8x8_t a) { + return vreinterpret_s16_s8(a); +} + +// CHECK-LABEL: test_vreinterpret_s16_s32 +// CHECK-NEXT: ret +int16x4_t test_vreinterpret_s16_s32(int32x2_t a) { + return vreinterpret_s16_s32(a); +} + +// CHECK-LABEL: test_vreinterpret_s16_s64 +// CHECK-NEXT: ret +int16x4_t test_vreinterpret_s16_s64(int64x1_t a) { + return vreinterpret_s16_s64(a); +} + +// CHECK-LABEL: test_vreinterpret_s16_u8 +// CHECK-NEXT: ret +int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) { + return vreinterpret_s16_u8(a); +} + +// CHECK-LABEL: test_vreinterpret_s16_u16 +// CHECK-NEXT: ret +int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) { + return vreinterpret_s16_u16(a); +} + +// CHECK-LABEL: test_vreinterpret_s16_u32 +// CHECK-NEXT: ret +int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) { + return vreinterpret_s16_u32(a); +} + +// CHECK-LABEL: test_vreinterpret_s16_u64 +// CHECK-NEXT: ret +int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) { + return vreinterpret_s16_u64(a); +} + +// CHECK-LABEL: test_vreinterpret_s16_f16 +// CHECK-NEXT: ret +int16x4_t test_vreinterpret_s16_f16(float16x4_t a) { + return vreinterpret_s16_f16(a); +} + +// CHECK-LABEL: test_vreinterpret_s16_f32 +// CHECK-NEXT: ret +int16x4_t test_vreinterpret_s16_f32(float32x2_t a) { + return vreinterpret_s16_f32(a); +} + +// CHECK-LABEL: test_vreinterpret_s16_f64 +// CHECK-NEXT: ret +int16x4_t test_vreinterpret_s16_f64(float64x1_t a) { + return vreinterpret_s16_f64(a); +} + +// CHECK-LABEL: test_vreinterpret_s16_p8 +// CHECK-NEXT: ret +int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) { + return vreinterpret_s16_p8(a); +} + +// CHECK-LABEL: test_vreinterpret_s16_p16 +// CHECK-NEXT: ret +int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) { + return vreinterpret_s16_p16(a); +} + +// CHECK-LABEL: 
test_vreinterpret_s16_p64 +// CHECK-NEXT: ret +int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) { + return vreinterpret_s16_p64(a); +} + +// CHECK-LABEL: test_vreinterpret_s32_s8 +// CHECK-NEXT: ret +int32x2_t test_vreinterpret_s32_s8(int8x8_t a) { + return vreinterpret_s32_s8(a); +} + +// CHECK-LABEL: test_vreinterpret_s32_s16 +// CHECK-NEXT: ret +int32x2_t test_vreinterpret_s32_s16(int16x4_t a) { + return vreinterpret_s32_s16(a); +} + +// CHECK-LABEL: test_vreinterpret_s32_s64 +// CHECK-NEXT: ret +int32x2_t test_vreinterpret_s32_s64(int64x1_t a) { + return vreinterpret_s32_s64(a); +} + +// CHECK-LABEL: test_vreinterpret_s32_u8 +// CHECK-NEXT: ret +int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) { + return vreinterpret_s32_u8(a); +} + +// CHECK-LABEL: test_vreinterpret_s32_u16 +// CHECK-NEXT: ret +int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) { + return vreinterpret_s32_u16(a); +} + +// CHECK-LABEL: test_vreinterpret_s32_u32 +// CHECK-NEXT: ret +int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) { + return vreinterpret_s32_u32(a); +} + +// CHECK-LABEL: test_vreinterpret_s32_u64 +// CHECK-NEXT: ret +int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) { + return vreinterpret_s32_u64(a); +} + +// CHECK-LABEL: test_vreinterpret_s32_f16 +// CHECK-NEXT: ret +int32x2_t test_vreinterpret_s32_f16(float16x4_t a) { + return vreinterpret_s32_f16(a); +} + +// CHECK-LABEL: test_vreinterpret_s32_f32 +// CHECK-NEXT: ret +int32x2_t test_vreinterpret_s32_f32(float32x2_t a) { + return vreinterpret_s32_f32(a); +} + +// CHECK-LABEL: test_vreinterpret_s32_f64 +// CHECK-NEXT: ret +int32x2_t test_vreinterpret_s32_f64(float64x1_t a) { + return vreinterpret_s32_f64(a); +} + +// CHECK-LABEL: test_vreinterpret_s32_p8 +// CHECK-NEXT: ret +int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) { + return vreinterpret_s32_p8(a); +} + +// CHECK-LABEL: test_vreinterpret_s32_p16 +// CHECK-NEXT: ret +int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) { + return vreinterpret_s32_p16(a); +} + +// CHECK-LABEL: test_vreinterpret_s32_p64 +// CHECK-NEXT: ret +int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) { + return vreinterpret_s32_p64(a); +} + +// CHECK-LABEL: test_vreinterpret_s64_s8 +// CHECK-NEXT: ret +int64x1_t test_vreinterpret_s64_s8(int8x8_t a) { + return vreinterpret_s64_s8(a); +} + +// CHECK-LABEL: test_vreinterpret_s64_s16 +// CHECK-NEXT: ret +int64x1_t test_vreinterpret_s64_s16(int16x4_t a) { + return vreinterpret_s64_s16(a); +} + +// CHECK-LABEL: test_vreinterpret_s64_s32 +// CHECK-NEXT: ret +int64x1_t test_vreinterpret_s64_s32(int32x2_t a) { + return vreinterpret_s64_s32(a); +} + +// CHECK-LABEL: test_vreinterpret_s64_u8 +// CHECK-NEXT: ret +int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) { + return vreinterpret_s64_u8(a); +} + +// CHECK-LABEL: test_vreinterpret_s64_u16 +// CHECK-NEXT: ret +int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) { + return vreinterpret_s64_u16(a); +} + +// CHECK-LABEL: test_vreinterpret_s64_u32 +// CHECK-NEXT: ret +int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) { + return vreinterpret_s64_u32(a); +} + +// CHECK-LABEL: test_vreinterpret_s64_u64 +// CHECK-NEXT: ret +int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) { + return vreinterpret_s64_u64(a); +} + +// CHECK-LABEL: test_vreinterpret_s64_f16 +// CHECK-NEXT: ret +int64x1_t test_vreinterpret_s64_f16(float16x4_t a) { + return vreinterpret_s64_f16(a); +} + +// CHECK-LABEL: test_vreinterpret_s64_f32 +// CHECK-NEXT: ret +int64x1_t test_vreinterpret_s64_f32(float32x2_t a) { + return vreinterpret_s64_f32(a); +} + +// CHECK-LABEL: 
test_vreinterpret_s64_f64 +// CHECK-NEXT: ret +int64x1_t test_vreinterpret_s64_f64(float64x1_t a) { + return vreinterpret_s64_f64(a); +} + +// CHECK-LABEL: test_vreinterpret_s64_p8 +// CHECK-NEXT: ret +int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) { + return vreinterpret_s64_p8(a); +} + +// CHECK-LABEL: test_vreinterpret_s64_p16 +// CHECK-NEXT: ret +int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) { + return vreinterpret_s64_p16(a); +} + +// CHECK-LABEL: test_vreinterpret_s64_p64 +// CHECK-NEXT: ret +int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) { + return vreinterpret_s64_p64(a); +} + +// CHECK-LABEL: test_vreinterpret_u8_s8 +// CHECK-NEXT: ret +uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) { + return vreinterpret_u8_s8(a); +} + +// CHECK-LABEL: test_vreinterpret_u8_s16 +// CHECK-NEXT: ret +uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) { + return vreinterpret_u8_s16(a); +} + +// CHECK-LABEL: test_vreinterpret_u8_s32 +// CHECK-NEXT: ret +uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) { + return vreinterpret_u8_s32(a); +} + +// CHECK-LABEL: test_vreinterpret_u8_s64 +// CHECK-NEXT: ret +uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) { + return vreinterpret_u8_s64(a); +} + +// CHECK-LABEL: test_vreinterpret_u8_u16 +// CHECK-NEXT: ret +uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) { + return vreinterpret_u8_u16(a); +} + +// CHECK-LABEL: test_vreinterpret_u8_u32 +// CHECK-NEXT: ret +uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) { + return vreinterpret_u8_u32(a); +} + +// CHECK-LABEL: test_vreinterpret_u8_u64 +// CHECK-NEXT: ret +uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) { + return vreinterpret_u8_u64(a); +} + +// CHECK-LABEL: test_vreinterpret_u8_f16 +// CHECK-NEXT: ret +uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) { + return vreinterpret_u8_f16(a); +} + +// CHECK-LABEL: test_vreinterpret_u8_f32 +// CHECK-NEXT: ret +uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) { + return vreinterpret_u8_f32(a); +} + +// CHECK-LABEL: test_vreinterpret_u8_f64 +// CHECK-NEXT: ret +uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) { + return vreinterpret_u8_f64(a); +} + +// CHECK-LABEL: test_vreinterpret_u8_p8 +// CHECK-NEXT: ret +uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) { + return vreinterpret_u8_p8(a); +} + +// CHECK-LABEL: test_vreinterpret_u8_p16 +// CHECK-NEXT: ret +uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) { + return vreinterpret_u8_p16(a); +} + +// CHECK-LABEL: test_vreinterpret_u8_p64 +// CHECK-NEXT: ret +uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) { + return vreinterpret_u8_p64(a); +} + +// CHECK-LABEL: test_vreinterpret_u16_s8 +// CHECK-NEXT: ret +uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) { + return vreinterpret_u16_s8(a); +} + +// CHECK-LABEL: test_vreinterpret_u16_s16 +// CHECK-NEXT: ret +uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) { + return vreinterpret_u16_s16(a); +} + +// CHECK-LABEL: test_vreinterpret_u16_s32 +// CHECK-NEXT: ret +uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) { + return vreinterpret_u16_s32(a); +} + +// CHECK-LABEL: test_vreinterpret_u16_s64 +// CHECK-NEXT: ret +uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) { + return vreinterpret_u16_s64(a); +} + +// CHECK-LABEL: test_vreinterpret_u16_u8 +// CHECK-NEXT: ret +uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) { + return vreinterpret_u16_u8(a); +} + +// CHECK-LABEL: test_vreinterpret_u16_u32 +// CHECK-NEXT: ret +uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) { + return vreinterpret_u16_u32(a); +} + +// CHECK-LABEL: test_vreinterpret_u16_u64 +// CHECK-NEXT: ret 
+uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) { + return vreinterpret_u16_u64(a); +} + +// CHECK-LABEL: test_vreinterpret_u16_f16 +// CHECK-NEXT: ret +uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) { + return vreinterpret_u16_f16(a); +} + +// CHECK-LABEL: test_vreinterpret_u16_f32 +// CHECK-NEXT: ret +uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) { + return vreinterpret_u16_f32(a); +} + +// CHECK-LABEL: test_vreinterpret_u16_f64 +// CHECK-NEXT: ret +uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) { + return vreinterpret_u16_f64(a); +} + +// CHECK-LABEL: test_vreinterpret_u16_p8 +// CHECK-NEXT: ret +uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) { + return vreinterpret_u16_p8(a); +} + +// CHECK-LABEL: test_vreinterpret_u16_p16 +// CHECK-NEXT: ret +uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) { + return vreinterpret_u16_p16(a); +} + +// CHECK-LABEL: test_vreinterpret_u16_p64 +// CHECK-NEXT: ret +uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) { + return vreinterpret_u16_p64(a); +} + +// CHECK-LABEL: test_vreinterpret_u32_s8 +// CHECK-NEXT: ret +uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) { + return vreinterpret_u32_s8(a); +} + +// CHECK-LABEL: test_vreinterpret_u32_s16 +// CHECK-NEXT: ret +uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) { + return vreinterpret_u32_s16(a); +} + +// CHECK-LABEL: test_vreinterpret_u32_s32 +// CHECK-NEXT: ret +uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) { + return vreinterpret_u32_s32(a); +} + +// CHECK-LABEL: test_vreinterpret_u32_s64 +// CHECK-NEXT: ret +uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) { + return vreinterpret_u32_s64(a); +} + +// CHECK-LABEL: test_vreinterpret_u32_u8 +// CHECK-NEXT: ret +uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) { + return vreinterpret_u32_u8(a); +} + +// CHECK-LABEL: test_vreinterpret_u32_u16 +// CHECK-NEXT: ret +uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) { + return vreinterpret_u32_u16(a); +} + +// CHECK-LABEL: test_vreinterpret_u32_u64 +// CHECK-NEXT: ret +uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) { + return vreinterpret_u32_u64(a); +} + +// CHECK-LABEL: test_vreinterpret_u32_f16 +// CHECK-NEXT: ret +uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) { + return vreinterpret_u32_f16(a); +} + +// CHECK-LABEL: test_vreinterpret_u32_f32 +// CHECK-NEXT: ret +uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) { + return vreinterpret_u32_f32(a); +} + +// CHECK-LABEL: test_vreinterpret_u32_f64 +// CHECK-NEXT: ret +uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) { + return vreinterpret_u32_f64(a); +} + +// CHECK-LABEL: test_vreinterpret_u32_p8 +// CHECK-NEXT: ret +uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) { + return vreinterpret_u32_p8(a); +} + +// CHECK-LABEL: test_vreinterpret_u32_p16 +// CHECK-NEXT: ret +uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) { + return vreinterpret_u32_p16(a); +} + +// CHECK-LABEL: test_vreinterpret_u32_p64 +// CHECK-NEXT: ret +uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) { + return vreinterpret_u32_p64(a); +} + +// CHECK-LABEL: test_vreinterpret_u64_s8 +// CHECK-NEXT: ret +uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) { + return vreinterpret_u64_s8(a); +} + +// CHECK-LABEL: test_vreinterpret_u64_s16 +// CHECK-NEXT: ret +uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) { + return vreinterpret_u64_s16(a); +} + +// CHECK-LABEL: test_vreinterpret_u64_s32 +// CHECK-NEXT: ret +uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) { + return vreinterpret_u64_s32(a); +} + +// CHECK-LABEL: test_vreinterpret_u64_s64 +// 
CHECK-NEXT: ret +uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) { + return vreinterpret_u64_s64(a); +} + +// CHECK-LABEL: test_vreinterpret_u64_u8 +// CHECK-NEXT: ret +uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) { + return vreinterpret_u64_u8(a); +} + +// CHECK-LABEL: test_vreinterpret_u64_u16 +// CHECK-NEXT: ret +uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) { + return vreinterpret_u64_u16(a); +} + +// CHECK-LABEL: test_vreinterpret_u64_u32 +// CHECK-NEXT: ret +uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) { + return vreinterpret_u64_u32(a); +} + +// CHECK-LABEL: test_vreinterpret_u64_f16 +// CHECK-NEXT: ret +uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) { + return vreinterpret_u64_f16(a); +} + +// CHECK-LABEL: test_vreinterpret_u64_f32 +// CHECK-NEXT: ret +uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) { + return vreinterpret_u64_f32(a); +} + +// CHECK-LABEL: test_vreinterpret_u64_f64 +// CHECK-NEXT: ret +uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) { + return vreinterpret_u64_f64(a); +} + +// CHECK-LABEL: test_vreinterpret_u64_p8 +// CHECK-NEXT: ret +uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) { + return vreinterpret_u64_p8(a); +} + +// CHECK-LABEL: test_vreinterpret_u64_p16 +// CHECK-NEXT: ret +uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) { + return vreinterpret_u64_p16(a); +} + +// CHECK-LABEL: test_vreinterpret_u64_p64 +// CHECK-NEXT: ret +uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) { + return vreinterpret_u64_p64(a); +} + +// CHECK-LABEL: test_vreinterpret_f16_s8 +// CHECK-NEXT: ret +float16x4_t test_vreinterpret_f16_s8(int8x8_t a) { + return vreinterpret_f16_s8(a); +} + +// CHECK-LABEL: test_vreinterpret_f16_s16 +// CHECK-NEXT: ret +float16x4_t test_vreinterpret_f16_s16(int16x4_t a) { + return vreinterpret_f16_s16(a); +} + +// CHECK-LABEL: test_vreinterpret_f16_s32 +// CHECK-NEXT: ret +float16x4_t test_vreinterpret_f16_s32(int32x2_t a) { + return vreinterpret_f16_s32(a); +} + +// CHECK-LABEL: test_vreinterpret_f16_s64 +// CHECK-NEXT: ret +float16x4_t test_vreinterpret_f16_s64(int64x1_t a) { + return vreinterpret_f16_s64(a); +} + +// CHECK-LABEL: test_vreinterpret_f16_u8 +// CHECK-NEXT: ret +float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) { + return vreinterpret_f16_u8(a); +} + +// CHECK-LABEL: test_vreinterpret_f16_u16 +// CHECK-NEXT: ret +float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) { + return vreinterpret_f16_u16(a); +} + +// CHECK-LABEL: test_vreinterpret_f16_u32 +// CHECK-NEXT: ret +float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) { + return vreinterpret_f16_u32(a); +} + +// CHECK-LABEL: test_vreinterpret_f16_u64 +// CHECK-NEXT: ret +float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) { + return vreinterpret_f16_u64(a); +} + +// CHECK-LABEL: test_vreinterpret_f16_f32 +// CHECK-NEXT: ret +float16x4_t test_vreinterpret_f16_f32(float32x2_t a) { + return vreinterpret_f16_f32(a); +} + +// CHECK-LABEL: test_vreinterpret_f16_f64 +// CHECK-NEXT: ret +float16x4_t test_vreinterpret_f16_f64(float64x1_t a) { + return vreinterpret_f16_f64(a); +} + +// CHECK-LABEL: test_vreinterpret_f16_p8 +// CHECK-NEXT: ret +float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) { + return vreinterpret_f16_p8(a); +} + +// CHECK-LABEL: test_vreinterpret_f16_p16 +// CHECK-NEXT: ret +float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) { + return vreinterpret_f16_p16(a); +} + +// CHECK-LABEL: test_vreinterpret_f16_p64 +// CHECK-NEXT: ret +float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) { + return vreinterpret_f16_p64(a); +} + +// CHECK-LABEL: 
test_vreinterpret_f32_s8 +// CHECK-NEXT: ret +float32x2_t test_vreinterpret_f32_s8(int8x8_t a) { + return vreinterpret_f32_s8(a); +} + +// CHECK-LABEL: test_vreinterpret_f32_s16 +// CHECK-NEXT: ret +float32x2_t test_vreinterpret_f32_s16(int16x4_t a) { + return vreinterpret_f32_s16(a); +} + +// CHECK-LABEL: test_vreinterpret_f32_s32 +// CHECK-NEXT: ret +float32x2_t test_vreinterpret_f32_s32(int32x2_t a) { + return vreinterpret_f32_s32(a); +} + +// CHECK-LABEL: test_vreinterpret_f32_s64 +// CHECK-NEXT: ret +float32x2_t test_vreinterpret_f32_s64(int64x1_t a) { + return vreinterpret_f32_s64(a); +} + +// CHECK-LABEL: test_vreinterpret_f32_u8 +// CHECK-NEXT: ret +float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) { + return vreinterpret_f32_u8(a); +} + +// CHECK-LABEL: test_vreinterpret_f32_u16 +// CHECK-NEXT: ret +float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) { + return vreinterpret_f32_u16(a); +} + +// CHECK-LABEL: test_vreinterpret_f32_u32 +// CHECK-NEXT: ret +float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) { + return vreinterpret_f32_u32(a); +} + +// CHECK-LABEL: test_vreinterpret_f32_u64 +// CHECK-NEXT: ret +float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) { + return vreinterpret_f32_u64(a); +} + +// CHECK-LABEL: test_vreinterpret_f32_f16 +// CHECK-NEXT: ret +float32x2_t test_vreinterpret_f32_f16(float16x4_t a) { + return vreinterpret_f32_f16(a); +} + +// CHECK-LABEL: test_vreinterpret_f32_f64 +// CHECK-NEXT: ret +float32x2_t test_vreinterpret_f32_f64(float64x1_t a) { + return vreinterpret_f32_f64(a); +} + +// CHECK-LABEL: test_vreinterpret_f32_p8 +// CHECK-NEXT: ret +float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) { + return vreinterpret_f32_p8(a); +} + +// CHECK-LABEL: test_vreinterpret_f32_p16 +// CHECK-NEXT: ret +float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) { + return vreinterpret_f32_p16(a); +} + +// CHECK-LABEL: test_vreinterpret_f32_p64 +// CHECK-NEXT: ret +float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) { + return vreinterpret_f32_p64(a); +} + +// CHECK-LABEL: test_vreinterpret_f64_s8 +// CHECK-NEXT: ret +float64x1_t test_vreinterpret_f64_s8(int8x8_t a) { + return vreinterpret_f64_s8(a); +} + +// CHECK-LABEL: test_vreinterpret_f64_s16 +// CHECK-NEXT: ret +float64x1_t test_vreinterpret_f64_s16(int16x4_t a) { + return vreinterpret_f64_s16(a); +} + +// CHECK-LABEL: test_vreinterpret_f64_s32 +// CHECK-NEXT: ret +float64x1_t test_vreinterpret_f64_s32(int32x2_t a) { + return vreinterpret_f64_s32(a); +} + +// CHECK-LABEL: test_vreinterpret_f64_s64 +// CHECK-NEXT: ret +float64x1_t test_vreinterpret_f64_s64(int64x1_t a) { + return vreinterpret_f64_s64(a); +} + +// CHECK-LABEL: test_vreinterpret_f64_u8 +// CHECK-NEXT: ret +float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) { + return vreinterpret_f64_u8(a); +} + +// CHECK-LABEL: test_vreinterpret_f64_u16 +// CHECK-NEXT: ret +float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) { + return vreinterpret_f64_u16(a); +} + +// CHECK-LABEL: test_vreinterpret_f64_u32 +// CHECK-NEXT: ret +float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) { + return vreinterpret_f64_u32(a); +} + +// CHECK-LABEL: test_vreinterpret_f64_u64 +// CHECK-NEXT: ret +float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) { + return vreinterpret_f64_u64(a); +} + +// CHECK-LABEL: test_vreinterpret_f64_f16 +// CHECK-NEXT: ret +float64x1_t test_vreinterpret_f64_f16(float16x4_t a) { + return vreinterpret_f64_f16(a); +} + +// CHECK-LABEL: test_vreinterpret_f64_f32 +// CHECK-NEXT: ret +float64x1_t test_vreinterpret_f64_f32(float32x2_t a) { + return 
vreinterpret_f64_f32(a); +} + +// CHECK-LABEL: test_vreinterpret_f64_p8 +// CHECK-NEXT: ret +float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) { + return vreinterpret_f64_p8(a); +} + +// CHECK-LABEL: test_vreinterpret_f64_p16 +// CHECK-NEXT: ret +float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) { + return vreinterpret_f64_p16(a); +} + +// CHECK-LABEL: test_vreinterpret_f64_p64 +// CHECK-NEXT: ret +float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) { + return vreinterpret_f64_p64(a); +} + +// CHECK-LABEL: test_vreinterpret_p8_s8 +// CHECK-NEXT: ret +poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) { + return vreinterpret_p8_s8(a); +} + +// CHECK-LABEL: test_vreinterpret_p8_s16 +// CHECK-NEXT: ret +poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) { + return vreinterpret_p8_s16(a); +} + +// CHECK-LABEL: test_vreinterpret_p8_s32 +// CHECK-NEXT: ret +poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) { + return vreinterpret_p8_s32(a); +} + +// CHECK-LABEL: test_vreinterpret_p8_s64 +// CHECK-NEXT: ret +poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) { + return vreinterpret_p8_s64(a); +} + +// CHECK-LABEL: test_vreinterpret_p8_u8 +// CHECK-NEXT: ret +poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) { + return vreinterpret_p8_u8(a); +} + +// CHECK-LABEL: test_vreinterpret_p8_u16 +// CHECK-NEXT: ret +poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) { + return vreinterpret_p8_u16(a); +} + +// CHECK-LABEL: test_vreinterpret_p8_u32 +// CHECK-NEXT: ret +poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) { + return vreinterpret_p8_u32(a); +} + +// CHECK-LABEL: test_vreinterpret_p8_u64 +// CHECK-NEXT: ret +poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) { + return vreinterpret_p8_u64(a); +} + +// CHECK-LABEL: test_vreinterpret_p8_f16 +// CHECK-NEXT: ret +poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) { + return vreinterpret_p8_f16(a); +} + +// CHECK-LABEL: test_vreinterpret_p8_f32 +// CHECK-NEXT: ret +poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) { + return vreinterpret_p8_f32(a); +} + +// CHECK-LABEL: test_vreinterpret_p8_f64 +// CHECK-NEXT: ret +poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) { + return vreinterpret_p8_f64(a); +} + +// CHECK-LABEL: test_vreinterpret_p8_p16 +// CHECK-NEXT: ret +poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) { + return vreinterpret_p8_p16(a); +} + +// CHECK-LABEL: test_vreinterpret_p8_p64 +// CHECK-NEXT: ret +poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) { + return vreinterpret_p8_p64(a); +} + +// CHECK-LABEL: test_vreinterpret_p16_s8 +// CHECK-NEXT: ret +poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) { + return vreinterpret_p16_s8(a); +} + +// CHECK-LABEL: test_vreinterpret_p16_s16 +// CHECK-NEXT: ret +poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) { + return vreinterpret_p16_s16(a); +} + +// CHECK-LABEL: test_vreinterpret_p16_s32 +// CHECK-NEXT: ret +poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) { + return vreinterpret_p16_s32(a); +} + +// CHECK-LABEL: test_vreinterpret_p16_s64 +// CHECK-NEXT: ret +poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) { + return vreinterpret_p16_s64(a); +} + +// CHECK-LABEL: test_vreinterpret_p16_u8 +// CHECK-NEXT: ret +poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) { + return vreinterpret_p16_u8(a); +} + +// CHECK-LABEL: test_vreinterpret_p16_u16 +// CHECK-NEXT: ret +poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) { + return vreinterpret_p16_u16(a); +} + +// CHECK-LABEL: test_vreinterpret_p16_u32 +// CHECK-NEXT: ret +poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) { + return vreinterpret_p16_u32(a); +} + +// 
CHECK-LABEL: test_vreinterpret_p16_u64 +// CHECK-NEXT: ret +poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) { + return vreinterpret_p16_u64(a); +} + +// CHECK-LABEL: test_vreinterpret_p16_f16 +// CHECK-NEXT: ret +poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) { + return vreinterpret_p16_f16(a); +} + +// CHECK-LABEL: test_vreinterpret_p16_f32 +// CHECK-NEXT: ret +poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) { + return vreinterpret_p16_f32(a); +} + +// CHECK-LABEL: test_vreinterpret_p16_f64 +// CHECK-NEXT: ret +poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) { + return vreinterpret_p16_f64(a); +} + +// CHECK-LABEL: test_vreinterpret_p16_p8 +// CHECK-NEXT: ret +poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) { + return vreinterpret_p16_p8(a); +} + +// CHECK-LABEL: test_vreinterpret_p16_p64 +// CHECK-NEXT: ret +poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) { + return vreinterpret_p16_p64(a); +} + +// CHECK-LABEL: test_vreinterpret_p64_s8 +// CHECK-NEXT: ret +poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) { + return vreinterpret_p64_s8(a); +} + +// CHECK-LABEL: test_vreinterpret_p64_s16 +// CHECK-NEXT: ret +poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) { + return vreinterpret_p64_s16(a); +} + +// CHECK-LABEL: test_vreinterpret_p64_s32 +// CHECK-NEXT: ret +poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) { + return vreinterpret_p64_s32(a); +} + +// CHECK-LABEL: test_vreinterpret_p64_s64 +// CHECK-NEXT: ret +poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) { + return vreinterpret_p64_s64(a); +} + +// CHECK-LABEL: test_vreinterpret_p64_u8 +// CHECK-NEXT: ret +poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) { + return vreinterpret_p64_u8(a); +} + +// CHECK-LABEL: test_vreinterpret_p64_u16 +// CHECK-NEXT: ret +poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) { + return vreinterpret_p64_u16(a); +} + +// CHECK-LABEL: test_vreinterpret_p64_u32 +// CHECK-NEXT: ret +poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) { + return vreinterpret_p64_u32(a); +} + +// CHECK-LABEL: test_vreinterpret_p64_u64 +// CHECK-NEXT: ret +poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) { + return vreinterpret_p64_u64(a); +} + +// CHECK-LABEL: test_vreinterpret_p64_f16 +// CHECK-NEXT: ret +poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) { + return vreinterpret_p64_f16(a); +} + +// CHECK-LABEL: test_vreinterpret_p64_f32 +// CHECK-NEXT: ret +poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) { + return vreinterpret_p64_f32(a); +} + +// CHECK-LABEL: test_vreinterpret_p64_f64 +// CHECK-NEXT: ret +poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) { + return vreinterpret_p64_f64(a); +} + +// CHECK-LABEL: test_vreinterpret_p64_p8 +// CHECK-NEXT: ret +poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) { + return vreinterpret_p64_p8(a); +} + +// CHECK-LABEL: test_vreinterpret_p64_p16 +// CHECK-NEXT: ret +poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) { + return vreinterpret_p64_p16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s8_s16 +// CHECK-NEXT: ret +int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) { + return vreinterpretq_s8_s16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s8_s32 +// CHECK-NEXT: ret +int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) { + return vreinterpretq_s8_s32(a); +} + +// CHECK-LABEL: test_vreinterpretq_s8_s64 +// CHECK-NEXT: ret +int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) { + return vreinterpretq_s8_s64(a); +} + +// CHECK-LABEL: test_vreinterpretq_s8_u8 +// CHECK-NEXT: ret +int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) { + return vreinterpretq_s8_u8(a); 
+} + +// CHECK-LABEL: test_vreinterpretq_s8_u16 +// CHECK-NEXT: ret +int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) { + return vreinterpretq_s8_u16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s8_u32 +// CHECK-NEXT: ret +int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) { + return vreinterpretq_s8_u32(a); +} + +// CHECK-LABEL: test_vreinterpretq_s8_u64 +// CHECK-NEXT: ret +int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) { + return vreinterpretq_s8_u64(a); +} + +// CHECK-LABEL: test_vreinterpretq_s8_f16 +// CHECK-NEXT: ret +int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) { + return vreinterpretq_s8_f16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s8_f32 +// CHECK-NEXT: ret +int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) { + return vreinterpretq_s8_f32(a); +} + +// CHECK-LABEL: test_vreinterpretq_s8_f64 +// CHECK-NEXT: ret +int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) { + return vreinterpretq_s8_f64(a); +} + +// CHECK-LABEL: test_vreinterpretq_s8_p8 +// CHECK-NEXT: ret +int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) { + return vreinterpretq_s8_p8(a); +} + +// CHECK-LABEL: test_vreinterpretq_s8_p16 +// CHECK-NEXT: ret +int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) { + return vreinterpretq_s8_p16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s8_p64 +// CHECK-NEXT: ret +int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) { + return vreinterpretq_s8_p64(a); +} + +// CHECK-LABEL: test_vreinterpretq_s16_s8 +// CHECK-NEXT: ret +int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) { + return vreinterpretq_s16_s8(a); +} + +// CHECK-LABEL: test_vreinterpretq_s16_s32 +// CHECK-NEXT: ret +int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) { + return vreinterpretq_s16_s32(a); +} + +// CHECK-LABEL: test_vreinterpretq_s16_s64 +// CHECK-NEXT: ret +int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) { + return vreinterpretq_s16_s64(a); +} + +// CHECK-LABEL: test_vreinterpretq_s16_u8 +// CHECK-NEXT: ret +int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) { + return vreinterpretq_s16_u8(a); +} + +// CHECK-LABEL: test_vreinterpretq_s16_u16 +// CHECK-NEXT: ret +int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) { + return vreinterpretq_s16_u16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s16_u32 +// CHECK-NEXT: ret +int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) { + return vreinterpretq_s16_u32(a); +} + +// CHECK-LABEL: test_vreinterpretq_s16_u64 +// CHECK-NEXT: ret +int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) { + return vreinterpretq_s16_u64(a); +} + +// CHECK-LABEL: test_vreinterpretq_s16_f16 +// CHECK-NEXT: ret +int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) { + return vreinterpretq_s16_f16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s16_f32 +// CHECK-NEXT: ret +int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) { + return vreinterpretq_s16_f32(a); +} + +// CHECK-LABEL: test_vreinterpretq_s16_f64 +// CHECK-NEXT: ret +int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) { + return vreinterpretq_s16_f64(a); +} + +// CHECK-LABEL: test_vreinterpretq_s16_p8 +// CHECK-NEXT: ret +int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) { + return vreinterpretq_s16_p8(a); +} + +// CHECK-LABEL: test_vreinterpretq_s16_p16 +// CHECK-NEXT: ret +int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) { + return vreinterpretq_s16_p16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s16_p64 +// CHECK-NEXT: ret +int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) { + return vreinterpretq_s16_p64(a); +} + +// CHECK-LABEL: test_vreinterpretq_s32_s8 +// CHECK-NEXT: ret +int32x4_t test_vreinterpretq_s32_s8(int8x16_t 
a) { + return vreinterpretq_s32_s8(a); +} + +// CHECK-LABEL: test_vreinterpretq_s32_s16 +// CHECK-NEXT: ret +int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) { + return vreinterpretq_s32_s16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s32_s64 +// CHECK-NEXT: ret +int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) { + return vreinterpretq_s32_s64(a); +} + +// CHECK-LABEL: test_vreinterpretq_s32_u8 +// CHECK-NEXT: ret +int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) { + return vreinterpretq_s32_u8(a); +} + +// CHECK-LABEL: test_vreinterpretq_s32_u16 +// CHECK-NEXT: ret +int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) { + return vreinterpretq_s32_u16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s32_u32 +// CHECK-NEXT: ret +int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) { + return vreinterpretq_s32_u32(a); +} + +// CHECK-LABEL: test_vreinterpretq_s32_u64 +// CHECK-NEXT: ret +int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) { + return vreinterpretq_s32_u64(a); +} + +// CHECK-LABEL: test_vreinterpretq_s32_f16 +// CHECK-NEXT: ret +int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) { + return vreinterpretq_s32_f16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s32_f32 +// CHECK-NEXT: ret +int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) { + return vreinterpretq_s32_f32(a); +} + +// CHECK-LABEL: test_vreinterpretq_s32_f64 +// CHECK-NEXT: ret +int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) { + return vreinterpretq_s32_f64(a); +} + +// CHECK-LABEL: test_vreinterpretq_s32_p8 +// CHECK-NEXT: ret +int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) { + return vreinterpretq_s32_p8(a); +} + +// CHECK-LABEL: test_vreinterpretq_s32_p16 +// CHECK-NEXT: ret +int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) { + return vreinterpretq_s32_p16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s32_p64 +// CHECK-NEXT: ret +int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) { + return vreinterpretq_s32_p64(a); +} + +// CHECK-LABEL: test_vreinterpretq_s64_s8 +// CHECK-NEXT: ret +int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) { + return vreinterpretq_s64_s8(a); +} + +// CHECK-LABEL: test_vreinterpretq_s64_s16 +// CHECK-NEXT: ret +int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) { + return vreinterpretq_s64_s16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s64_s32 +// CHECK-NEXT: ret +int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) { + return vreinterpretq_s64_s32(a); +} + +// CHECK-LABEL: test_vreinterpretq_s64_u8 +// CHECK-NEXT: ret +int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) { + return vreinterpretq_s64_u8(a); +} + +// CHECK-LABEL: test_vreinterpretq_s64_u16 +// CHECK-NEXT: ret +int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) { + return vreinterpretq_s64_u16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s64_u32 +// CHECK-NEXT: ret +int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) { + return vreinterpretq_s64_u32(a); +} + +// CHECK-LABEL: test_vreinterpretq_s64_u64 +// CHECK-NEXT: ret +int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) { + return vreinterpretq_s64_u64(a); +} + +// CHECK-LABEL: test_vreinterpretq_s64_f16 +// CHECK-NEXT: ret +int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) { + return vreinterpretq_s64_f16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s64_f32 +// CHECK-NEXT: ret +int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) { + return vreinterpretq_s64_f32(a); +} + +// CHECK-LABEL: test_vreinterpretq_s64_f64 +// CHECK-NEXT: ret +int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) { + return vreinterpretq_s64_f64(a); +} + +// CHECK-LABEL: test_vreinterpretq_s64_p8 
+// CHECK-NEXT: ret +int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) { + return vreinterpretq_s64_p8(a); +} + +// CHECK-LABEL: test_vreinterpretq_s64_p16 +// CHECK-NEXT: ret +int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) { + return vreinterpretq_s64_p16(a); +} + +// CHECK-LABEL: test_vreinterpretq_s64_p64 +// CHECK-NEXT: ret +int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) { + return vreinterpretq_s64_p64(a); +} + +// CHECK-LABEL: test_vreinterpretq_u8_s8 +// CHECK-NEXT: ret +uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) { + return vreinterpretq_u8_s8(a); +} + +// CHECK-LABEL: test_vreinterpretq_u8_s16 +// CHECK-NEXT: ret +uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) { + return vreinterpretq_u8_s16(a); +} + +// CHECK-LABEL: test_vreinterpretq_u8_s32 +// CHECK-NEXT: ret +uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) { + return vreinterpretq_u8_s32(a); +} + +// CHECK-LABEL: test_vreinterpretq_u8_s64 +// CHECK-NEXT: ret +uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) { + return vreinterpretq_u8_s64(a); +} + +// CHECK-LABEL: test_vreinterpretq_u8_u16 +// CHECK-NEXT: ret +uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) { + return vreinterpretq_u8_u16(a); +} + +// CHECK-LABEL: test_vreinterpretq_u8_u32 +// CHECK-NEXT: ret +uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) { + return vreinterpretq_u8_u32(a); +} + +// CHECK-LABEL: test_vreinterpretq_u8_u64 +// CHECK-NEXT: ret +uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) { + return vreinterpretq_u8_u64(a); +} + +// CHECK-LABEL: test_vreinterpretq_u8_f16 +// CHECK-NEXT: ret +uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) { + return vreinterpretq_u8_f16(a); +} + +// CHECK-LABEL: test_vreinterpretq_u8_f32 +// CHECK-NEXT: ret +uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) { + return vreinterpretq_u8_f32(a); +} + +// CHECK-LABEL: test_vreinterpretq_u8_f64 +// CHECK-NEXT: ret +uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) { + return vreinterpretq_u8_f64(a); +} + +// CHECK-LABEL: test_vreinterpretq_u8_p8 +// CHECK-NEXT: ret +uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) { + return vreinterpretq_u8_p8(a); +} + +// CHECK-LABEL: test_vreinterpretq_u8_p16 +// CHECK-NEXT: ret +uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) { + return vreinterpretq_u8_p16(a); +} + +// CHECK-LABEL: test_vreinterpretq_u8_p64 +// CHECK-NEXT: ret +uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) { + return vreinterpretq_u8_p64(a); +} + +// CHECK-LABEL: test_vreinterpretq_u16_s8 +// CHECK-NEXT: ret +uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) { + return vreinterpretq_u16_s8(a); +} + +// CHECK-LABEL: test_vreinterpretq_u16_s16 +// CHECK-NEXT: ret +uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) { + return vreinterpretq_u16_s16(a); +} + +// CHECK-LABEL: test_vreinterpretq_u16_s32 +// CHECK-NEXT: ret +uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) { + return vreinterpretq_u16_s32(a); +} + +// CHECK-LABEL: test_vreinterpretq_u16_s64 +// CHECK-NEXT: ret +uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) { + return vreinterpretq_u16_s64(a); +} + +// CHECK-LABEL: test_vreinterpretq_u16_u8 +// CHECK-NEXT: ret +uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) { + return vreinterpretq_u16_u8(a); +} + +// CHECK-LABEL: test_vreinterpretq_u16_u32 +// CHECK-NEXT: ret +uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) { + return vreinterpretq_u16_u32(a); +} + +// CHECK-LABEL: test_vreinterpretq_u16_u64 +// CHECK-NEXT: ret +uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) { + return vreinterpretq_u16_u64(a); +} + 
+// CHECK-LABEL: test_vreinterpretq_u16_f16 +// CHECK-NEXT: ret +uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) { + return vreinterpretq_u16_f16(a); +} + +// CHECK-LABEL: test_vreinterpretq_u16_f32 +// CHECK-NEXT: ret +uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) { + return vreinterpretq_u16_f32(a); +} + +// CHECK-LABEL: test_vreinterpretq_u16_f64 +// CHECK-NEXT: ret +uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) { + return vreinterpretq_u16_f64(a); +} + +// CHECK-LABEL: test_vreinterpretq_u16_p8 +// CHECK-NEXT: ret +uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) { + return vreinterpretq_u16_p8(a); +} + +// CHECK-LABEL: test_vreinterpretq_u16_p16 +// CHECK-NEXT: ret +uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) { + return vreinterpretq_u16_p16(a); +} + +// CHECK-LABEL: test_vreinterpretq_u16_p64 +// CHECK-NEXT: ret +uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) { + return vreinterpretq_u16_p64(a); +} + +// CHECK-LABEL: test_vreinterpretq_u32_s8 +// CHECK-NEXT: ret +uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) { + return vreinterpretq_u32_s8(a); +} + +// CHECK-LABEL: test_vreinterpretq_u32_s16 +// CHECK-NEXT: ret +uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) { + return vreinterpretq_u32_s16(a); +} + +// CHECK-LABEL: test_vreinterpretq_u32_s32 +// CHECK-NEXT: ret +uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) { + return vreinterpretq_u32_s32(a); +} + +// CHECK-LABEL: test_vreinterpretq_u32_s64 +// CHECK-NEXT: ret +uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) { + return vreinterpretq_u32_s64(a); +} + +// CHECK-LABEL: test_vreinterpretq_u32_u8 +// CHECK-NEXT: ret +uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) { + return vreinterpretq_u32_u8(a); +} + +// CHECK-LABEL: test_vreinterpretq_u32_u16 +// CHECK-NEXT: ret +uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) { + return vreinterpretq_u32_u16(a); +} + +// CHECK-LABEL: test_vreinterpretq_u32_u64 +// CHECK-NEXT: ret +uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) { + return vreinterpretq_u32_u64(a); +} + +// CHECK-LABEL: test_vreinterpretq_u32_f16 +// CHECK-NEXT: ret +uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) { + return vreinterpretq_u32_f16(a); +} + +// CHECK-LABEL: test_vreinterpretq_u32_f32 +// CHECK-NEXT: ret +uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) { + return vreinterpretq_u32_f32(a); +} + +// CHECK-LABEL: test_vreinterpretq_u32_f64 +// CHECK-NEXT: ret +uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) { + return vreinterpretq_u32_f64(a); +} + +// CHECK-LABEL: test_vreinterpretq_u32_p8 +// CHECK-NEXT: ret +uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) { + return vreinterpretq_u32_p8(a); +} + +// CHECK-LABEL: test_vreinterpretq_u32_p16 +// CHECK-NEXT: ret +uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) { + return vreinterpretq_u32_p16(a); +} + +// CHECK-LABEL: test_vreinterpretq_u32_p64 +// CHECK-NEXT: ret +uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) { + return vreinterpretq_u32_p64(a); +} + +// CHECK-LABEL: test_vreinterpretq_u64_s8 +// CHECK-NEXT: ret +uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) { + return vreinterpretq_u64_s8(a); +} + +// CHECK-LABEL: test_vreinterpretq_u64_s16 +// CHECK-NEXT: ret +uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) { + return vreinterpretq_u64_s16(a); +} + +// CHECK-LABEL: test_vreinterpretq_u64_s32 +// CHECK-NEXT: ret +uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) { + return vreinterpretq_u64_s32(a); +} + +// CHECK-LABEL: test_vreinterpretq_u64_s64 +// CHECK-NEXT: ret 
+uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) { + return vreinterpretq_u64_s64(a); +} + +// CHECK-LABEL: test_vreinterpretq_u64_u8 +// CHECK-NEXT: ret +uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) { + return vreinterpretq_u64_u8(a); +} + +// CHECK-LABEL: test_vreinterpretq_u64_u16 +// CHECK-NEXT: ret +uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) { + return vreinterpretq_u64_u16(a); +} + +// CHECK-LABEL: test_vreinterpretq_u64_u32 +// CHECK-NEXT: ret +uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) { + return vreinterpretq_u64_u32(a); +} + +// CHECK-LABEL: test_vreinterpretq_u64_f16 +// CHECK-NEXT: ret +uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) { + return vreinterpretq_u64_f16(a); +} + +// CHECK-LABEL: test_vreinterpretq_u64_f32 +// CHECK-NEXT: ret +uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) { + return vreinterpretq_u64_f32(a); +} + +// CHECK-LABEL: test_vreinterpretq_u64_f64 +// CHECK-NEXT: ret +uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) { + return vreinterpretq_u64_f64(a); +} + +// CHECK-LABEL: test_vreinterpretq_u64_p8 +// CHECK-NEXT: ret +uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) { + return vreinterpretq_u64_p8(a); +} + +// CHECK-LABEL: test_vreinterpretq_u64_p16 +// CHECK-NEXT: ret +uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) { + return vreinterpretq_u64_p16(a); +} + +// CHECK-LABEL: test_vreinterpretq_u64_p64 +// CHECK-NEXT: ret +uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) { + return vreinterpretq_u64_p64(a); +} + +// CHECK-LABEL: test_vreinterpretq_f16_s8 +// CHECK-NEXT: ret +float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) { + return vreinterpretq_f16_s8(a); +} + +// CHECK-LABEL: test_vreinterpretq_f16_s16 +// CHECK-NEXT: ret +float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) { + return vreinterpretq_f16_s16(a); +} + +// CHECK-LABEL: test_vreinterpretq_f16_s32 +// CHECK-NEXT: ret +float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) { + return vreinterpretq_f16_s32(a); +} + +// CHECK-LABEL: test_vreinterpretq_f16_s64 +// CHECK-NEXT: ret +float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) { + return vreinterpretq_f16_s64(a); +} + +// CHECK-LABEL: test_vreinterpretq_f16_u8 +// CHECK-NEXT: ret +float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) { + return vreinterpretq_f16_u8(a); +} + +// CHECK-LABEL: test_vreinterpretq_f16_u16 +// CHECK-NEXT: ret +float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) { + return vreinterpretq_f16_u16(a); +} + +// CHECK-LABEL: test_vreinterpretq_f16_u32 +// CHECK-NEXT: ret +float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) { + return vreinterpretq_f16_u32(a); +} + +// CHECK-LABEL: test_vreinterpretq_f16_u64 +// CHECK-NEXT: ret +float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) { + return vreinterpretq_f16_u64(a); +} + +// CHECK-LABEL: test_vreinterpretq_f16_f32 +// CHECK-NEXT: ret +float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) { + return vreinterpretq_f16_f32(a); +} + +// CHECK-LABEL: test_vreinterpretq_f16_f64 +// CHECK-NEXT: ret +float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) { + return vreinterpretq_f16_f64(a); +} + +// CHECK-LABEL: test_vreinterpretq_f16_p8 +// CHECK-NEXT: ret +float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) { + return vreinterpretq_f16_p8(a); +} + +// CHECK-LABEL: test_vreinterpretq_f16_p16 +// CHECK-NEXT: ret +float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) { + return vreinterpretq_f16_p16(a); +} + +// CHECK-LABEL: test_vreinterpretq_f16_p64 +// CHECK-NEXT: ret +float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) { + 
return vreinterpretq_f16_p64(a); +} + +// CHECK-LABEL: test_vreinterpretq_f32_s8 +// CHECK-NEXT: ret +float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) { + return vreinterpretq_f32_s8(a); +} + +// CHECK-LABEL: test_vreinterpretq_f32_s16 +// CHECK-NEXT: ret +float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) { + return vreinterpretq_f32_s16(a); +} + +// CHECK-LABEL: test_vreinterpretq_f32_s32 +// CHECK-NEXT: ret +float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) { + return vreinterpretq_f32_s32(a); +} + +// CHECK-LABEL: test_vreinterpretq_f32_s64 +// CHECK-NEXT: ret +float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) { + return vreinterpretq_f32_s64(a); +} + +// CHECK-LABEL: test_vreinterpretq_f32_u8 +// CHECK-NEXT: ret +float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) { + return vreinterpretq_f32_u8(a); +} + +// CHECK-LABEL: test_vreinterpretq_f32_u16 +// CHECK-NEXT: ret +float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) { + return vreinterpretq_f32_u16(a); +} + +// CHECK-LABEL: test_vreinterpretq_f32_u32 +// CHECK-NEXT: ret +float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) { + return vreinterpretq_f32_u32(a); +} + +// CHECK-LABEL: test_vreinterpretq_f32_u64 +// CHECK-NEXT: ret +float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) { + return vreinterpretq_f32_u64(a); +} + +// CHECK-LABEL: test_vreinterpretq_f32_f16 +// CHECK-NEXT: ret +float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) { + return vreinterpretq_f32_f16(a); +} + +// CHECK-LABEL: test_vreinterpretq_f32_f64 +// CHECK-NEXT: ret +float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) { + return vreinterpretq_f32_f64(a); +} + +// CHECK-LABEL: test_vreinterpretq_f32_p8 +// CHECK-NEXT: ret +float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) { + return vreinterpretq_f32_p8(a); +} + +// CHECK-LABEL: test_vreinterpretq_f32_p16 +// CHECK-NEXT: ret +float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) { + return vreinterpretq_f32_p16(a); +} + +// CHECK-LABEL: test_vreinterpretq_f32_p64 +// CHECK-NEXT: ret +float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) { + return vreinterpretq_f32_p64(a); +} + +// CHECK-LABEL: test_vreinterpretq_f64_s8 +// CHECK-NEXT: ret +float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) { + return vreinterpretq_f64_s8(a); +} + +// CHECK-LABEL: test_vreinterpretq_f64_s16 +// CHECK-NEXT: ret +float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) { + return vreinterpretq_f64_s16(a); +} + +// CHECK-LABEL: test_vreinterpretq_f64_s32 +// CHECK-NEXT: ret +float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) { + return vreinterpretq_f64_s32(a); +} + +// CHECK-LABEL: test_vreinterpretq_f64_s64 +// CHECK-NEXT: ret +float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) { + return vreinterpretq_f64_s64(a); +} + +// CHECK-LABEL: test_vreinterpretq_f64_u8 +// CHECK-NEXT: ret +float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) { + return vreinterpretq_f64_u8(a); +} + +// CHECK-LABEL: test_vreinterpretq_f64_u16 +// CHECK-NEXT: ret +float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) { + return vreinterpretq_f64_u16(a); +} + +// CHECK-LABEL: test_vreinterpretq_f64_u32 +// CHECK-NEXT: ret +float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) { + return vreinterpretq_f64_u32(a); +} + +// CHECK-LABEL: test_vreinterpretq_f64_u64 +// CHECK-NEXT: ret +float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) { + return vreinterpretq_f64_u64(a); +} + +// CHECK-LABEL: test_vreinterpretq_f64_f16 +// CHECK-NEXT: ret +float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) { + return vreinterpretq_f64_f16(a); +} + +// CHECK-LABEL: 
test_vreinterpretq_f64_f32 +// CHECK-NEXT: ret +float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) { + return vreinterpretq_f64_f32(a); +} + +// CHECK-LABEL: test_vreinterpretq_f64_p8 +// CHECK-NEXT: ret +float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) { + return vreinterpretq_f64_p8(a); +} + +// CHECK-LABEL: test_vreinterpretq_f64_p16 +// CHECK-NEXT: ret +float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) { + return vreinterpretq_f64_p16(a); +} + +// CHECK-LABEL: test_vreinterpretq_f64_p64 +// CHECK-NEXT: ret +float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) { + return vreinterpretq_f64_p64(a); +} + +// CHECK-LABEL: test_vreinterpretq_p8_s8 +// CHECK-NEXT: ret +poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) { + return vreinterpretq_p8_s8(a); +} + +// CHECK-LABEL: test_vreinterpretq_p8_s16 +// CHECK-NEXT: ret +poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) { + return vreinterpretq_p8_s16(a); +} + +// CHECK-LABEL: test_vreinterpretq_p8_s32 +// CHECK-NEXT: ret +poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) { + return vreinterpretq_p8_s32(a); +} + +// CHECK-LABEL: test_vreinterpretq_p8_s64 +// CHECK-NEXT: ret +poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) { + return vreinterpretq_p8_s64(a); +} + +// CHECK-LABEL: test_vreinterpretq_p8_u8 +// CHECK-NEXT: ret +poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) { + return vreinterpretq_p8_u8(a); +} + +// CHECK-LABEL: test_vreinterpretq_p8_u16 +// CHECK-NEXT: ret +poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) { + return vreinterpretq_p8_u16(a); +} + +// CHECK-LABEL: test_vreinterpretq_p8_u32 +// CHECK-NEXT: ret +poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) { + return vreinterpretq_p8_u32(a); +} + +// CHECK-LABEL: test_vreinterpretq_p8_u64 +// CHECK-NEXT: ret +poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) { + return vreinterpretq_p8_u64(a); +} + +// CHECK-LABEL: test_vreinterpretq_p8_f16 +// CHECK-NEXT: ret +poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) { + return vreinterpretq_p8_f16(a); +} + +// CHECK-LABEL: test_vreinterpretq_p8_f32 +// CHECK-NEXT: ret +poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) { + return vreinterpretq_p8_f32(a); +} + +// CHECK-LABEL: test_vreinterpretq_p8_f64 +// CHECK-NEXT: ret +poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) { + return vreinterpretq_p8_f64(a); +} + +// CHECK-LABEL: test_vreinterpretq_p8_p16 +// CHECK-NEXT: ret +poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) { + return vreinterpretq_p8_p16(a); +} + +// CHECK-LABEL: test_vreinterpretq_p8_p64 +// CHECK-NEXT: ret +poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) { + return vreinterpretq_p8_p64(a); +} + +// CHECK-LABEL: test_vreinterpretq_p16_s8 +// CHECK-NEXT: ret +poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) { + return vreinterpretq_p16_s8(a); +} + +// CHECK-LABEL: test_vreinterpretq_p16_s16 +// CHECK-NEXT: ret +poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) { + return vreinterpretq_p16_s16(a); +} + +// CHECK-LABEL: test_vreinterpretq_p16_s32 +// CHECK-NEXT: ret +poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) { + return vreinterpretq_p16_s32(a); +} + +// CHECK-LABEL: test_vreinterpretq_p16_s64 +// CHECK-NEXT: ret +poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) { + return vreinterpretq_p16_s64(a); +} + +// CHECK-LABEL: test_vreinterpretq_p16_u8 +// CHECK-NEXT: ret +poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) { + return vreinterpretq_p16_u8(a); +} + +// CHECK-LABEL: test_vreinterpretq_p16_u16 +// CHECK-NEXT: ret +poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) { + return 
vreinterpretq_p16_u16(a); +} + +// CHECK-LABEL: test_vreinterpretq_p16_u32 +// CHECK-NEXT: ret +poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) { + return vreinterpretq_p16_u32(a); +} + +// CHECK-LABEL: test_vreinterpretq_p16_u64 +// CHECK-NEXT: ret +poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) { + return vreinterpretq_p16_u64(a); +} + +// CHECK-LABEL: test_vreinterpretq_p16_f16 +// CHECK-NEXT: ret +poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) { + return vreinterpretq_p16_f16(a); +} + +// CHECK-LABEL: test_vreinterpretq_p16_f32 +// CHECK-NEXT: ret +poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) { + return vreinterpretq_p16_f32(a); +} + +// CHECK-LABEL: test_vreinterpretq_p16_f64 +// CHECK-NEXT: ret +poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) { + return vreinterpretq_p16_f64(a); +} + +// CHECK-LABEL: test_vreinterpretq_p16_p8 +// CHECK-NEXT: ret +poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) { + return vreinterpretq_p16_p8(a); +} + +// CHECK-LABEL: test_vreinterpretq_p16_p64 +// CHECK-NEXT: ret +poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) { + return vreinterpretq_p16_p64(a); +} + +// CHECK-LABEL: test_vreinterpretq_p64_s8 +// CHECK-NEXT: ret +poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) { + return vreinterpretq_p64_s8(a); +} + +// CHECK-LABEL: test_vreinterpretq_p64_s16 +// CHECK-NEXT: ret +poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) { + return vreinterpretq_p64_s16(a); +} + +// CHECK-LABEL: test_vreinterpretq_p64_s32 +// CHECK-NEXT: ret +poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) { + return vreinterpretq_p64_s32(a); +} + +// CHECK-LABEL: test_vreinterpretq_p64_s64 +// CHECK-NEXT: ret +poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) { + return vreinterpretq_p64_s64(a); +} + +// CHECK-LABEL: test_vreinterpretq_p64_u8 +// CHECK-NEXT: ret +poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) { + return vreinterpretq_p64_u8(a); +} + +// CHECK-LABEL: test_vreinterpretq_p64_u16 +// CHECK-NEXT: ret +poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) { + return vreinterpretq_p64_u16(a); +} + +// CHECK-LABEL: test_vreinterpretq_p64_u32 +// CHECK-NEXT: ret +poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) { + return vreinterpretq_p64_u32(a); +} + +// CHECK-LABEL: test_vreinterpretq_p64_u64 +// CHECK-NEXT: ret +poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) { + return vreinterpretq_p64_u64(a); +} + +// CHECK-LABEL: test_vreinterpretq_p64_f16 +// CHECK-NEXT: ret +poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) { + return vreinterpretq_p64_f16(a); +} + +// CHECK-LABEL: test_vreinterpretq_p64_f32 +// CHECK-NEXT: ret +poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) { + return vreinterpretq_p64_f32(a); +} + +// CHECK-LABEL: test_vreinterpretq_p64_f64 +// CHECK-NEXT: ret +poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) { + return vreinterpretq_p64_f64(a); +} + +// CHECK-LABEL: test_vreinterpretq_p64_p8 +// CHECK-NEXT: ret +poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) { + return vreinterpretq_p64_p8(a); +} + +// CHECK-LABEL: test_vreinterpretq_p64_p16 +// CHECK-NEXT: ret +poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) { + return vreinterpretq_p64_p16(a); +} + +float32_t test_vabds_f32(float32_t a, float32_t b) { +// CHECK-LABEL: test_vabds_f32 +// CHECK: fabd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + return vabds_f32(a, b); +} + +float64_t test_vabdd_f64(float64_t a, float64_t b) { +// CHECK-LABEL: test_vabdd_f64 +// CHECK: fabd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + return vabdd_f64(a, b); +} + +int64x1_t 
test_vuqadd_s64(int64x1_t a, uint64x1_t b) { + // CHECK-LABEL: test_vuqadd_s64 + return vuqadd_s64(a, b); + // CHECK: suqadd d{{[0-9]+}}, d{{[0-9]+}} +} + +uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) { + // CHECK-LABEL: test_vsqadd_u64 + return vsqadd_u64(a, b); + // CHECK: usqadd d{{[0-9]+}}, d{{[0-9]+}} +} + +int64x1_t test_vabs_s64(int64x1_t a) { + // CHECK-LABEL: test_vabs_s64 + return vabs_s64(a); + // CHECK: abs d{{[0-9]+}}, d{{[0-9]+}} +} + +int64x1_t test_vqabs_s64(int64x1_t a) { + // CHECK-LABEL: test_vqabs_s64 + return vqabs_s64(a); + // CHECK: sqabs d{{[0-9]+}}, d{{[0-9]+}} +} + +int64x1_t test_vqneg_s64(int64x1_t a) { + // CHECK-LABEL: test_vqneg_s64 + return vqneg_s64(a); + // CHECK: sqneg d{{[0-9]+}}, d{{[0-9]+}} +} + +int64x1_t test_vneg_s64(int64x1_t a) { + // CHECK-LABEL: test_vneg_s64 + return vneg_s64(a); + // CHECK: neg d{{[0-9]+}}, d{{[0-9]+}} +} + +float32_t test_vaddv_f32(float32x2_t a) { + // CHECK-LABEL: test_vaddv_f32 + return vaddv_f32(a); + // CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s +} + +float32_t test_vaddvq_f32(float32x4_t a) { + // CHECK-LABEL: test_vaddvq_f32 + return vaddvq_f32(a); + // CHECK: faddp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s +} + +float64_t test_vaddvq_f64(float64x2_t a) { + // CHECK-LABEL: test_vaddvq_f64 + return vaddvq_f64(a); + // CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d +} + +float32_t test_vmaxv_f32(float32x2_t a) { + // CHECK-LABEL: test_vmaxv_f32 + return vmaxv_f32(a); + // CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s +} + +float64_t test_vmaxvq_f64(float64x2_t a) { + // CHECK-LABEL: test_vmaxvq_f64 + return vmaxvq_f64(a); + // CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d +} + +float32_t test_vminv_f32(float32x2_t a) { + // CHECK-LABEL: test_vminv_f32 + return vminv_f32(a); + // CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s +} + +float64_t test_vminvq_f64(float64x2_t a) { + // CHECK-LABEL: test_vminvq_f64 + return vminvq_f64(a); + // CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d +} + +float64_t test_vmaxnmvq_f64(float64x2_t a) { + // CHECK-LABEL: test_vmaxnmvq_f64 + return vmaxnmvq_f64(a); + // CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d +} + +float32_t test_vmaxnmv_f32(float32x2_t a) { + // CHECK-LABEL: test_vmaxnmv_f32 + return vmaxnmv_f32(a); + // CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s +} + +float64_t test_vminnmvq_f64(float64x2_t a) { + // CHECK-LABEL: test_vminnmvq_f64 + return vminnmvq_f64(a); + // CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d +} + +float32_t test_vminnmv_f32(float32x2_t a) { + // CHECK-LABEL: test_vminnmv_f32 + return vminnmv_f32(a); + // CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s +} + +int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) { + // CHECK-LABEL: test_vpaddq_s64 + return vpaddq_s64(a, b); + // CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) { + // CHECK-LABEL: test_vpaddq_u64 + return vpaddq_u64(a, b); + // CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint64_t test_vpaddd_u64(uint64x2_t a) { + // CHECK-LABEL: test_vpaddd_u64 + return vpaddd_u64(a); + // CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d +} + +int64_t test_vaddvq_s64(int64x2_t a) { + // CHECK-LABEL: test_vaddvq_s64 + return vaddvq_s64(a); + // CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d +} + +uint64_t test_vaddvq_u64(uint64x2_t a) { + // CHECK-LABEL: test_vaddvq_u64 + return vaddvq_u64(a); + // CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d +} + +float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) { + // CHECK-LABEL: 
test_vadd_f64 + return vadd_f64(a, b); + // CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) { + // CHECK-LABEL: test_vmul_f64 + return vmul_f64(a, b); + // CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) { + // CHECK-LABEL: test_vdiv_f64 + return vdiv_f64(a, b); + // CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) { + // CHECK-LABEL: test_vmla_f64 + return vmla_f64(a, b, c); + // CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) { + // CHECK-LABEL: test_vmls_f64 + return vmls_f64(a, b, c); + // CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) { + // CHECK-LABEL: test_vfma_f64 + return vfma_f64(a, b, c); + // CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) { + // CHECK-LABEL: test_vfms_f64 + return vfms_f64(a, b, c); + // CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) { + // CHECK-LABEL: test_vsub_f64 + return vsub_f64(a, b); + // CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) { + // CHECK-LABEL: test_vabd_f64 + return vabd_f64(a, b); + // CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) { +// CHECK-LABEL: test_vmax_f64 + return vmax_f64(a, b); +// CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) { +// CHECK-LABEL: test_vmin_f64 + return vmin_f64(a, b); +// CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) { +// CHECK-LABEL: test_vmaxnm_f64 + return vmaxnm_f64(a, b); +// CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) { +// CHECK-LABEL: test_vminnm_f64 + return vminnm_f64(a, b); +// CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vabs_f64(float64x1_t a) { + // CHECK-LABEL: test_vabs_f64 + return vabs_f64(a); + // CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vneg_f64(float64x1_t a) { + // CHECK-LABEL: test_vneg_f64 + return vneg_f64(a); + // CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}} +} + +int64x1_t test_vcvt_s64_f64(float64x1_t a) { + // CHECK-LABEL: test_vcvt_s64_f64 + return vcvt_s64_f64(a); + // CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}} +} + +uint64x1_t test_vcvt_u64_f64(float64x1_t a) { + // CHECK-LABEL: test_vcvt_u64_f64 + return vcvt_u64_f64(a); + // CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}} +} + +int64x1_t test_vcvtn_s64_f64(float64x1_t a) { + // CHECK-LABEL: test_vcvtn_s64_f64 + return vcvtn_s64_f64(a); + // CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}} +} + +uint64x1_t test_vcvtn_u64_f64(float64x1_t a) { + // CHECK-LABEL: test_vcvtn_u64_f64 + return vcvtn_u64_f64(a); + // CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}} +} + +int64x1_t test_vcvtp_s64_f64(float64x1_t a) { + // CHECK-LABEL: test_vcvtp_s64_f64 + return vcvtp_s64_f64(a); + // CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}} +} + +uint64x1_t test_vcvtp_u64_f64(float64x1_t a) { + // CHECK-LABEL: test_vcvtp_u64_f64 + return vcvtp_u64_f64(a); + // 
CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}} +} + +int64x1_t test_vcvtm_s64_f64(float64x1_t a) { + // CHECK-LABEL: test_vcvtm_s64_f64 + return vcvtm_s64_f64(a); + // CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}} +} + +uint64x1_t test_vcvtm_u64_f64(float64x1_t a) { + // CHECK-LABEL: test_vcvtm_u64_f64 + return vcvtm_u64_f64(a); + // CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}} +} + +int64x1_t test_vcvta_s64_f64(float64x1_t a) { + // CHECK-LABEL: test_vcvta_s64_f64 + return vcvta_s64_f64(a); + // CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}} +} + +uint64x1_t test_vcvta_u64_f64(float64x1_t a) { + // CHECK-LABEL: test_vcvta_u64_f64 + return vcvta_u64_f64(a); + // CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vcvt_f64_s64(int64x1_t a) { + // CHECK-LABEL: test_vcvt_f64_s64 + return vcvt_f64_s64(a); + // CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vcvt_f64_u64(uint64x1_t a) { + // CHECK-LABEL: test_vcvt_f64_u64 + return vcvt_f64_u64(a); + // CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}} +} + +int64x1_t test_vcvt_n_s64_f64(float64x1_t a) { + // CHECK-LABEL: test_vcvt_n_s64_f64 + return vcvt_n_s64_f64(a, 64); + // CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64 +} + +uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) { + // CHECK-LABEL: test_vcvt_n_u64_f64 + return vcvt_n_u64_f64(a, 64); + // CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64 +} + +float64x1_t test_vcvt_n_f64_s64(int64x1_t a) { + // CHECK-LABEL: test_vcvt_n_f64_s64 + return vcvt_n_f64_s64(a, 64); + // CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64 +} + +float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) { + // CHECK-LABEL: test_vcvt_n_f64_u64 + return vcvt_n_f64_u64(a, 64); + // CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64 +} + +float64x1_t test_vrndn_f64(float64x1_t a) { + // CHECK-LABEL: test_vrndn_f64 + return vrndn_f64(a); + // CHECK: frintn d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vrnda_f64(float64x1_t a) { + // CHECK-LABEL: test_vrnda_f64 + return vrnda_f64(a); + // CHECK: frinta d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vrndp_f64(float64x1_t a) { + // CHECK-LABEL: test_vrndp_f64 + return vrndp_f64(a); + // CHECK: frintp d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vrndm_f64(float64x1_t a) { + // CHECK-LABEL: test_vrndm_f64 + return vrndm_f64(a); + // CHECK: frintm d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vrndx_f64(float64x1_t a) { + // CHECK-LABEL: test_vrndx_f64 + return vrndx_f64(a); + // CHECK: frintx d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vrnd_f64(float64x1_t a) { + // CHECK-LABEL: test_vrnd_f64 + return vrnd_f64(a); + // CHECK: frintz d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vrndi_f64(float64x1_t a) { + // CHECK-LABEL: test_vrndi_f64 + return vrndi_f64(a); + // CHECK: frinti d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vrsqrte_f64(float64x1_t a) { + // CHECK-LABEL: test_vrsqrte_f64 + return vrsqrte_f64(a); + // CHECK: frsqrte d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vrecpe_f64(float64x1_t a) { + // CHECK-LABEL: test_vrecpe_f64 + return vrecpe_f64(a); + // CHECK: frecpe d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vsqrt_f64(float64x1_t a) { + // CHECK-LABEL: test_vsqrt_f64 + return vsqrt_f64(a); + // CHECK: fsqrt d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) { + // CHECK-LABEL: test_vrecps_f64 + return vrecps_f64(a, b); + // CHECK: frecps d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +} + +float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) { + // CHECK-LABEL: test_vrsqrts_f64 + return vrsqrts_f64(a, b); + // CHECK: frsqrts d{{[0-9]+}}, d{{[0-9]+}}, 
d{{[0-9]+}} +} diff --git a/test/CodeGen/aarch64-neon-ldst-one.c b/test/CodeGen/aarch64-neon-ldst-one.c new file mode 100644 index 0000000..f629260 --- /dev/null +++ b/test/CodeGen/aarch64-neon-ldst-one.c @@ -0,0 +1,2047 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +#include <arm_neon.h> + +uint8x16_t test_vld1q_dup_u8(uint8_t const *a) { + // CHECK-LABEL: test_vld1q_dup_u8 + return vld1q_dup_u8(a); + // CHECK: ld1r {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +uint16x8_t test_vld1q_dup_u16(uint16_t const *a) { + // CHECK-LABEL: test_vld1q_dup_u16 + return vld1q_dup_u16(a); + // CHECK: ld1r {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +uint32x4_t test_vld1q_dup_u32(uint32_t const *a) { + // CHECK-LABEL: test_vld1q_dup_u32 + return vld1q_dup_u32(a); + // CHECK: ld1r {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +uint64x2_t test_vld1q_dup_u64(uint64_t const *a) { + // CHECK-LABEL: test_vld1q_dup_u64 + return vld1q_dup_u64(a); + // CHECK: ld1r {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +int8x16_t test_vld1q_dup_s8(int8_t const *a) { + // CHECK-LABEL: test_vld1q_dup_s8 + return vld1q_dup_s8(a); + // CHECK: ld1r {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +int16x8_t test_vld1q_dup_s16(int16_t const *a) { + // CHECK-LABEL: test_vld1q_dup_s16 + return vld1q_dup_s16(a); + // CHECK: ld1r {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +int32x4_t test_vld1q_dup_s32(int32_t const *a) { + // CHECK-LABEL: test_vld1q_dup_s32 + return vld1q_dup_s32(a); + // CHECK: ld1r {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +int64x2_t test_vld1q_dup_s64(int64_t const *a) { + // CHECK-LABEL: test_vld1q_dup_s64 + return vld1q_dup_s64(a); + // CHECK: ld1r {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +float16x8_t test_vld1q_dup_f16(float16_t const *a) { + // CHECK-LABEL: test_vld1q_dup_f16 + return vld1q_dup_f16(a); + // CHECK: ld1r {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +float32x4_t test_vld1q_dup_f32(float32_t const *a) { + // CHECK-LABEL: test_vld1q_dup_f32 + return vld1q_dup_f32(a); + // CHECK: ld1r {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +float64x2_t test_vld1q_dup_f64(float64_t const *a) { + // CHECK-LABEL: test_vld1q_dup_f64 + return vld1q_dup_f64(a); + // CHECK: ld1r {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +poly8x16_t test_vld1q_dup_p8(poly8_t const *a) { + // CHECK-LABEL: test_vld1q_dup_p8 + return vld1q_dup_p8(a); + // CHECK: ld1r {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +poly16x8_t test_vld1q_dup_p16(poly16_t const *a) { + // CHECK-LABEL: test_vld1q_dup_p16 + return vld1q_dup_p16(a); + // CHECK: ld1r {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +poly64x2_t test_vld1q_dup_p64(poly64_t const *a) { + // CHECK-LABEL: test_vld1q_dup_p64 + return vld1q_dup_p64(a); + // CHECK: ld1r {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +uint8x8_t test_vld1_dup_u8(uint8_t const *a) { + // CHECK-LABEL: test_vld1_dup_u8 + return vld1_dup_u8(a); + // CHECK: ld1r {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +uint16x4_t test_vld1_dup_u16(uint16_t const *a) { + // CHECK-LABEL: test_vld1_dup_u16 + return vld1_dup_u16(a); + // CHECK: ld1r {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +uint32x2_t test_vld1_dup_u32(uint32_t const *a) { + // CHECK-LABEL: test_vld1_dup_u32 + return vld1_dup_u32(a); + // CHECK: ld1r {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +uint64x1_t test_vld1_dup_u64(uint64_t const *a) { + // CHECK-LABEL: test_vld1_dup_u64 + return vld1_dup_u64(a); + // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +int8x8_t test_vld1_dup_s8(int8_t const *a) 
{ + // CHECK-LABEL: test_vld1_dup_s8 + return vld1_dup_s8(a); + // CHECK: ld1r {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +int16x4_t test_vld1_dup_s16(int16_t const *a) { + // CHECK-LABEL: test_vld1_dup_s16 + return vld1_dup_s16(a); + // CHECK: ld1r {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +int32x2_t test_vld1_dup_s32(int32_t const *a) { + // CHECK-LABEL: test_vld1_dup_s32 + return vld1_dup_s32(a); + // CHECK: ld1r {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +int64x1_t test_vld1_dup_s64(int64_t const *a) { + // CHECK-LABEL: test_vld1_dup_s64 + return vld1_dup_s64(a); + // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +float16x4_t test_vld1_dup_f16(float16_t const *a) { + // CHECK-LABEL: test_vld1_dup_f16 + return vld1_dup_f16(a); + // CHECK: ld1r {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +float32x2_t test_vld1_dup_f32(float32_t const *a) { + // CHECK-LABEL: test_vld1_dup_f32 + return vld1_dup_f32(a); + // CHECK: ld1r {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +float64x1_t test_vld1_dup_f64(float64_t const *a) { + // CHECK-LABEL: test_vld1_dup_f64 + return vld1_dup_f64(a); + // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +poly8x8_t test_vld1_dup_p8(poly8_t const *a) { + // CHECK-LABEL: test_vld1_dup_p8 + return vld1_dup_p8(a); + // CHECK: ld1r {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +poly16x4_t test_vld1_dup_p16(poly16_t const *a) { + // CHECK-LABEL: test_vld1_dup_p16 + return vld1_dup_p16(a); + // CHECK: ld1r {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +poly64x1_t test_vld1_dup_p64(poly64_t const *a) { + // CHECK-LABEL: test_vld1_dup_p64 + return vld1_dup_p64(a); + // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +uint8x16x2_t test_vld2q_dup_u8(uint8_t const *a) { + // CHECK-LABEL: test_vld2q_dup_u8 + return vld2q_dup_u8(a); + // CHECK: ld2r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +uint16x8x2_t test_vld2q_dup_u16(uint16_t const *a) { + // CHECK-LABEL: test_vld2q_dup_u16 + return vld2q_dup_u16(a); + // CHECK: ld2r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +uint32x4x2_t test_vld2q_dup_u32(uint32_t const *a) { + // CHECK-LABEL: test_vld2q_dup_u32 + return vld2q_dup_u32(a); + // CHECK: ld2r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +uint64x2x2_t test_vld2q_dup_u64(uint64_t const *a) { + // CHECK-LABEL: test_vld2q_dup_u64 + return vld2q_dup_u64(a); + // CHECK: ld2r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +int8x16x2_t test_vld2q_dup_s8(int8_t const *a) { + // CHECK-LABEL: test_vld2q_dup_s8 + return vld2q_dup_s8(a); + // CHECK: ld2r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +int16x8x2_t test_vld2q_dup_s16(int16_t const *a) { + // CHECK-LABEL: test_vld2q_dup_s16 + return vld2q_dup_s16(a); + // CHECK: ld2r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +int32x4x2_t test_vld2q_dup_s32(int32_t const *a) { + // CHECK-LABEL: test_vld2q_dup_s32 + return vld2q_dup_s32(a); + // CHECK: ld2r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +int64x2x2_t test_vld2q_dup_s64(int64_t const *a) { + // CHECK-LABEL: test_vld2q_dup_s64 + return vld2q_dup_s64(a); + // CHECK: ld2r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +float16x8x2_t test_vld2q_dup_f16(float16_t const *a) { + // CHECK-LABEL: test_vld2q_dup_f16 + return vld2q_dup_f16(a); + // CHECK: ld2r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +float32x4x2_t test_vld2q_dup_f32(float32_t const *a) { + // CHECK-LABEL: test_vld2q_dup_f32 + return vld2q_dup_f32(a); + // CHECK: ld2r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + 
+float64x2x2_t test_vld2q_dup_f64(float64_t const *a) { + // CHECK-LABEL: test_vld2q_dup_f64 + return vld2q_dup_f64(a); + // CHECK: ld2r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +poly8x16x2_t test_vld2q_dup_p8(poly8_t const *a) { + // CHECK-LABEL: test_vld2q_dup_p8 + return vld2q_dup_p8(a); + // CHECK: ld2r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +poly16x8x2_t test_vld2q_dup_p16(poly16_t const *a) { + // CHECK-LABEL: test_vld2q_dup_p16 + return vld2q_dup_p16(a); + // CHECK: ld2r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +poly64x2x2_t test_vld2q_dup_p64(poly64_t const *a) { + // CHECK-LABEL: test_vld2q_dup_p64 + return vld2q_dup_p64(a); + // CHECK: ld2r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +uint8x8x2_t test_vld2_dup_u8(uint8_t const *a) { + // CHECK-LABEL: test_vld2_dup_u8 + return vld2_dup_u8(a); + // CHECK: ld2r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +uint16x4x2_t test_vld2_dup_u16(uint16_t const *a) { + // CHECK-LABEL: test_vld2_dup_u16 + return vld2_dup_u16(a); + // CHECK: ld2r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +uint32x2x2_t test_vld2_dup_u32(uint32_t const *a) { + // CHECK-LABEL: test_vld2_dup_u32 + return vld2_dup_u32(a); + // CHECK: ld2r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +uint64x1x2_t test_vld2_dup_u64(uint64_t const *a) { + // CHECK-LABEL: test_vld2_dup_u64 + return vld2_dup_u64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +int8x8x2_t test_vld2_dup_s8(int8_t const *a) { + // CHECK-LABEL: test_vld2_dup_s8 + return vld2_dup_s8(a); + // CHECK: ld2r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +int16x4x2_t test_vld2_dup_s16(int16_t const *a) { + // CHECK-LABEL: test_vld2_dup_s16 + return vld2_dup_s16(a); + // CHECK: ld2r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +int32x2x2_t test_vld2_dup_s32(int32_t const *a) { + // CHECK-LABEL: test_vld2_dup_s32 + return vld2_dup_s32(a); + // CHECK: ld2r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +int64x1x2_t test_vld2_dup_s64(int64_t const *a) { + // CHECK-LABEL: test_vld2_dup_s64 + return vld2_dup_s64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +float16x4x2_t test_vld2_dup_f16(float16_t const *a) { + // CHECK-LABEL: test_vld2_dup_f16 + return vld2_dup_f16(a); + // CHECK: ld2r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +float32x2x2_t test_vld2_dup_f32(float32_t const *a) { + // CHECK-LABEL: test_vld2_dup_f32 + return vld2_dup_f32(a); + // CHECK: ld2r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +float64x1x2_t test_vld2_dup_f64(float64_t const *a) { + // CHECK-LABEL: test_vld2_dup_f64 + return vld2_dup_f64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +poly8x8x2_t test_vld2_dup_p8(poly8_t const *a) { + // CHECK-LABEL: test_vld2_dup_p8 + return vld2_dup_p8(a); + // CHECK: ld2r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +poly16x4x2_t test_vld2_dup_p16(poly16_t const *a) { + // CHECK-LABEL: test_vld2_dup_p16 + return vld2_dup_p16(a); + // CHECK: ld2r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +poly64x1x2_t test_vld2_dup_p64(poly64_t const *a) { + // CHECK-LABEL: test_vld2_dup_p64 + return vld2_dup_p64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +uint8x16x3_t test_vld3q_dup_u8(uint8_t const *a) { + // CHECK-LABEL: test_vld3q_dup_u8 + return vld3q_dup_u8(a); + // CHECK: ld3r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, 
v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +uint16x8x3_t test_vld3q_dup_u16(uint16_t const *a) { + // CHECK-LABEL: test_vld3q_dup_u16 + return vld3q_dup_u16(a); + // CHECK: ld3r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +uint32x4x3_t test_vld3q_dup_u32(uint32_t const *a) { + // CHECK-LABEL: test_vld3q_dup_u32 + return vld3q_dup_u32(a); + // CHECK: ld3r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +uint64x2x3_t test_vld3q_dup_u64(uint64_t const *a) { + // CHECK-LABEL: test_vld3q_dup_u64 + return vld3q_dup_u64(a); + // CHECK: ld3r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +int8x16x3_t test_vld3q_dup_s8(int8_t const *a) { + // CHECK-LABEL: test_vld3q_dup_s8 + return vld3q_dup_s8(a); + // CHECK: ld3r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +int16x8x3_t test_vld3q_dup_s16(int16_t const *a) { + // CHECK-LABEL: test_vld3q_dup_s16 + return vld3q_dup_s16(a); + // CHECK: ld3r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +int32x4x3_t test_vld3q_dup_s32(int32_t const *a) { + // CHECK-LABEL: test_vld3q_dup_s32 + return vld3q_dup_s32(a); + // CHECK: ld3r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +int64x2x3_t test_vld3q_dup_s64(int64_t const *a) { + // CHECK-LABEL: test_vld3q_dup_s64 + return vld3q_dup_s64(a); + // CHECK: ld3r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +float16x8x3_t test_vld3q_dup_f16(float16_t const *a) { + // CHECK-LABEL: test_vld3q_dup_f16 + return vld3q_dup_f16(a); + // CHECK: ld3r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +float32x4x3_t test_vld3q_dup_f32(float32_t const *a) { + // CHECK-LABEL: test_vld3q_dup_f32 + return vld3q_dup_f32(a); + // CHECK: ld3r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, + // [{{x[0-9]+|sp}}] +} + +float64x2x3_t test_vld3q_dup_f64(float64_t const *a) { + // CHECK-LABEL: test_vld3q_dup_f64 + return vld3q_dup_f64(a); + // CHECK: ld3r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +poly8x16x3_t test_vld3q_dup_p8(poly8_t const *a) { + // CHECK-LABEL: test_vld3q_dup_p8 + return vld3q_dup_p8(a); + // CHECK: ld3r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, + // [{{x[0-9]+|sp}}] +} + +poly16x8x3_t test_vld3q_dup_p16(poly16_t const *a) { + // CHECK-LABEL: test_vld3q_dup_p16 + return vld3q_dup_p16(a); + // CHECK: ld3r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, + // [{{x[0-9]+|sp}}] +} + +poly64x2x3_t test_vld3q_dup_p64(poly64_t const *a) { + // CHECK-LABEL: test_vld3q_dup_p64 + return vld3q_dup_p64(a); + // CHECK: ld3r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, + // [{{x[0-9]+|sp}}] +} + +uint8x8x3_t test_vld3_dup_u8(uint8_t const *a) { + // CHECK-LABEL: test_vld3_dup_u8 + return vld3_dup_u8(a); + // CHECK: ld3r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +uint16x4x3_t test_vld3_dup_u16(uint16_t const *a) { + // CHECK-LABEL: test_vld3_dup_u16 + return vld3_dup_u16(a); + // CHECK: ld3r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +uint32x2x3_t test_vld3_dup_u32(uint32_t const *a) { + // CHECK-LABEL: test_vld3_dup_u32 + return vld3_dup_u32(a); + // CHECK: ld3r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +uint64x1x3_t test_vld3_dup_u64(uint64_t const *a) { + // CHECK-LABEL: test_vld3_dup_u64 + return vld3_dup_u64(a); + // CHECK: ld1 
{v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +int8x8x3_t test_vld3_dup_s8(int8_t const *a) { + // CHECK-LABEL: test_vld3_dup_s8 + return vld3_dup_s8(a); + // CHECK: ld3r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +int16x4x3_t test_vld3_dup_s16(int16_t const *a) { + // CHECK-LABEL: test_vld3_dup_s16 + return vld3_dup_s16(a); + // CHECK: ld3r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +int32x2x3_t test_vld3_dup_s32(int32_t const *a) { + // CHECK-LABEL: test_vld3_dup_s32 + return vld3_dup_s32(a); + // CHECK: ld3r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +int64x1x3_t test_vld3_dup_s64(int64_t const *a) { + // CHECK-LABEL: test_vld3_dup_s64 + return vld3_dup_s64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +float16x4x3_t test_vld3_dup_f16(float16_t const *a) { + // CHECK-LABEL: test_vld3_dup_f16 + return vld3_dup_f16(a); + // CHECK: ld3r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +float32x2x3_t test_vld3_dup_f32(float32_t const *a) { + // CHECK-LABEL: test_vld3_dup_f32 + return vld3_dup_f32(a); + // CHECK: ld3r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, + // [{{x[0-9]+|sp}}] +} + +float64x1x3_t test_vld3_dup_f64(float64_t const *a) { + // CHECK-LABEL: test_vld3_dup_f64 + return vld3_dup_f64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +poly8x8x3_t test_vld3_dup_p8(poly8_t const *a) { + // CHECK-LABEL: test_vld3_dup_p8 + return vld3_dup_p8(a); + // CHECK: ld3r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, + // [{{x[0-9]+|sp}}] +} + +poly16x4x3_t test_vld3_dup_p16(poly16_t const *a) { + // CHECK-LABEL: test_vld3_dup_p16 + return vld3_dup_p16(a); + // CHECK: ld3r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, + // [{{x[0-9]+|sp}}] +} + +poly64x1x3_t test_vld3_dup_p64(poly64_t const *a) { + // CHECK-LABEL: test_vld3_dup_p64 + return vld3_dup_p64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, + // [{{x[0-9]+|sp}}] +} + +uint8x16x4_t test_vld4q_dup_u8(uint8_t const *a) { + // CHECK-LABEL: test_vld4q_dup_u8 + return vld4q_dup_u8(a); + // CHECK: ld4r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +uint16x8x4_t test_vld4q_dup_u16(uint16_t const *a) { + // CHECK-LABEL: test_vld4q_dup_u16 + return vld4q_dup_u16(a); + // CHECK: ld4r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +uint32x4x4_t test_vld4q_dup_u32(uint32_t const *a) { + // CHECK-LABEL: test_vld4q_dup_u32 + return vld4q_dup_u32(a); + // CHECK: ld4r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +uint64x2x4_t test_vld4q_dup_u64(uint64_t const *a) { + // CHECK-LABEL: test_vld4q_dup_u64 + return vld4q_dup_u64(a); + // CHECK: ld4r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +int8x16x4_t test_vld4q_dup_s8(int8_t const *a) { + // CHECK-LABEL: test_vld4q_dup_s8 + return vld4q_dup_s8(a); + // CHECK: ld4r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +int16x8x4_t test_vld4q_dup_s16(int16_t const *a) { + // CHECK-LABEL: test_vld4q_dup_s16 + return vld4q_dup_s16(a); + // CHECK: ld4r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +int32x4x4_t 
test_vld4q_dup_s32(int32_t const *a) { + // CHECK-LABEL: test_vld4q_dup_s32 + return vld4q_dup_s32(a); + // CHECK: ld4r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +int64x2x4_t test_vld4q_dup_s64(int64_t const *a) { + // CHECK-LABEL: test_vld4q_dup_s64 + return vld4q_dup_s64(a); + // CHECK: ld4r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +float16x8x4_t test_vld4q_dup_f16(float16_t const *a) { + // CHECK-LABEL: test_vld4q_dup_f16 + return vld4q_dup_f16(a); + // CHECK: ld4r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +float32x4x4_t test_vld4q_dup_f32(float32_t const *a) { + // CHECK-LABEL: test_vld4q_dup_f32 + return vld4q_dup_f32(a); + // CHECK: ld4r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +float64x2x4_t test_vld4q_dup_f64(float64_t const *a) { + // CHECK-LABEL: test_vld4q_dup_f64 + return vld4q_dup_f64(a); + // CHECK: ld4r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +} + +poly8x16x4_t test_vld4q_dup_p8(poly8_t const *a) { + // CHECK-LABEL: test_vld4q_dup_p8 + return vld4q_dup_p8(a); + // CHECK: ld4r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, + // v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +} + +poly16x8x4_t test_vld4q_dup_p16(poly16_t const *a) { + // CHECK-LABEL: test_vld4q_dup_p16 + return vld4q_dup_p16(a); + // CHECK: ld4r {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, + // v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] +} + +poly64x2x4_t test_vld4q_dup_p64(poly64_t const *a) { + // CHECK-LABEL: test_vld4q_dup_p64 + return vld4q_dup_p64(a); + // CHECK: ld4r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, + // v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +uint8x8x4_t test_vld4_dup_u8(uint8_t const *a) { + // CHECK-LABEL: test_vld4_dup_u8 + return vld4_dup_u8(a); + // CHECK: ld4r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +uint16x4x4_t test_vld4_dup_u16(uint16_t const *a) { + // CHECK-LABEL: test_vld4_dup_u16 + return vld4_dup_u16(a); + // CHECK: ld4r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +uint32x2x4_t test_vld4_dup_u32(uint32_t const *a) { + // CHECK-LABEL: test_vld4_dup_u32 + return vld4_dup_u32(a); + // CHECK: ld4r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +uint64x1x4_t test_vld4_dup_u64(uint64_t const *a) { + // CHECK-LABEL: test_vld4_dup_u64 + return vld4_dup_u64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +int8x8x4_t test_vld4_dup_s8(int8_t const *a) { + // CHECK-LABEL: test_vld4_dup_s8 + return vld4_dup_s8(a); + // CHECK: ld4r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +int16x4x4_t test_vld4_dup_s16(int16_t const *a) { + // CHECK-LABEL: test_vld4_dup_s16 + return vld4_dup_s16(a); + // CHECK: ld4r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +int32x2x4_t test_vld4_dup_s32(int32_t const *a) { + // CHECK-LABEL: test_vld4_dup_s32 + return vld4_dup_s32(a); + // CHECK: ld4r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +int64x1x4_t test_vld4_dup_s64(int64_t const *a) { + // CHECK-LABEL: test_vld4_dup_s64 + return vld4_dup_s64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // 
v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +float16x4x4_t test_vld4_dup_f16(float16_t const *a) { + // CHECK-LABEL: test_vld4_dup_f16 + return vld4_dup_f16(a); + // CHECK: ld4r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +float32x2x4_t test_vld4_dup_f32(float32_t const *a) { + // CHECK-LABEL: test_vld4_dup_f32 + return vld4_dup_f32(a); + // CHECK: ld4r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, + // v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +} + +float64x1x4_t test_vld4_dup_f64(float64_t const *a) { + // CHECK-LABEL: test_vld4_dup_f64 + return vld4_dup_f64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +poly8x8x4_t test_vld4_dup_p8(poly8_t const *a) { + // CHECK-LABEL: test_vld4_dup_p8 + return vld4_dup_p8(a); + // CHECK: ld4r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, + // v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] +} + +poly16x4x4_t test_vld4_dup_p16(poly16_t const *a) { + // CHECK-LABEL: test_vld4_dup_p16 + return vld4_dup_p16(a); + // CHECK: ld4r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, + // v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +} + +poly64x1x4_t test_vld4_dup_p64(poly64_t const *a) { + // CHECK-LABEL: test_vld4_dup_p64 + return vld4_dup_p64(a); + // CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, + // v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +uint8x16_t test_vld1q_lane_u8(uint8_t const *a, uint8x16_t b) { + // CHECK-LABEL: test_vld1q_lane_u8 + return vld1q_lane_u8(a, b, 15); + // CHECK: ld1 {v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +uint16x8_t test_vld1q_lane_u16(uint16_t const *a, uint16x8_t b) { + // CHECK-LABEL: test_vld1q_lane_u16 + return vld1q_lane_u16(a, b, 7); + // CHECK: ld1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +uint32x4_t test_vld1q_lane_u32(uint32_t const *a, uint32x4_t b) { + // CHECK-LABEL: test_vld1q_lane_u32 + return vld1q_lane_u32(a, b, 3); + // CHECK: ld1 {v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +uint64x2_t test_vld1q_lane_u64(uint64_t const *a, uint64x2_t b) { + // CHECK-LABEL: test_vld1q_lane_u64 + return vld1q_lane_u64(a, b, 1); + // CHECK: ld1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +int8x16_t test_vld1q_lane_s8(int8_t const *a, int8x16_t b) { + // CHECK-LABEL: test_vld1q_lane_s8 + return vld1q_lane_s8(a, b, 15); + // CHECK: ld1 {v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +int16x8_t test_vld1q_lane_s16(int16_t const *a, int16x8_t b) { + // CHECK-LABEL: test_vld1q_lane_s16 + return vld1q_lane_s16(a, b, 7); + // CHECK: ld1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +int32x4_t test_vld1q_lane_s32(int32_t const *a, int32x4_t b) { + // CHECK-LABEL: test_vld1q_lane_s32 + return vld1q_lane_s32(a, b, 3); + // CHECK: ld1 {v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +int64x2_t test_vld1q_lane_s64(int64_t const *a, int64x2_t b) { + // CHECK-LABEL: test_vld1q_lane_s64 + return vld1q_lane_s64(a, b, 1); + // CHECK: ld1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +float16x8_t test_vld1q_lane_f16(float16_t const *a, float16x8_t b) { + // CHECK-LABEL: test_vld1q_lane_f16 + return vld1q_lane_f16(a, b, 7); + // CHECK: ld1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +float32x4_t test_vld1q_lane_f32(float32_t const *a, float32x4_t b) { + // CHECK-LABEL: test_vld1q_lane_f32 + return vld1q_lane_f32(a, b, 3); + // CHECK: ld1 {v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +float64x2_t test_vld1q_lane_f64(float64_t const *a, float64x2_t b) { + // CHECK-LABEL: test_vld1q_lane_f64 + return vld1q_lane_f64(a, b, 1); + // CHECK: ld1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + 
+poly8x16_t test_vld1q_lane_p8(poly8_t const *a, poly8x16_t b) { + // CHECK-LABEL: test_vld1q_lane_p8 + return vld1q_lane_p8(a, b, 15); + // CHECK: ld1 {v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +poly16x8_t test_vld1q_lane_p16(poly16_t const *a, poly16x8_t b) { + // CHECK-LABEL: test_vld1q_lane_p16 + return vld1q_lane_p16(a, b, 7); + // CHECK: ld1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +poly64x2_t test_vld1q_lane_p64(poly64_t const *a, poly64x2_t b) { + // CHECK-LABEL: test_vld1q_lane_p64 + return vld1q_lane_p64(a, b, 1); + // CHECK: ld1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +uint8x8_t test_vld1_lane_u8(uint8_t const *a, uint8x8_t b) { + // CHECK-LABEL: test_vld1_lane_u8 + return vld1_lane_u8(a, b, 7); + // CHECK: ld1 {v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +uint16x4_t test_vld1_lane_u16(uint16_t const *a, uint16x4_t b) { + // CHECK-LABEL: test_vld1_lane_u16 + return vld1_lane_u16(a, b, 3); + // CHECK: ld1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +uint32x2_t test_vld1_lane_u32(uint32_t const *a, uint32x2_t b) { + // CHECK-LABEL: test_vld1_lane_u32 + return vld1_lane_u32(a, b, 1); + // CHECK: ld1 {v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +uint64x1_t test_vld1_lane_u64(uint64_t const *a, uint64x1_t b) { + // CHECK-LABEL: test_vld1_lane_u64 + return vld1_lane_u64(a, b, 0); + // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +int8x8_t test_vld1_lane_s8(int8_t const *a, int8x8_t b) { + // CHECK-LABEL: test_vld1_lane_s8 + return vld1_lane_s8(a, b, 7); + // CHECK: ld1 {v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +int16x4_t test_vld1_lane_s16(int16_t const *a, int16x4_t b) { + // CHECK-LABEL: test_vld1_lane_s16 + return vld1_lane_s16(a, b, 3); + // CHECK: ld1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +int32x2_t test_vld1_lane_s32(int32_t const *a, int32x2_t b) { + // CHECK-LABEL: test_vld1_lane_s32 + return vld1_lane_s32(a, b, 1); + // CHECK: ld1 {v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +int64x1_t test_vld1_lane_s64(int64_t const *a, int64x1_t b) { + // CHECK-LABEL: test_vld1_lane_s64 + return vld1_lane_s64(a, b, 0); + // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +float16x4_t test_vld1_lane_f16(float16_t const *a, float16x4_t b) { + // CHECK-LABEL: test_vld1_lane_f16 + return vld1_lane_f16(a, b, 3); + // CHECK: ld1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +float32x2_t test_vld1_lane_f32(float32_t const *a, float32x2_t b) { + // CHECK-LABEL: test_vld1_lane_f32 + return vld1_lane_f32(a, b, 1); + // CHECK: ld1 {v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +float64x1_t test_vld1_lane_f64(float64_t const *a, float64x1_t b) { + // CHECK-LABEL: test_vld1_lane_f64 + return vld1_lane_f64(a, b, 0); + // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +poly8x8_t test_vld1_lane_p8(poly8_t const *a, poly8x8_t b) { + // CHECK-LABEL: test_vld1_lane_p8 + return vld1_lane_p8(a, b, 7); + // CHECK: ld1 {v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +poly16x4_t test_vld1_lane_p16(poly16_t const *a, poly16x4_t b) { + // CHECK-LABEL: test_vld1_lane_p16 + return vld1_lane_p16(a, b, 3); + // CHECK: ld1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +poly64x1_t test_vld1_lane_p64(poly64_t const *a, poly64x1_t b) { + // CHECK-LABEL: test_vld1_lane_p64 + return vld1_lane_p64(a, b, 0); + // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +uint16x8x2_t test_vld2q_lane_u16(uint16_t const *a, uint16x8x2_t b) { + // CHECK-LABEL: test_vld2q_lane_u16 + return vld2q_lane_u16(a, b, 7); + // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +uint32x4x2_t test_vld2q_lane_u32(uint32_t const *a, 
uint32x4x2_t b) { + // CHECK-LABEL: test_vld2q_lane_u32 + return vld2q_lane_u32(a, b, 3); + // CHECK: ld2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +uint64x2x2_t test_vld2q_lane_u64(uint64_t const *a, uint64x2x2_t b) { + // CHECK-LABEL: test_vld2q_lane_u64 + return vld2q_lane_u64(a, b, 1); + // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +int16x8x2_t test_vld2q_lane_s16(int16_t const *a, int16x8x2_t b) { + // CHECK-LABEL: test_vld2q_lane_s16 + return vld2q_lane_s16(a, b, 7); + // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +int32x4x2_t test_vld2q_lane_s32(int32_t const *a, int32x4x2_t b) { + // CHECK-LABEL: test_vld2q_lane_s32 + return vld2q_lane_s32(a, b, 3); + // CHECK: ld2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +int64x2x2_t test_vld2q_lane_s64(int64_t const *a, int64x2x2_t b) { + // CHECK-LABEL: test_vld2q_lane_s64 + return vld2q_lane_s64(a, b, 1); + // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +float16x8x2_t test_vld2q_lane_f16(float16_t const *a, float16x8x2_t b) { + // CHECK-LABEL: test_vld2q_lane_f16 + return vld2q_lane_f16(a, b, 7); + // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +float32x4x2_t test_vld2q_lane_f32(float32_t const *a, float32x4x2_t b) { + // CHECK-LABEL: test_vld2q_lane_f32 + return vld2q_lane_f32(a, b, 3); + // CHECK: ld2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +float64x2x2_t test_vld2q_lane_f64(float64_t const *a, float64x2x2_t b) { + // CHECK-LABEL: test_vld2q_lane_f64 + return vld2q_lane_f64(a, b, 1); + // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +poly16x8x2_t test_vld2q_lane_p16(poly16_t const *a, poly16x8x2_t b) { + // CHECK-LABEL: test_vld2q_lane_p16 + return vld2q_lane_p16(a, b, 7); + // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +poly64x2x2_t test_vld2q_lane_p64(poly64_t const *a, poly64x2x2_t b) { + // CHECK-LABEL: test_vld2q_lane_p64 + return vld2q_lane_p64(a, b, 1); + // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +uint8x8x2_t test_vld2_lane_u8(uint8_t const *a, uint8x8x2_t b) { + // CHECK-LABEL: test_vld2_lane_u8 + return vld2_lane_u8(a, b, 7); + // CHECK: ld2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +uint16x4x2_t test_vld2_lane_u16(uint16_t const *a, uint16x4x2_t b) { + // CHECK-LABEL: test_vld2_lane_u16 + return vld2_lane_u16(a, b, 3); + // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +uint32x2x2_t test_vld2_lane_u32(uint32_t const *a, uint32x2x2_t b) { + // CHECK-LABEL: test_vld2_lane_u32 + return vld2_lane_u32(a, b, 1); + // CHECK: ld2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +uint64x1x2_t test_vld2_lane_u64(uint64_t const *a, uint64x1x2_t b) { + // CHECK-LABEL: test_vld2_lane_u64 + return vld2_lane_u64(a, b, 0); + // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +int8x8x2_t test_vld2_lane_s8(int8_t const *a, int8x8x2_t b) { + // CHECK-LABEL: test_vld2_lane_s8 + return vld2_lane_s8(a, b, 7); + // CHECK: ld2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +int16x4x2_t test_vld2_lane_s16(int16_t const *a, int16x4x2_t b) { + // CHECK-LABEL: test_vld2_lane_s16 + return vld2_lane_s16(a, b, 3); + // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +int32x2x2_t test_vld2_lane_s32(int32_t const *a, int32x2x2_t b) { + // CHECK-LABEL: test_vld2_lane_s32 + return vld2_lane_s32(a, b, 1); + // CHECK: ld2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], 
[{{x[0-9]+|sp}}] +} + +int64x1x2_t test_vld2_lane_s64(int64_t const *a, int64x1x2_t b) { + // CHECK-LABEL: test_vld2_lane_s64 + return vld2_lane_s64(a, b, 0); + // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +float16x4x2_t test_vld2_lane_f16(float16_t const *a, float16x4x2_t b) { + // CHECK-LABEL: test_vld2_lane_f16 + return vld2_lane_f16(a, b, 3); + // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +float32x2x2_t test_vld2_lane_f32(float32_t const *a, float32x2x2_t b) { + // CHECK-LABEL: test_vld2_lane_f32 + return vld2_lane_f32(a, b, 1); + // CHECK: ld2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +float64x1x2_t test_vld2_lane_f64(float64_t const *a, float64x1x2_t b) { + // CHECK-LABEL: test_vld2_lane_f64 + return vld2_lane_f64(a, b, 0); + // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +poly8x8x2_t test_vld2_lane_p8(poly8_t const *a, poly8x8x2_t b) { + // CHECK-LABEL: test_vld2_lane_p8 + return vld2_lane_p8(a, b, 7); + // CHECK: ld2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +poly16x4x2_t test_vld2_lane_p16(poly16_t const *a, poly16x4x2_t b) { + // CHECK-LABEL: test_vld2_lane_p16 + return vld2_lane_p16(a, b, 3); + // CHECK: ld2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +poly64x1x2_t test_vld2_lane_p64(poly64_t const *a, poly64x1x2_t b) { + // CHECK-LABEL: test_vld2_lane_p64 + return vld2_lane_p64(a, b, 0); + // CHECK: ld2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +uint16x8x3_t test_vld3q_lane_u16(uint16_t const *a, uint16x8x3_t b) { + // CHECK-LABEL: test_vld3q_lane_u16 + return vld3q_lane_u16(a, b, 7); + // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +uint32x4x3_t test_vld3q_lane_u32(uint32_t const *a, uint32x4x3_t b) { + // CHECK-LABEL: test_vld3q_lane_u32 + return vld3q_lane_u32(a, b, 3); + // CHECK: ld3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +uint64x2x3_t test_vld3q_lane_u64(uint64_t const *a, uint64x2x3_t b) { + // CHECK-LABEL: test_vld3q_lane_u64 + return vld3q_lane_u64(a, b, 1); + // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +int16x8x3_t test_vld3q_lane_s16(int16_t const *a, int16x8x3_t b) { + // CHECK-LABEL: test_vld3q_lane_s16 + return vld3q_lane_s16(a, b, 7); + // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +int32x4x3_t test_vld3q_lane_s32(int32_t const *a, int32x4x3_t b) { + // CHECK-LABEL: test_vld3q_lane_s32 + return vld3q_lane_s32(a, b, 3); + // CHECK: ld3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +int64x2x3_t test_vld3q_lane_s64(int64_t const *a, int64x2x3_t b) { + // CHECK-LABEL: test_vld3q_lane_s64 + return vld3q_lane_s64(a, b, 1); + // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +float16x8x3_t test_vld3q_lane_f16(float16_t const *a, float16x8x3_t b) { + // CHECK-LABEL: test_vld3q_lane_f16 + return vld3q_lane_f16(a, b, 7); + // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +float32x4x3_t test_vld3q_lane_f32(float32_t const *a, float32x4x3_t b) { + // CHECK-LABEL: test_vld3q_lane_f32 + return vld3q_lane_f32(a, b, 3); + // CHECK: ld3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +float64x2x3_t test_vld3q_lane_f64(float64_t const *a, float64x2x3_t b) { + // CHECK-LABEL: test_vld3q_lane_f64 + return vld3q_lane_f64(a, b, 1); + // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, 
v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +poly8x16x3_t test_vld3q_lane_p8(poly8_t const *a, poly8x16x3_t b) { + // CHECK-LABEL: test_vld3q_lane_p8 + return vld3q_lane_p8(a, b, 15); + // CHECK: ld3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +poly16x8x3_t test_vld3q_lane_p16(poly16_t const *a, poly16x8x3_t b) { + // CHECK-LABEL: test_vld3q_lane_p16 + return vld3q_lane_p16(a, b, 7); + // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +poly64x2x3_t test_vld3q_lane_p64(poly64_t const *a, poly64x2x3_t b) { + // CHECK-LABEL: test_vld3q_lane_p64 + return vld3q_lane_p64(a, b, 1); + // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +uint8x8x3_t test_vld3_lane_u8(uint8_t const *a, uint8x8x3_t b) { + // CHECK-LABEL: test_vld3_lane_u8 + return vld3_lane_u8(a, b, 7); + // CHECK: ld3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +uint16x4x3_t test_vld3_lane_u16(uint16_t const *a, uint16x4x3_t b) { + // CHECK-LABEL: test_vld3_lane_u16 + return vld3_lane_u16(a, b, 3); + // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +uint32x2x3_t test_vld3_lane_u32(uint32_t const *a, uint32x2x3_t b) { + // CHECK-LABEL: test_vld3_lane_u32 + return vld3_lane_u32(a, b, 1); + // CHECK: ld3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +uint64x1x3_t test_vld3_lane_u64(uint64_t const *a, uint64x1x3_t b) { + // CHECK-LABEL: test_vld3_lane_u64 + return vld3_lane_u64(a, b, 0); + // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +int8x8x3_t test_vld3_lane_s8(int8_t const *a, int8x8x3_t b) { + // CHECK-LABEL: test_vld3_lane_s8 + return vld3_lane_s8(a, b, 7); + // CHECK: ld3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +int16x4x3_t test_vld3_lane_s16(int16_t const *a, int16x4x3_t b) { + // CHECK-LABEL: test_vld3_lane_s16 + return vld3_lane_s16(a, b, 3); + // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +int32x2x3_t test_vld3_lane_s32(int32_t const *a, int32x2x3_t b) { + // CHECK-LABEL: test_vld3_lane_s32 + return vld3_lane_s32(a, b, 1); + // CHECK: ld3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +int64x1x3_t test_vld3_lane_s64(int64_t const *a, int64x1x3_t b) { + // CHECK-LABEL: test_vld3_lane_s64 + return vld3_lane_s64(a, b, 0); + // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +float16x4x3_t test_vld3_lane_f16(float16_t const *a, float16x4x3_t b) { + // CHECK-LABEL: test_vld3_lane_f16 + return vld3_lane_f16(a, b, 3); + // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +float32x2x3_t test_vld3_lane_f32(float32_t const *a, float32x2x3_t b) { + // CHECK-LABEL: test_vld3_lane_f32 + return vld3_lane_f32(a, b, 1); + // CHECK: ld3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +float64x1x3_t test_vld3_lane_f64(float64_t const *a, float64x1x3_t b) { + // CHECK-LABEL: test_vld3_lane_f64 + return vld3_lane_f64(a, b, 0); + // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +poly8x8x3_t test_vld3_lane_p8(poly8_t const *a, poly8x8x3_t b) { + // CHECK-LABEL: test_vld3_lane_p8 + return vld3_lane_p8(a, b, 7); + // CHECK: ld3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +poly16x4x3_t test_vld3_lane_p16(poly16_t const *a, poly16x4x3_t b) { + // CHECK-LABEL: test_vld3_lane_p16 
+ return vld3_lane_p16(a, b, 3); + // CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +poly64x1x3_t test_vld3_lane_p64(poly64_t const *a, poly64x1x3_t b) { + // CHECK-LABEL: test_vld3_lane_p64 + return vld3_lane_p64(a, b, 0); + // CHECK: ld3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +uint8x16x4_t test_vld4q_lane_u8(uint8_t const *a, uint8x16x4_t b) { + // CHECK-LABEL: test_vld4q_lane_u8 + return vld4q_lane_u8(a, b, 15); + // CHECK: ld4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +uint16x8x4_t test_vld4q_lane_u16(uint16_t const *a, uint16x8x4_t b) { + // CHECK-LABEL: test_vld4q_lane_u16 + return vld4q_lane_u16(a, b, 7); + // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +uint32x4x4_t test_vld4q_lane_u32(uint32_t const *a, uint32x4x4_t b) { + // CHECK-LABEL: test_vld4q_lane_u32 + return vld4q_lane_u32(a, b, 3); + // CHECK: ld4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +uint64x2x4_t test_vld4q_lane_u64(uint64_t const *a, uint64x2x4_t b) { + // CHECK-LABEL: test_vld4q_lane_u64 + return vld4q_lane_u64(a, b, 1); + // CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +int8x16x4_t test_vld4q_lane_s8(int8_t const *a, int8x16x4_t b) { + // CHECK-LABEL: test_vld4q_lane_s8 + return vld4q_lane_s8(a, b, 15); + // CHECK: ld4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +int16x8x4_t test_vld4q_lane_s16(int16_t const *a, int16x8x4_t b) { + // CHECK-LABEL: test_vld4q_lane_s16 + return vld4q_lane_s16(a, b, 7); + // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +int32x4x4_t test_vld4q_lane_s32(int32_t const *a, int32x4x4_t b) { + // CHECK-LABEL: test_vld4q_lane_s32 + return vld4q_lane_s32(a, b, 3); + // CHECK: ld4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +int64x2x4_t test_vld4q_lane_s64(int64_t const *a, int64x2x4_t b) { + // CHECK-LABEL: test_vld4q_lane_s64 + return vld4q_lane_s64(a, b, 1); + // CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +float16x8x4_t test_vld4q_lane_f16(float16_t const *a, float16x8x4_t b) { + // CHECK-LABEL: test_vld4q_lane_f16 + return vld4q_lane_f16(a, b, 7); + // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +float32x4x4_t test_vld4q_lane_f32(float32_t const *a, float32x4x4_t b) { + // CHECK-LABEL: test_vld4q_lane_f32 + return vld4q_lane_f32(a, b, 3); + // CHECK: ld4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +float64x2x4_t test_vld4q_lane_f64(float64_t const *a, float64x2x4_t b) { + // CHECK-LABEL: test_vld4q_lane_f64 + return vld4q_lane_f64(a, b, 1); + // CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +poly8x16x4_t test_vld4q_lane_p8(poly8_t const *a, poly8x16x4_t b) { + // CHECK-LABEL: test_vld4q_lane_p8 + return vld4q_lane_p8(a, b, 15); + // CHECK: ld4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +poly16x8x4_t test_vld4q_lane_p16(poly16_t const *a, poly16x8x4_t b) { + // CHECK-LABEL: test_vld4q_lane_p16 + return vld4q_lane_p16(a, b, 7); + // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +poly64x2x4_t 
test_vld4q_lane_p64(poly64_t const *a, poly64x2x4_t b) { + // CHECK-LABEL: test_vld4q_lane_p64 + return vld4q_lane_p64(a, b, 1); + // CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +uint8x8x4_t test_vld4_lane_u8(uint8_t const *a, uint8x8x4_t b) { + // CHECK-LABEL: test_vld4_lane_u8 + return vld4_lane_u8(a, b, 7); + // CHECK: ld4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +uint16x4x4_t test_vld4_lane_u16(uint16_t const *a, uint16x4x4_t b) { + // CHECK-LABEL: test_vld4_lane_u16 + return vld4_lane_u16(a, b, 3); + // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +uint32x2x4_t test_vld4_lane_u32(uint32_t const *a, uint32x2x4_t b) { + // CHECK-LABEL: test_vld4_lane_u32 + return vld4_lane_u32(a, b, 1); + // CHECK: ld4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +uint64x1x4_t test_vld4_lane_u64(uint64_t const *a, uint64x1x4_t b) { + // CHECK-LABEL: test_vld4_lane_u64 + return vld4_lane_u64(a, b, 0); + // CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +int8x8x4_t test_vld4_lane_s8(int8_t const *a, int8x8x4_t b) { + // CHECK-LABEL: test_vld4_lane_s8 + return vld4_lane_s8(a, b, 7); + // CHECK: ld4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +int16x4x4_t test_vld4_lane_s16(int16_t const *a, int16x4x4_t b) { + // CHECK-LABEL: test_vld4_lane_s16 + return vld4_lane_s16(a, b, 3); + // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +int32x2x4_t test_vld4_lane_s32(int32_t const *a, int32x2x4_t b) { + // CHECK-LABEL: test_vld4_lane_s32 + return vld4_lane_s32(a, b, 1); + // CHECK: ld4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +int64x1x4_t test_vld4_lane_s64(int64_t const *a, int64x1x4_t b) { + // CHECK-LABEL: test_vld4_lane_s64 + return vld4_lane_s64(a, b, 0); + // CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +float16x4x4_t test_vld4_lane_f16(float16_t const *a, float16x4x4_t b) { + // CHECK-LABEL: test_vld4_lane_f16 + return vld4_lane_f16(a, b, 3); + // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +float32x2x4_t test_vld4_lane_f32(float32_t const *a, float32x2x4_t b) { + // CHECK-LABEL: test_vld4_lane_f32 + return vld4_lane_f32(a, b, 1); + // CHECK: ld4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +float64x1x4_t test_vld4_lane_f64(float64_t const *a, float64x1x4_t b) { + // CHECK-LABEL: test_vld4_lane_f64 + return vld4_lane_f64(a, b, 0); + // CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +poly8x8x4_t test_vld4_lane_p8(poly8_t const *a, poly8x8x4_t b) { + // CHECK-LABEL: test_vld4_lane_p8 + return vld4_lane_p8(a, b, 7); + // CHECK: ld4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +poly16x4x4_t test_vld4_lane_p16(poly16_t const *a, poly16x4x4_t b) { + // CHECK-LABEL: test_vld4_lane_p16 + return vld4_lane_p16(a, b, 3); + // CHECK: ld4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +poly64x1x4_t test_vld4_lane_p64(poly64_t const *a, poly64x1x4_t b) { + // CHECK-LABEL: test_vld4_lane_p64 + return vld4_lane_p64(a, b, 0); + // CHECK: ld4 {v{{[0-9]+}}.d, 
v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst1q_lane_u8(uint8_t const *a, uint8x16_t b) { + // CHECK-LABEL: test_vst1q_lane_u8 + vst1q_lane_u8(a, b, 15); + // CHECK: st1 {v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +void test_vst1q_lane_u16(uint16_t const *a, uint16x8_t b) { + // CHECK-LABEL: test_vst1q_lane_u16 + vst1q_lane_u16(a, b, 7); + // CHECK: st1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst1q_lane_u32(uint32_t const *a, uint32x4_t b) { + // CHECK-LABEL: test_vst1q_lane_u32 + vst1q_lane_u32(a, b, 3); + // CHECK: st1 {v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +void test_vst1q_lane_u64(uint64_t const *a, uint64x2_t b) { + // CHECK-LABEL: test_vst1q_lane_u64 + vst1q_lane_u64(a, b, 1); + // CHECK: st1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +void test_vst1q_lane_s8(int8_t const *a, int8x16_t b) { + // CHECK-LABEL: test_vst1q_lane_s8 + vst1q_lane_s8(a, b, 15); + // CHECK: st1 {v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +void test_vst1q_lane_s16(int16_t const *a, int16x8_t b) { + // CHECK-LABEL: test_vst1q_lane_s16 + vst1q_lane_s16(a, b, 7); + // CHECK: st1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst1q_lane_s32(int32_t const *a, int32x4_t b) { + // CHECK-LABEL: test_vst1q_lane_s32 + vst1q_lane_s32(a, b, 3); + // CHECK: st1 {v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +void test_vst1q_lane_s64(int64_t const *a, int64x2_t b) { + // CHECK-LABEL: test_vst1q_lane_s64 + vst1q_lane_s64(a, b, 1); + // CHECK: st1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +void test_vst1q_lane_f16(float16_t const *a, float16x8_t b) { + // CHECK-LABEL: test_vst1q_lane_f16 + vst1q_lane_f16(a, b, 7); + // CHECK: st1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst1q_lane_f32(float32_t const *a, float32x4_t b) { + // CHECK-LABEL: test_vst1q_lane_f32 + vst1q_lane_f32(a, b, 3); + // CHECK: st1 {v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +void test_vst1q_lane_f64(float64_t const *a, float64x2_t b) { + // CHECK-LABEL: test_vst1q_lane_f64 + vst1q_lane_f64(a, b, 1); + // CHECK: st1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +poly8x16_t test_vst1q_lane_p8(poly8_t const *a, poly8x16_t b) { + // CHECK-LABEL: test_vst1q_lane_p8 + vst1q_lane_p8(a, b, 15); + // CHECK: st1 {v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +void test_vst1q_lane_p16(poly16_t const *a, poly16x8_t b) { + // CHECK-LABEL: test_vst1q_lane_p16 + vst1q_lane_p16(a, b, 7); + // CHECK: st1 {v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst1q_lane_p64(poly64_t const *a, poly64x2_t b) { + // CHECK-LABEL: test_vst1q_lane_p64 + vst1q_lane_p64(a, b, 1); + // CHECK: st1 {v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +void test_vst1_lane_u8(uint8_t const *a, uint8x8_t b) { + // CHECK-LABEL: test_vst1_lane_u8 + vst1_lane_u8(a, b, 7); + // CHECK: st1 {v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +void test_vst1_lane_u16(uint16_t const *a, uint16x4_t b) { + // CHECK-LABEL: test_vst1_lane_u16 + vst1_lane_u16(a, b, 3); + // CHECK: st1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void test_vst1_lane_u32(uint32_t const *a, uint32x2_t b) { + // CHECK-LABEL: test_vst1_lane_u32 + vst1_lane_u32(a, b, 1); + // CHECK: st1 {v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +void test_vst1_lane_u64(uint64_t const *a, uint64x1_t b) { + // CHECK-LABEL: test_vst1_lane_u64 + vst1_lane_u64(a, b, 0); + // CHECK: st1 {v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst1_lane_s8(int8_t const *a, int8x8_t b) { + // CHECK-LABEL: test_vst1_lane_s8 + vst1_lane_s8(a, b, 7); + // CHECK: st1 {v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +void 
test_vst1_lane_s16(int16_t const *a, int16x4_t b) { + // CHECK-LABEL: test_vst1_lane_s16 + vst1_lane_s16(a, b, 3); + // CHECK: st1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void test_vst1_lane_s32(int32_t const *a, int32x2_t b) { + // CHECK-LABEL: test_vst1_lane_s32 + vst1_lane_s32(a, b, 1); + // CHECK: st1 {v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +void test_vst1_lane_s64(int64_t const *a, int64x1_t b) { + // CHECK-LABEL: test_vst1_lane_s64 + vst1_lane_s64(a, b, 0); + // CHECK: st1 {v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst1_lane_f16(float16_t const *a, float16x4_t b) { + // CHECK-LABEL: test_vst1_lane_f16 + vst1_lane_f16(a, b, 3); + // CHECK: st1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void test_vst1_lane_f32(float32_t const *a, float32x2_t b) { + // CHECK-LABEL: test_vst1_lane_f32 + vst1_lane_f32(a, b, 1); + // CHECK: st1 {v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +void test_vst1_lane_f64(float64_t const *a, float64x1_t b) { + // CHECK-LABEL: test_vst1_lane_f64 + vst1_lane_f64(a, b, 0); + // CHECK: st1 {v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst1_lane_p8(poly8_t const *a, poly8x8_t b) { + // CHECK-LABEL: test_vst1_lane_p8 + vst1_lane_p8(a, b, 7); + // CHECK: st1 {v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +void test_vst1_lane_p16(poly16_t const *a, poly16x4_t b) { + // CHECK-LABEL: test_vst1_lane_p16 + vst1_lane_p16(a, b, 3); + // CHECK: st1 {v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void test_vst1_lane_p64(poly64_t const *a, poly64x1_t b) { + // CHECK-LABEL: test_vst1_lane_p64 + vst1_lane_p64(a, b, 0); + // CHECK: st1 {v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst2q_lane_u8(uint8_t const *a, uint8x16x2_t b) { + // CHECK-LABEL: test_vst2q_lane_u8 + vst2q_lane_u8(a, b, 15); + // CHECK: st2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +void test_vst2q_lane_u16(uint16_t const *a, uint16x8x2_t b) { + // CHECK-LABEL: test_vst2q_lane_u16 + vst2q_lane_u16(a, b, 7); + // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst2q_lane_u32(uint32_t const *a, uint32x4x2_t b) { + // CHECK-LABEL: test_vst2q_lane_u32 + vst2q_lane_u32(a, b, 3); + // CHECK: st2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +void test_vst2q_lane_u64(uint64_t const *a, uint64x2x2_t b) { + // CHECK-LABEL: test_vst2q_lane_u64 + vst2q_lane_u64(a, b, 1); + // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +void test_vst2q_lane_s8(int8_t const *a, int8x16x2_t b) { + // CHECK-LABEL: test_vst2q_lane_s8 + vst2q_lane_s8(a, b, 15); + // CHECK: st2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +void test_vst2q_lane_s16(int16_t const *a, int16x8x2_t b) { + // CHECK-LABEL: test_vst2q_lane_s16 + vst2q_lane_s16(a, b, 7); + // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst2q_lane_s32(int32_t const *a, int32x4x2_t b) { + // CHECK-LABEL: test_vst2q_lane_s32 + vst2q_lane_s32(a, b, 3); + // CHECK: st2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +void test_vst2q_lane_s64(int64_t const *a, int64x2x2_t b) { + // CHECK-LABEL: test_vst2q_lane_s64 + vst2q_lane_s64(a, b, 1); + // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +void test_vst2q_lane_f16(float16_t const *a, float16x8x2_t b) { + // CHECK-LABEL: test_vst2q_lane_f16 + vst2q_lane_f16(a, b, 7); + // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst2q_lane_f32(float32_t const *a, float32x4x2_t b) { + // CHECK-LABEL: test_vst2q_lane_f32 + vst2q_lane_f32(a, b, 3); + // 
CHECK: st2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +void test_vst2q_lane_f64(float64_t const *a, float64x2x2_t b) { + // CHECK-LABEL: test_vst2q_lane_f64 + vst2q_lane_f64(a, b, 1); + // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +void test_vst2q_lane_p8(poly8_t const *a, poly8x16x2_t b) { + // CHECK-LABEL: test_vst2q_lane_p8 + vst2q_lane_p8(a, b, 15); + // CHECK: st2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +void test_vst2q_lane_p16(poly16_t const *a, poly16x8x2_t b) { + // CHECK-LABEL: test_vst2q_lane_p16 + vst2q_lane_p16(a, b, 7); + // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst2q_lane_p64(poly64_t const *a, poly64x2x2_t b) { + // CHECK-LABEL: test_vst2q_lane_p64 + vst2q_lane_p64(a, b, 1); + // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +void test_vst2_lane_u8(uint8_t const *a, uint8x8x2_t b) { + // CHECK-LABEL: test_vst2_lane_u8 + vst2_lane_u8(a, b, 7); + // CHECK: st2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +void test_vst2_lane_u16(uint16_t const *a, uint16x4x2_t b) { + // CHECK-LABEL: test_vst2_lane_u16 + vst2_lane_u16(a, b, 3); + // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void test_vst2_lane_u32(uint32_t const *a, uint32x2x2_t b) { + // CHECK-LABEL: test_vst2_lane_u32 + vst2_lane_u32(a, b, 1); + // CHECK: st2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +void test_vst2_lane_u64(uint64_t const *a, uint64x1x2_t b) { + // CHECK-LABEL: test_vst2_lane_u64 + vst2_lane_u64(a, b, 0); + // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst2_lane_s8(int8_t const *a, int8x8x2_t b) { + // CHECK-LABEL: test_vst2_lane_s8 + vst2_lane_s8(a, b, 7); + // CHECK: st2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +void test_vst2_lane_s16(int16_t const *a, int16x4x2_t b) { + // CHECK-LABEL: test_vst2_lane_s16 + vst2_lane_s16(a, b, 3); + // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void test_vst2_lane_s32(int32_t const *a, int32x2x2_t b) { + // CHECK-LABEL: test_vst2_lane_s32 + vst2_lane_s32(a, b, 1); + // CHECK: st2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +void test_vst2_lane_s64(int64_t const *a, int64x1x2_t b) { + // CHECK-LABEL: test_vst2_lane_s64 + vst2_lane_s64(a, b, 0); + // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst2_lane_f16(float16_t const *a, float16x4x2_t b) { + // CHECK-LABEL: test_vst2_lane_f16 + vst2_lane_f16(a, b, 3); + // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void test_vst2_lane_f32(float32_t const *a, float32x2x2_t b) { + // CHECK-LABEL: test_vst2_lane_f32 + vst2_lane_f32(a, b, 1); + // CHECK: st2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +void test_vst2_lane_f64(float64_t const *a, float64x1x2_t b) { + // CHECK-LABEL: test_vst2_lane_f64 + vst2_lane_f64(a, b, 0); + // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst2_lane_p8(poly8_t const *a, poly8x8x2_t b) { + // CHECK-LABEL: test_vst2_lane_p8 + vst2_lane_p8(a, b, 7); + // CHECK: st2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +void test_vst2_lane_p16(poly16_t const *a, poly16x4x2_t b) { + // CHECK-LABEL: test_vst2_lane_p16 + vst2_lane_p16(a, b, 3); + // CHECK: st2 {v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void test_vst2_lane_p64(poly64_t const *a, poly64x1x2_t b) { + // CHECK-LABEL: test_vst2_lane_p64 + vst2_lane_p64(a, b, 0); 
+ // CHECK: st2 {v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst3q_lane_u8(uint8_t const *a, uint8x16x3_t b) { + // CHECK-LABEL: test_vst3q_lane_u8 + vst3q_lane_u8(a, b, 15); + // CHECK: st3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +void test_vst3q_lane_u16(uint16_t const *a, uint16x8x3_t b) { + // CHECK-LABEL: test_vst3q_lane_u16 + vst3q_lane_u16(a, b, 7); + // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst3q_lane_u32(uint32_t const *a, uint32x4x3_t b) { + // CHECK-LABEL: test_vst3q_lane_u32 + vst3q_lane_u32(a, b, 3); + // CHECK: st3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +void test_vst3q_lane_u64(uint64_t const *a, uint64x2x3_t b) { + // CHECK-LABEL: test_vst3q_lane_u64 + vst3q_lane_u64(a, b, 1); + // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +void test_vst3q_lane_s8(int8_t const *a, int8x16x3_t b) { + // CHECK-LABEL: test_vst3q_lane_s8 + vst3q_lane_s8(a, b, 15); + // CHECK: st3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +void test_vst3q_lane_s16(int16_t const *a, int16x8x3_t b) { + // CHECK-LABEL: test_vst3q_lane_s16 + vst3q_lane_s16(a, b, 7); + // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst3q_lane_s32(int32_t const *a, int32x4x3_t b) { + // CHECK-LABEL: test_vst3q_lane_s32 + vst3q_lane_s32(a, b, 3); + // CHECK: st3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +void test_vst3q_lane_s64(int64_t const *a, int64x2x3_t b) { + // CHECK-LABEL: test_vst3q_lane_s64 + vst3q_lane_s64(a, b, 1); + // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +void test_vst3q_lane_f16(float16_t const *a, float16x8x3_t b) { + // CHECK-LABEL: test_vst3q_lane_f16 + vst3q_lane_f16(a, b, 7); + // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst3q_lane_f32(float32_t const *a, float32x4x3_t b) { + // CHECK-LABEL: test_vst3q_lane_f32 + vst3q_lane_f32(a, b, 3); + // CHECK: st3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +void test_vst3q_lane_f64(float64_t const *a, float64x2x3_t b) { + // CHECK-LABEL: test_vst3q_lane_f64 + vst3q_lane_f64(a, b, 1); + // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +void test_vst3q_lane_p8(poly8_t const *a, poly8x16x3_t b) { + // CHECK-LABEL: test_vst3q_lane_p8 + vst3q_lane_p8(a, b, 15); + // CHECK: st3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +void test_vst3q_lane_p16(poly16_t const *a, poly16x8x3_t b) { + // CHECK-LABEL: test_vst3q_lane_p16 + vst3q_lane_p16(a, b, 7); + // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst3q_lane_p64(poly64_t const *a, poly64x2x3_t b) { + // CHECK-LABEL: test_vst3q_lane_p64 + vst3q_lane_p64(a, b, 1); + // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +void test_vst3_lane_u8(uint8_t const *a, uint8x8x3_t b) { + // CHECK-LABEL: test_vst3_lane_u8 + vst3_lane_u8(a, b, 7); + // CHECK: st3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +void test_vst3_lane_u16(uint16_t const *a, uint16x4x3_t b) { + // CHECK-LABEL: test_vst3_lane_u16 + vst3_lane_u16(a, b, 3); + // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void test_vst3_lane_u32(uint32_t const *a, 
uint32x2x3_t b) { + // CHECK-LABEL: test_vst3_lane_u32 + vst3_lane_u32(a, b, 1); + // CHECK: st3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +void test_vst3_lane_u64(uint64_t const *a, uint64x1x3_t b) { + // CHECK-LABEL: test_vst3_lane_u64 + vst3_lane_u64(a, b, 0); + // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst3_lane_s8(int8_t const *a, int8x8x3_t b) { + // CHECK-LABEL: test_vst3_lane_s8 + vst3_lane_s8(a, b, 7); + // CHECK: st3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +void test_vst3_lane_s16(int16_t const *a, int16x4x3_t b) { + // CHECK-LABEL: test_vst3_lane_s16 + vst3_lane_s16(a, b, 3); + // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void test_vst3_lane_s32(int32_t const *a, int32x2x3_t b) { + // CHECK-LABEL: test_vst3_lane_s32 + vst3_lane_s32(a, b, 1); + // CHECK: st3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +void test_vst3_lane_s64(int64_t const *a, int64x1x3_t b) { + // CHECK-LABEL: test_vst3_lane_s64 + vst3_lane_s64(a, b, 0); + // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst3_lane_f16(float16_t const *a, float16x4x3_t b) { + // CHECK-LABEL: test_vst3_lane_f16 + vst3_lane_f16(a, b, 3); + // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void test_vst3_lane_f32(float32_t const *a, float32x2x3_t b) { + // CHECK-LABEL: test_vst3_lane_f32 + vst3_lane_f32(a, b, 1); + // CHECK: st3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +void test_vst3_lane_f64(float64_t const *a, float64x1x3_t b) { + // CHECK-LABEL: test_vst3_lane_f64 + vst3_lane_f64(a, b, 0); + // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst3_lane_p8(poly8_t const *a, poly8x8x3_t b) { + // CHECK-LABEL: test_vst3_lane_p8 + vst3_lane_p8(a, b, 7); + // CHECK: st3 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +void test_vst3_lane_p16(poly16_t const *a, poly16x4x3_t b) { + // CHECK-LABEL: test_vst3_lane_p16 + vst3_lane_p16(a, b, 3); + // CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void test_vst3_lane_p64(poly64_t const *a, poly64x1x3_t b) { + // CHECK-LABEL: test_vst3_lane_p64 + vst3_lane_p64(a, b, 0); + // CHECK: st3 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst4q_lane_u8(uint16_t const *a, uint8x16x4_t b) { + // CHECK-LABEL: test_vst4q_lane_u8 + vst4q_lane_u8(a, b, 15); + // CHECK: st4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +void test_vst4q_lane_u16(uint16_t const *a, uint16x8x4_t b) { + // CHECK-LABEL: test_vst4q_lane_u16 + vst4q_lane_u16(a, b, 7); + // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst4q_lane_u32(uint32_t const *a, uint32x4x4_t b) { + // CHECK-LABEL: test_vst4q_lane_u32 + vst4q_lane_u32(a, b, 3); + // CHECK: st4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +void test_vst4q_lane_u64(uint64_t const *a, uint64x2x4_t b) { + // CHECK-LABEL: test_vst4q_lane_u64 + vst4q_lane_u64(a, b, 1); + // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +void test_vst4q_lane_s8(int16_t const *a, int8x16x4_t b) { + // CHECK-LABEL: test_vst4q_lane_s8 + vst4q_lane_s8(a, b, 15); + // 
CHECK: st4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +void test_vst4q_lane_s16(int16_t const *a, int16x8x4_t b) { + // CHECK-LABEL: test_vst4q_lane_s16 + vst4q_lane_s16(a, b, 7); + // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst4q_lane_s32(int32_t const *a, int32x4x4_t b) { + // CHECK-LABEL: test_vst4q_lane_s32 + vst4q_lane_s32(a, b, 3); + // CHECK: st4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +void test_vst4q_lane_s64(int64_t const *a, int64x2x4_t b) { + // CHECK-LABEL: test_vst4q_lane_s64 + vst4q_lane_s64(a, b, 1); + // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +void test_vst4q_lane_f16(float16_t const *a, float16x8x4_t b) { + // CHECK-LABEL: test_vst4q_lane_f16 + vst4q_lane_f16(a, b, 7); + // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst4q_lane_f32(float32_t const *a, float32x4x4_t b) { + // CHECK-LABEL: test_vst4q_lane_f32 + vst4q_lane_f32(a, b, 3); + // CHECK: st4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}] +} + +void test_vst4q_lane_f64(float64_t const *a, float64x2x4_t b) { + // CHECK-LABEL: test_vst4q_lane_f64 + vst4q_lane_f64(a, b, 1); + // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +void test_vst4q_lane_p8(poly16_t const *a, poly8x16x4_t b) { + // CHECK-LABEL: test_vst4q_lane_p8 + vst4q_lane_p8(a, b, 15); + // CHECK: st4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[15], [{{x[0-9]+|sp}}] +} + +void test_vst4q_lane_p16(poly16_t const *a, poly16x8x4_t b) { + // CHECK-LABEL: test_vst4q_lane_p16 + vst4q_lane_p16(a, b, 7); + // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[7], [{{x[0-9]+|sp}}] +} + +void test_vst4q_lane_p64(poly64_t const *a, poly64x2x4_t b) { + // CHECK-LABEL: test_vst4q_lane_p64 + vst4q_lane_p64(a, b, 1); + // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}] +} + +void test_vst4_lane_u8(uint8_t const *a, uint8x8x4_t b) { + // CHECK-LABEL: test_vst4_lane_u8 + vst4_lane_u8(a, b, 7); + // CHECK: st4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +void test_vst4_lane_u16(uint16_t const *a, uint16x4x4_t b) { + // CHECK-LABEL: test_vst4_lane_u16 + vst4_lane_u16(a, b, 3); + // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void test_vst4_lane_u32(uint32_t const *a, uint32x2x4_t b) { + // CHECK-LABEL: test_vst4_lane_u32 + vst4_lane_u32(a, b, 1); + // CHECK: st4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +void test_vst4_lane_u64(uint64_t const *a, uint64x1x4_t b) { + // CHECK-LABEL: test_vst4_lane_u64 + vst4_lane_u64(a, b, 0); + // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst4_lane_s8(int8_t const *a, int8x8x4_t b) { + // CHECK-LABEL: test_vst4_lane_s8 + vst4_lane_s8(a, b, 7); + // CHECK: st4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +void test_vst4_lane_s16(int16_t const *a, int16x4x4_t b) { + // CHECK-LABEL: test_vst4_lane_s16 + vst4_lane_s16(a, b, 3); + // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void 
test_vst4_lane_s32(int32_t const *a, int32x2x4_t b) { + // CHECK-LABEL: test_vst4_lane_s32 + vst4_lane_s32(a, b, 1); + // CHECK: st4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +void test_vst4_lane_s64(int64_t const *a, int64x1x4_t b) { + // CHECK-LABEL: test_vst4_lane_s64 + vst4_lane_s64(a, b, 0); + // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst4_lane_f16(float16_t const *a, float16x4x4_t b) { + // CHECK-LABEL: test_vst4_lane_f16 + vst4_lane_f16(a, b, 3); + // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void test_vst4_lane_f32(float32_t const *a, float32x2x4_t b) { + // CHECK-LABEL: test_vst4_lane_f32 + vst4_lane_f32(a, b, 1); + // CHECK: st4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}] +} + +void test_vst4_lane_f64(float64_t const *a, float64x1x4_t b) { + // CHECK-LABEL: test_vst4_lane_f64 + vst4_lane_f64(a, b, 0); + // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} + +void test_vst4_lane_p8(poly8_t const *a, poly8x8x4_t b) { + // CHECK-LABEL: test_vst4_lane_p8 + vst4_lane_p8(a, b, 7); + // CHECK: st4 {v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}] +} + +void test_vst4_lane_p16(poly16_t const *a, poly16x4x4_t b) { + // CHECK-LABEL: test_vst4_lane_p16 + vst4_lane_p16(a, b, 3); + // CHECK: st4 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}] +} + +void test_vst4_lane_p64(poly64_t const *a, poly64x1x4_t b) { + // CHECK-LABEL: test_vst4_lane_p64 + vst4_lane_p64(a, b, 0); + // CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[0], [{{x[0-9]+|sp}}] +} diff --git a/test/CodeGen/aarch64-neon-misc.c b/test/CodeGen/aarch64-neon-misc.c new file mode 100644 index 0000000..08174d9 --- /dev/null +++ b/test/CodeGen/aarch64-neon-misc.c @@ -0,0 +1,2005 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +// CHECK: test_vceqz_s8 +// CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0 +uint8x8_t test_vceqz_s8(int8x8_t a) { + return vceqz_s8(a); +} + +// CHECK: test_vceqz_s16 +// CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0 +uint16x4_t test_vceqz_s16(int16x4_t a) { + return vceqz_s16(a); +} + +// CHECK: test_vceqz_s32 +// CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0 +uint32x2_t test_vceqz_s32(int32x2_t a) { + return vceqz_s32(a); +} + +// CHECK: test_vceqz_s64 +// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vceqz_s64(int64x1_t a) { + return vceqz_s64(a); +} + +// CHECK: test_vceqz_u64 +// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vceqz_u64(uint64x1_t a) { + return vceqz_u64(a); +} + +// CHECK: test_vceqz_p64 +// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vceqz_p64(poly64x1_t a) { + return vceqz_p64(a); +} + +// CHECK: test_vceqzq_s8 +// CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0 +uint8x16_t test_vceqzq_s8(int8x16_t a) { + return vceqzq_s8(a); +} + +// CHECK: test_vceqzq_s16 +// CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0 +uint16x8_t test_vceqzq_s16(int16x8_t a) { + return vceqzq_s16(a); +} + +// CHECK: test_vceqzq_s32 +// CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0 +uint32x4_t test_vceqzq_s32(int32x4_t a) { + 
return vceqzq_s32(a); +} + +// CHECK: test_vceqzq_s64 +// CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0 +uint64x2_t test_vceqzq_s64(int64x2_t a) { + return vceqzq_s64(a); +} + +// CHECK: test_vceqz_u8 +// CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0 +uint8x8_t test_vceqz_u8(uint8x8_t a) { + return vceqz_u8(a); +} + +// CHECK: test_vceqz_u16 +// CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0 +uint16x4_t test_vceqz_u16(uint16x4_t a) { + return vceqz_u16(a); +} + +// CHECK: test_vceqz_u32 +// CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0 +uint32x2_t test_vceqz_u32(uint32x2_t a) { + return vceqz_u32(a); +} + +// CHECK: test_vceqzq_u8 +// CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0 +uint8x16_t test_vceqzq_u8(uint8x16_t a) { + return vceqzq_u8(a); +} + +// CHECK: test_vceqzq_u16 +// CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0 +uint16x8_t test_vceqzq_u16(uint16x8_t a) { + return vceqzq_u16(a); +} + +// CHECK: test_vceqzq_u32 +// CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0 +uint32x4_t test_vceqzq_u32(uint32x4_t a) { + return vceqzq_u32(a); +} + +// CHECK: test_vceqzq_u64 +// CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0 +uint64x2_t test_vceqzq_u64(uint64x2_t a) { + return vceqzq_u64(a); +} + +// CHECK: test_vceqz_f32 +// CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0 +uint32x2_t test_vceqz_f32(float32x2_t a) { + return vceqz_f32(a); +} + +// CHECK: test_vceqz_f64 +// CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, #0 +uint64x1_t test_vceqz_f64(float64x1_t a) { + return vceqz_f64(a); +} + +// CHECK: test_vceqzq_f32 +// CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 +uint32x4_t test_vceqzq_f32(float32x4_t a) { + return vceqzq_f32(a); +} + +// CHECK: test_vceqz_p16 +// CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0 +uint16x4_t test_vceqz_p16(poly16x4_t a) { + return vceqz_p16(a); +} + +// CHECK: test_vceqzq_p16 +// CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0 +uint16x8_t test_vceqzq_p16(poly16x8_t a) { + return vceqzq_p16(a); +} + +// CHECK: test_vceqzq_f64 +// CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +uint64x2_t test_vceqzq_f64(float64x2_t a) { + return vceqzq_f64(a); +} + +// CHECK: test_vceqzq_p64 +// CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +uint64x2_t test_vceqzq_p64(poly64x2_t a) { + return vceqzq_p64(a); +} + +// CHECK: test_vcgez_s8 +// CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0 +uint8x8_t test_vcgez_s8(int8x8_t a) { + return vcgez_s8(a); +} + +// CHECK: test_vcgez_s16 +// CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0 +uint16x4_t test_vcgez_s16(int16x4_t a) { + return vcgez_s16(a); +} + +// CHECK: test_vcgez_s32 +// CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0 +uint32x2_t test_vcgez_s32(int32x2_t a) { + return vcgez_s32(a); +} + +// CHECK: test_vcgez_s64 +// CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vcgez_s64(int64x1_t a) { + return vcgez_s64(a); +} + +// CHECK: test_vcgezq_s8 +// CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0 +uint8x16_t test_vcgezq_s8(int8x16_t a) { + return vcgezq_s8(a); +} + +// CHECK: test_vcgezq_s16 +// CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0 +uint16x8_t test_vcgezq_s16(int16x8_t a) { + return vcgezq_s16(a); +} + +// CHECK: test_vcgezq_s32 +// CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0 +uint32x4_t test_vcgezq_s32(int32x4_t a) { + return vcgezq_s32(a); +} + +// CHECK: test_vcgezq_s64 +// CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0 +uint64x2_t test_vcgezq_s64(int64x2_t a) { + return vcgezq_s64(a); +} + +// CHECK: test_vcgez_f32 +// CHECK: fcmge {{v[0-9]+}}.2s, 
{{v[0-9]+}}.2s, #0 +uint32x2_t test_vcgez_f32(float32x2_t a) { + return vcgez_f32(a); +} + +// CHECK: test_vcgezq_f32 +// CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 +uint32x4_t test_vcgezq_f32(float32x4_t a) { + return vcgezq_f32(a); +} + +// CHECK: test_vcgezq_f64 +// CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +uint64x2_t test_vcgezq_f64(float64x2_t a) { + return vcgezq_f64(a); +} + +// CHECK: test_vclez_s8 +// CHECK: cmle {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0 +uint8x8_t test_vclez_s8(int8x8_t a) { + return vclez_s8(a); +} + +// CHECK: test_vclez_s16 +// CHECK: cmle {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0 +uint16x4_t test_vclez_s16(int16x4_t a) { + return vclez_s16(a); +} + +// CHECK: test_vclez_s32 +// CHECK: cmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0 +uint32x2_t test_vclez_s32(int32x2_t a) { + return vclez_s32(a); +} + +// CHECK: test_vclez_s64 +// CHECK: cmle {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vclez_s64(int64x1_t a) { + return vclez_s64(a); +} + +// CHECK: test_vclezq_s8 +// CHECK: cmle {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0 +uint8x16_t test_vclezq_s8(int8x16_t a) { + return vclezq_s8(a); +} + +// CHECK: test_vclezq_s16 +// CHECK: cmle {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0 +uint16x8_t test_vclezq_s16(int16x8_t a) { + return vclezq_s16(a); +} + +// CHECK: test_vclezq_s32 +// CHECK: cmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0 +uint32x4_t test_vclezq_s32(int32x4_t a) { + return vclezq_s32(a); +} + +// CHECK: test_vclezq_s64 +// CHECK: cmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0 +uint64x2_t test_vclezq_s64(int64x2_t a) { + return vclezq_s64(a); +} + +// CHECK: test_vclez_f32 +// CHECK: fcmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0 +uint32x2_t test_vclez_f32(float32x2_t a) { + return vclez_f32(a); +} + +// CHECK: test_vclezq_f32 +// CHECK: fcmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 +uint32x4_t test_vclezq_f32(float32x4_t a) { + return vclezq_f32(a); +} + +// CHECK: test_vclezq_f64 +// CHECK: fcmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +uint64x2_t test_vclezq_f64(float64x2_t a) { + return vclezq_f64(a); +} + +// CHECK: test_vcgtz_s8 +// CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0 +uint8x8_t test_vcgtz_s8(int8x8_t a) { + return vcgtz_s8(a); +} + +// CHECK: test_vcgtz_s16 +// CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0 +uint16x4_t test_vcgtz_s16(int16x4_t a) { + return vcgtz_s16(a); +} + +// CHECK: test_vcgtz_s32 +// CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0 +uint32x2_t test_vcgtz_s32(int32x2_t a) { + return vcgtz_s32(a); +} + +// CHECK: test_vcgtz_s64 +// CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vcgtz_s64(int64x1_t a) { + return vcgtz_s64(a); +} + +// CHECK: test_vcgtzq_s8 +// CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0 +uint8x16_t test_vcgtzq_s8(int8x16_t a) { + return vcgtzq_s8(a); +} + +// CHECK: test_vcgtzq_s16 +// CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0 +uint16x8_t test_vcgtzq_s16(int16x8_t a) { + return vcgtzq_s16(a); +} + +// CHECK: test_vcgtzq_s32 +// CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0 +uint32x4_t test_vcgtzq_s32(int32x4_t a) { + return vcgtzq_s32(a); +} + +// CHECK: test_vcgtzq_s64 +// CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0 +uint64x2_t test_vcgtzq_s64(int64x2_t a) { + return vcgtzq_s64(a); +} + +// CHECK: test_vcgtz_f32 +// CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0 +uint32x2_t test_vcgtz_f32(float32x2_t a) { + return vcgtz_f32(a); +} + +// CHECK: test_vcgtzq_f32 +// CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 +uint32x4_t test_vcgtzq_f32(float32x4_t a) { + return vcgtzq_f32(a); +} + +// CHECK: 
test_vcgtzq_f64 +// CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +uint64x2_t test_vcgtzq_f64(float64x2_t a) { + return vcgtzq_f64(a); +} + +// CHECK: test_vcltz_s8 +// CHECK: cmlt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 +uint8x8_t test_vcltz_s8(int8x8_t a) { + return vcltz_s8(a); +} + +// CHECK: test_vcltz_s16 +// CHECK: cmlt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0 +uint16x4_t test_vcltz_s16(int16x4_t a) { + return vcltz_s16(a); +} + +// CHECK: test_vcltz_s32 +// CHECK: cmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0 +uint32x2_t test_vcltz_s32(int32x2_t a) { + return vcltz_s32(a); +} + +// CHECK: test_vcltz_s64 +// CHECK: cmlt {{d[0-9]+}}, {{d[0-9]+}}, #0 +uint64x1_t test_vcltz_s64(int64x1_t a) { + return vcltz_s64(a); +} + +// CHECK: test_vcltzq_s8 +// CHECK: cmlt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0 +uint8x16_t test_vcltzq_s8(int8x16_t a) { + return vcltzq_s8(a); +} + +// CHECK: test_vcltzq_s16 +// CHECK: cmlt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0 +uint16x8_t test_vcltzq_s16(int16x8_t a) { + return vcltzq_s16(a); +} + +// CHECK: test_vcltzq_s32 +// CHECK: cmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 +uint32x4_t test_vcltzq_s32(int32x4_t a) { + return vcltzq_s32(a); +} + +// CHECK: test_vcltzq_s64 +// CHECK: cmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +uint64x2_t test_vcltzq_s64(int64x2_t a) { + return vcltzq_s64(a); +} + +// CHECK: test_vcltz_f32 +// CHECK: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0 +uint32x2_t test_vcltz_f32(float32x2_t a) { + return vcltz_f32(a); +} + +// CHECK: test_vcltzq_f32 +// CHECK: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 +uint32x4_t test_vcltzq_f32(float32x4_t a) { + return vcltzq_f32(a); +} + +// CHECK: test_vcltzq_f64 +// CHECK: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +uint64x2_t test_vcltzq_f64(float64x2_t a) { + return vcltzq_f64(a); +} + +// CHECK: test_vrev16_s8 +// CHECK: rev16 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +int8x8_t test_vrev16_s8(int8x8_t a) { + return vrev16_s8(a); +} + +// CHECK: test_vrev16_u8 +// CHECK: rev16 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +uint8x8_t test_vrev16_u8(uint8x8_t a) { + return vrev16_u8(a); +} + +// CHECK: test_vrev16_p8 +// CHECK: rev16 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +poly8x8_t test_vrev16_p8(poly8x8_t a) { + return vrev16_p8(a); +} + +// CHECK: test_vrev16q_s8 +// CHECK: rev16 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +int8x16_t test_vrev16q_s8(int8x16_t a) { + return vrev16q_s8(a); +} + +// CHECK: test_vrev16q_u8 +// CHECK: rev16 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +uint8x16_t test_vrev16q_u8(uint8x16_t a) { + return vrev16q_u8(a); +} + +// CHECK: test_vrev16q_p8 +// CHECK: rev16 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +poly8x16_t test_vrev16q_p8(poly8x16_t a) { + return vrev16q_p8(a); +} + +// CHECK: test_vrev32_s8 +// CHECK: rev32 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +int8x8_t test_vrev32_s8(int8x8_t a) { + return vrev32_s8(a); +} + +// CHECK: test_vrev32_s16 +// CHECK: rev32 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +int16x4_t test_vrev32_s16(int16x4_t a) { + return vrev32_s16(a); +} + +// CHECK: test_vrev32_u8 +// CHECK: rev32 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +uint8x8_t test_vrev32_u8(uint8x8_t a) { + return vrev32_u8(a); +} + +// CHECK: test_vrev32_u16 +// CHECK: rev32 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +uint16x4_t test_vrev32_u16(uint16x4_t a) { + return vrev32_u16(a); +} + +// CHECK: test_vrev32_p8 +// CHECK: rev32 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +poly8x8_t test_vrev32_p8(poly8x8_t a) { + return vrev32_p8(a); +} + +// CHECK: test_vrev32_p16 +// CHECK: rev32 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +poly16x4_t test_vrev32_p16(poly16x4_t a) { + return vrev32_p16(a); +} + +// CHECK: test_vrev32q_s8 +// 
CHECK: rev32 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +int8x16_t test_vrev32q_s8(int8x16_t a) { + return vrev32q_s8(a); +} + +// CHECK: test_vrev32q_s16 +// CHECK: rev32 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +int16x8_t test_vrev32q_s16(int16x8_t a) { + return vrev32q_s16(a); +} + +// CHECK: test_vrev32q_u8 +// CHECK: rev32 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +uint8x16_t test_vrev32q_u8(uint8x16_t a) { + return vrev32q_u8(a); +} + +// CHECK: test_vrev32q_u16 +// CHECK: rev32 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +uint16x8_t test_vrev32q_u16(uint16x8_t a) { + return vrev32q_u16(a); +} + +// CHECK: test_vrev32q_p8 +// CHECK: rev32 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +poly8x16_t test_vrev32q_p8(poly8x16_t a) { + return vrev32q_p8(a); +} + +// CHECK: test_vrev32q_p16 +// CHECK: rev32 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +poly16x8_t test_vrev32q_p16(poly16x8_t a) { + return vrev32q_p16(a); +} + +// CHECK: test_vrev64_s8 +// CHECK: rev64 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +int8x8_t test_vrev64_s8(int8x8_t a) { + return vrev64_s8(a); +} + +// CHECK: test_vrev64_s16 +// CHECK: rev64 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +int16x4_t test_vrev64_s16(int16x4_t a) { + return vrev64_s16(a); +} + +// CHECK: test_vrev64_s32 +// CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +int32x2_t test_vrev64_s32(int32x2_t a) { + return vrev64_s32(a); +} + +// CHECK: test_vrev64_u8 +// CHECK: rev64 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +uint8x8_t test_vrev64_u8(uint8x8_t a) { + return vrev64_u8(a); +} + +// CHECK: test_vrev64_u16 +// CHECK: rev64 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +uint16x4_t test_vrev64_u16(uint16x4_t a) { + return vrev64_u16(a); +} + +// CHECK: test_vrev64_u32 +// CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +uint32x2_t test_vrev64_u32(uint32x2_t a) { + return vrev64_u32(a); +} + +// CHECK: test_vrev64_p8 +// CHECK: rev64 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +poly8x8_t test_vrev64_p8(poly8x8_t a) { + return vrev64_p8(a); +} + +// CHECK: test_vrev64_p16 +// CHECK: rev64 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +poly16x4_t test_vrev64_p16(poly16x4_t a) { + return vrev64_p16(a); +} + +// CHECK: test_vrev64_f32 +// CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +float32x2_t test_vrev64_f32(float32x2_t a) { + return vrev64_f32(a); +} + +// CHECK: test_vrev64q_s8 +// CHECK: rev64 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +int8x16_t test_vrev64q_s8(int8x16_t a) { + return vrev64q_s8(a); +} + +// CHECK: test_vrev64q_s16 +// CHECK: rev64 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +int16x8_t test_vrev64q_s16(int16x8_t a) { + return vrev64q_s16(a); +} + +// CHECK: test_vrev64q_s32 +// CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +int32x4_t test_vrev64q_s32(int32x4_t a) { + return vrev64q_s32(a); +} + +// CHECK: test_vrev64q_u8 +// CHECK: rev64 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +uint8x16_t test_vrev64q_u8(uint8x16_t a) { + return vrev64q_u8(a); +} + +// CHECK: test_vrev64q_u16 +// CHECK: rev64 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +uint16x8_t test_vrev64q_u16(uint16x8_t a) { + return vrev64q_u16(a); +} + +// CHECK: test_vrev64q_u32 +// CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +uint32x4_t test_vrev64q_u32(uint32x4_t a) { + return vrev64q_u32(a); +} + +// CHECK: test_vrev64q_p8 +// CHECK: rev64 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +poly8x16_t test_vrev64q_p8(poly8x16_t a) { + return vrev64q_p8(a); +} + +// CHECK: test_vrev64q_p16 +// CHECK: rev64 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +poly16x8_t test_vrev64q_p16(poly16x8_t a) { + return vrev64q_p16(a); +} + +// CHECK: test_vrev64q_f32 +// CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +float32x4_t test_vrev64q_f32(float32x4_t a) { + return vrev64q_f32(a); +} + +int16x4_t 
test_vpaddl_s8(int8x8_t a) { + // CHECK: test_vpaddl_s8 + return vpaddl_s8(a); + // CHECK: saddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b +} + +int32x2_t test_vpaddl_s16(int16x4_t a) { + // CHECK: test_vpaddl_s16 + return vpaddl_s16(a); + // CHECK: saddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h +} + +int64x1_t test_vpaddl_s32(int32x2_t a) { + // CHECK: test_vpaddl_s32 + return vpaddl_s32(a); + // CHECK: saddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s +} + +uint16x4_t test_vpaddl_u8(uint8x8_t a) { + // CHECK: test_vpaddl_u8 + return vpaddl_u8(a); + // CHECK: uaddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b +} + +uint32x2_t test_vpaddl_u16(uint16x4_t a) { + // CHECK: test_vpaddl_u16 + return vpaddl_u16(a); + // CHECK: uaddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h +} + +uint64x1_t test_vpaddl_u32(uint32x2_t a) { + // CHECK: test_vpaddl_u32 + return vpaddl_u32(a); + // CHECK: uaddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s +} + +int16x8_t test_vpaddlq_s8(int8x16_t a) { + // CHECK: test_vpaddlq_s8 + return vpaddlq_s8(a); + // CHECK: saddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b +} + +int32x4_t test_vpaddlq_s16(int16x8_t a) { + // CHECK: test_vpaddlq_s16 + return vpaddlq_s16(a); + // CHECK: saddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h +} + +int64x2_t test_vpaddlq_s32(int32x4_t a) { + // CHECK: test_vpaddlq_s32 + return vpaddlq_s32(a); + // CHECK: saddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s +} + +uint16x8_t test_vpaddlq_u8(uint8x16_t a) { + // CHECK: test_vpaddlq_u8 + return vpaddlq_u8(a); + // CHECK: uaddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b +} + +uint32x4_t test_vpaddlq_u16(uint16x8_t a) { + // CHECK: test_vpaddlq_u16 + return vpaddlq_u16(a); + // CHECK: uaddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h +} + +uint64x2_t test_vpaddlq_u32(uint32x4_t a) { + // CHECK: test_vpaddlq_u32 + return vpaddlq_u32(a); + // CHECK: uaddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s +} + +int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) { + // CHECK: test_vpadal_s8 + return vpadal_s8(a, b); + // CHECK: sadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b +} + +int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) { + // CHECK: test_vpadal_s16 + return vpadal_s16(a, b); + // CHECK: sadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h +} + +int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) { + // CHECK: test_vpadal_s32 + return vpadal_s32(a, b); + // CHECK: sadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s +} + +uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) { + // CHECK: test_vpadal_u8 + return vpadal_u8(a, b); + // CHECK: uadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b +} + +uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) { + // CHECK: test_vpadal_u16 + return vpadal_u16(a, b); + // CHECK: uadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h +} + +uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) { + // CHECK: test_vpadal_u32 + return vpadal_u32(a, b); + // CHECK: uadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s +} + +int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) { + // CHECK: test_vpadalq_s8 + return vpadalq_s8(a, b); + // CHECK: sadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b +} + +int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) { + // CHECK: test_vpadalq_s16 + return vpadalq_s16(a, b); + // CHECK: sadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h +} + +int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) { + // CHECK: test_vpadalq_s32 + return vpadalq_s32(a, b); + // CHECK: sadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s +} + +uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) { + // CHECK: test_vpadalq_u8 + return vpadalq_u8(a, b); + // CHECK: uadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b +} + +uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) { + // CHECK: test_vpadalq_u16 + return 
vpadalq_u16(a, b); + // CHECK: uadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h +} + +uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) { + // CHECK: test_vpadalq_u32 + return vpadalq_u32(a, b); + // CHECK: uadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s +} + +int8x8_t test_vqabs_s8(int8x8_t a) { + // CHECK: test_vqabs_s8 + return vqabs_s8(a); + // CHECK: sqabs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +int8x16_t test_vqabsq_s8(int8x16_t a) { + // CHECK: test_vqabsq_s8 + return vqabsq_s8(a); + // CHECK: sqabs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +int16x4_t test_vqabs_s16(int16x4_t a) { + // CHECK: test_vqabs_s16 + return vqabs_s16(a); + // CHECK: sqabs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +} + +int16x8_t test_vqabsq_s16(int16x8_t a) { + // CHECK: test_vqabsq_s16 + return vqabsq_s16(a); + // CHECK: sqabs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +} + +int32x2_t test_vqabs_s32(int32x2_t a) { + // CHECK: test_vqabs_s32 + return vqabs_s32(a); + // CHECK: sqabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +int32x4_t test_vqabsq_s32(int32x4_t a) { + // CHECK: test_vqabsq_s32 + return vqabsq_s32(a); + // CHECK: sqabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +int64x2_t test_vqabsq_s64(int64x2_t a) { + // CHECK: test_vqabsq_s64 + return vqabsq_s64(a); + // CHECK: sqabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +int8x8_t test_vqneg_s8(int8x8_t a) { + // CHECK: test_vqneg_s8 + return vqneg_s8(a); + // CHECK: sqneg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +int8x16_t test_vqnegq_s8(int8x16_t a) { + // CHECK: test_vqnegq_s8 + return vqnegq_s8(a); + // CHECK: sqneg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +int16x4_t test_vqneg_s16(int16x4_t a) { + // CHECK: test_vqneg_s16 + return vqneg_s16(a); + // CHECK: sqneg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +} + +int16x8_t test_vqnegq_s16(int16x8_t a) { + // CHECK: test_vqnegq_s16 + return vqnegq_s16(a); + // CHECK: sqneg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +} + +int32x2_t test_vqneg_s32(int32x2_t a) { + // CHECK: test_vqneg_s32 + return vqneg_s32(a); + // CHECK: sqneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +int32x4_t test_vqnegq_s32(int32x4_t a) { + // CHECK: test_vqnegq_s32 + return vqnegq_s32(a); + // CHECK: sqneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +int64x2_t test_vqnegq_s64(int64x2_t a) { + // CHECK: test_vqnegq_s64 + return vqnegq_s64(a); + // CHECK: sqneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +int8x8_t test_vneg_s8(int8x8_t a) { + // CHECK: test_vneg_s8 + return vneg_s8(a); + // CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +int8x16_t test_vnegq_s8(int8x16_t a) { + // CHECK: test_vnegq_s8 + return vnegq_s8(a); + // CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +int16x4_t test_vneg_s16(int16x4_t a) { + // CHECK: test_vneg_s16 + return vneg_s16(a); + // CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +} + +int16x8_t test_vnegq_s16(int16x8_t a) { + // CHECK: test_vnegq_s16 + return vnegq_s16(a); + // CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +} + +int32x2_t test_vneg_s32(int32x2_t a) { + // CHECK: test_vneg_s32 + return vneg_s32(a); + // CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +int32x4_t test_vnegq_s32(int32x4_t a) { + // CHECK: test_vnegq_s32 + return vnegq_s32(a); + // CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +int64x2_t test_vnegq_s64(int64x2_t a) { + // CHECK: test_vnegq_s64 + return vnegq_s64(a); + // CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +float32x2_t test_vneg_f32(float32x2_t a) { + // CHECK: test_vneg_f32 + return vneg_f32(a); + // CHECK: fneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +float32x4_t test_vnegq_f32(float32x4_t a) { + // CHECK: test_vnegq_f32 + return vnegq_f32(a); + // CHECK: fneg v{{[0-9]+}}.4s, 
v{{[0-9]+}}.4s +} + +float64x2_t test_vnegq_f64(float64x2_t a) { + // CHECK: test_vnegq_f64 + return vnegq_f64(a); + // CHECK: fneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +int8x8_t test_vabs_s8(int8x8_t a) { + // CHECK: test_vabs_s8 + return vabs_s8(a); + // CHECK: abs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +int8x16_t test_vabsq_s8(int8x16_t a) { + // CHECK: test_vabsq_s8 + return vabsq_s8(a); + // CHECK: abs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +int16x4_t test_vabs_s16(int16x4_t a) { + // CHECK: test_vabs_s16 + return vabs_s16(a); + // CHECK: abs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +} + +int16x8_t test_vabsq_s16(int16x8_t a) { + // CHECK: test_vabsq_s16 + return vabsq_s16(a); + // CHECK: abs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +} + +int32x2_t test_vabs_s32(int32x2_t a) { + // CHECK: test_vabs_s32 + return vabs_s32(a); + // CHECK: abs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +int32x4_t test_vabsq_s32(int32x4_t a) { + // CHECK: test_vabsq_s32 + return vabsq_s32(a); + // CHECK: abs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +int64x2_t test_vabsq_s64(int64x2_t a) { + // CHECK: test_vabsq_s64 + return vabsq_s64(a); + // CHECK: abs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +float32x2_t test_vabs_f32(float32x2_t a) { + // CHECK: test_vabs_f32 + return vabs_f32(a); + // CHECK: fabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +float32x4_t test_vabsq_f32(float32x4_t a) { + // CHECK: test_vabsq_f32 + return vabsq_f32(a); + // CHECK: fabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +float64x2_t test_vabsq_f64(float64x2_t a) { + // CHECK: test_vabsq_f64 + return vabsq_f64(a); + // CHECK: fabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +int8x8_t test_vuqadd_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vuqadd_s8 + return vuqadd_s8(a, b); + // CHECK: suqadd v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +int8x16_t test_vuqaddq_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vuqaddq_s8 + return vuqaddq_s8(a, b); + // CHECK: suqadd v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +int16x4_t test_vuqadd_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vuqadd_s16 + return vuqadd_s16(a, b); + // CHECK: suqadd v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +} + +int16x8_t test_vuqaddq_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vuqaddq_s16 + return vuqaddq_s16(a, b); + // CHECK: suqadd v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +} + +int32x2_t test_vuqadd_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vuqadd_s32 + return vuqadd_s32(a, b); + // CHECK: suqadd v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +int32x4_t test_vuqaddq_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vuqaddq_s32 + return vuqaddq_s32(a, b); + // CHECK: suqadd v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +int64x2_t test_vuqaddq_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vuqaddq_s64 + return vuqaddq_s64(a, b); + // CHECK: suqadd v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +int8x8_t test_vcls_s8(int8x8_t a) { + // CHECK: test_vcls_s8 + return vcls_s8(a); + // CHECK: cls v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +int8x16_t test_vclsq_s8(int8x16_t a) { + // CHECK: test_vclsq_s8 + return vclsq_s8(a); + // CHECK: cls v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +int16x4_t test_vcls_s16(int16x4_t a) { + // CHECK: test_vcls_s16 + return vcls_s16(a); + // CHECK: cls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +} + +int16x8_t test_vclsq_s16(int16x8_t a) { + // CHECK: test_vclsq_s16 + return vclsq_s16(a); + // CHECK: cls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +} + +int32x2_t test_vcls_s32(int32x2_t a) { + // CHECK: test_vcls_s32 + return vcls_s32(a); + // CHECK: cls v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +int32x4_t test_vclsq_s32(int32x4_t a) { + // CHECK: test_vclsq_s32 + return vclsq_s32(a); + // 
CHECK: cls v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +int8x8_t test_vclz_s8(int8x8_t a) { + // CHECK: test_vclz_s8 + return vclz_s8(a); + // CHECK: clz v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +int8x16_t test_vclzq_s8(int8x16_t a) { + // CHECK: test_vclzq_s8 + return vclzq_s8(a); + // CHECK: clz v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +int16x4_t test_vclz_s16(int16x4_t a) { + // CHECK: test_vclz_s16 + return vclz_s16(a); + // CHECK: clz v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +} + +int16x8_t test_vclzq_s16(int16x8_t a) { + // CHECK: test_vclzq_s16 + return vclzq_s16(a); + // CHECK: clz v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +} + +int32x2_t test_vclz_s32(int32x2_t a) { + // CHECK: test_vclz_s32 + return vclz_s32(a); + // CHECK: clz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +int32x4_t test_vclzq_s32(int32x4_t a) { + // CHECK: test_vclzq_s32 + return vclzq_s32(a); + // CHECK: clz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +uint8x8_t test_vclz_u8(uint8x8_t a) { + // CHECK: test_vclz_u8 + return vclz_u8(a); + // CHECK: clz v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +uint8x16_t test_vclzq_u8(uint8x16_t a) { + // CHECK: test_vclzq_u8 + return vclzq_u8(a); + // CHECK: clz v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +uint16x4_t test_vclz_u16(uint16x4_t a) { + // CHECK: test_vclz_u16 + return vclz_u16(a); + // CHECK: clz v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +} + +uint16x8_t test_vclzq_u16(uint16x8_t a) { + // CHECK: test_vclzq_u16 + return vclzq_u16(a); + // CHECK: clz v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +} + +uint32x2_t test_vclz_u32(uint32x2_t a) { + // CHECK: test_vclz_u32 + return vclz_u32(a); + // CHECK: clz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +uint32x4_t test_vclzq_u32(uint32x4_t a) { + // CHECK: test_vclzq_u32 + return vclzq_u32(a); + // CHECK: clz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +int8x8_t test_vcnt_s8(int8x8_t a) { + // CHECK: test_vcnt_s8 + return vcnt_s8(a); + // CHECK: cnt v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +int8x16_t test_vcntq_s8(int8x16_t a) { + // CHECK: test_vcntq_s8 + return vcntq_s8(a); + // CHECK: cnt v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +uint8x8_t test_vcnt_u8(uint8x8_t a) { + // CHECK: test_vcnt_u8 + return vcnt_u8(a); + // CHECK: cnt v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +uint8x16_t test_vcntq_u8(uint8x16_t a) { + // CHECK: test_vcntq_u8 + return vcntq_u8(a); + // CHECK: cnt v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +poly8x8_t test_vcnt_p8(poly8x8_t a) { + // CHECK: test_vcnt_p8 + return vcnt_p8(a); + // CHECK: cnt v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +poly8x16_t test_vcntq_p8(poly8x16_t a) { + // CHECK: test_vcntq_p8 + return vcntq_p8(a); + // CHECK: cnt v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +int8x8_t test_vmvn_s8(int8x8_t a) { + // CHECK: test_vmvn_s8 + return vmvn_s8(a); + // CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +int8x16_t test_vmvnq_s8(int8x16_t a) { + // CHECK: test_vmvnq_s8 + return vmvnq_s8(a); + // CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +int16x4_t test_vmvn_s16(int16x4_t a) { + // CHECK: test_vmvn_s16 + return vmvn_s16(a); + // CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +int16x8_t test_vmvnq_s16(int16x8_t a) { + // CHECK: test_vmvnq_s16 + return vmvnq_s16(a); + // CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +int32x2_t test_vmvn_s32(int32x2_t a) { + // CHECK: test_vmvn_s32 + return vmvn_s32(a); + // CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +int32x4_t test_vmvnq_s32(int32x4_t a) { + // CHECK: test_vmvnq_s32 + return vmvnq_s32(a); + // CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +uint8x8_t test_vmvn_u8(uint8x8_t a) { + // CHECK: test_vmvn_u8 + return vmvn_u8(a); + // CHECK: not v{{[0-9]+}}.8b, 
v{{[0-9]+}}.8b +} + +uint8x16_t test_vmvnq_u8(uint8x16_t a) { + // CHECK: test_vmvnq_u8 + return vmvnq_u8(a); + // CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +uint16x4_t test_vmvn_u16(uint16x4_t a) { + // CHECK: test_vmvn_u16 + return vmvn_u16(a); + // CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +uint16x8_t test_vmvnq_u16(uint16x8_t a) { + // CHECK: test_vmvnq_u16 + return vmvnq_u16(a); + // CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +uint32x2_t test_vmvn_u32(uint32x2_t a) { + // CHECK: test_vmvn_u32 + return vmvn_u32(a); + // CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +uint32x4_t test_vmvnq_u32(uint32x4_t a) { + // CHECK: test_vmvnq_u32 + return vmvnq_u32(a); + // CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +poly8x8_t test_vmvn_p8(poly8x8_t a) { + // CHECK: test_vmvn_p8 + return vmvn_p8(a); + // CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +poly8x16_t test_vmvnq_p8(poly8x16_t a) { + // CHECK: test_vmvnq_p8 + return vmvnq_p8(a); + // CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +int8x8_t test_vrbit_s8(int8x8_t a) { + // CHECK: test_vrbit_s8 + return vrbit_s8(a); + // CHECK: rbit v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +int8x16_t test_vrbitq_s8(int8x16_t a) { + // CHECK: test_vrbitq_s8 + return vrbitq_s8(a); + // CHECK: rbit v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +uint8x8_t test_vrbit_u8(uint8x8_t a) { + // CHECK: test_vrbit_u8 + return vrbit_u8(a); + // CHECK: rbit v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +uint8x16_t test_vrbitq_u8(uint8x16_t a) { + // CHECK: test_vrbitq_u8 + return vrbitq_u8(a); + // CHECK: rbit v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +poly8x8_t test_vrbit_p8(poly8x8_t a) { + // CHECK: test_vrbit_p8 + return vrbit_p8(a); + // CHECK: rbit v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +} + +poly8x16_t test_vrbitq_p8(poly8x16_t a) { + // CHECK: test_vrbitq_p8 + return vrbitq_p8(a); + // CHECK: rbit v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +} + +int8x8_t test_vmovn_s16(int16x8_t a) { + // CHECK: test_vmovn_s16 + return vmovn_s16(a); + // CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h +} + +int16x4_t test_vmovn_s32(int32x4_t a) { + // CHECK: test_vmovn_s32 + return vmovn_s32(a); + // CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s +} + +int32x2_t test_vmovn_s64(int64x2_t a) { + // CHECK: test_vmovn_s64 + return vmovn_s64(a); + // CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d +} + +uint8x8_t test_vmovn_u16(uint16x8_t a) { + // CHECK: test_vmovn_u16 + return vmovn_u16(a); + // CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h +} + +uint16x4_t test_vmovn_u32(uint32x4_t a) { + // CHECK: test_vmovn_u32 + return vmovn_u32(a); + // CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s +} + +uint32x2_t test_vmovn_u64(uint64x2_t a) { + // CHECK: test_vmovn_u64 + return vmovn_u64(a); + // CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d +} + +int8x16_t test_vmovn_high_s16(int8x8_t a, int16x8_t b) { + // CHECK: test_vmovn_high_s16 + return vmovn_high_s16(a, b); + // CHECK: xtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h +} + +int16x8_t test_vmovn_high_s32(int16x4_t a, int32x4_t b) { + // CHECK: test_vmovn_high_s32 + return vmovn_high_s32(a, b); + // CHECK: xtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s +} + +int32x4_t test_vmovn_high_s64(int32x2_t a, int64x2_t b) { + // CHECK: test_vmovn_high_s64 + return vmovn_high_s64(a, b); + // CHECK: xtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d +} + +int8x16_t test_vmovn_high_u16(int8x8_t a, int16x8_t b) { + // CHECK: test_vmovn_high_u16 + return vmovn_high_u16(a, b); + // CHECK: xtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h +} + +int16x8_t test_vmovn_high_u32(int16x4_t a, int32x4_t b) { + // CHECK: test_vmovn_high_u32 + return 
vmovn_high_u32(a, b); + // CHECK: xtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s +} + +int32x4_t test_vmovn_high_u64(int32x2_t a, int64x2_t b) { + // CHECK: test_vmovn_high_u64 + return vmovn_high_u64(a, b); + // CHECK: xtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d +} + +int8x8_t test_vqmovun_s16(int16x8_t a) { + // CHECK: test_vqmovun_s16 + return vqmovun_s16(a); + // CHECK: sqxtun v{{[0-9]+}}.8b, v{{[0-9]+}}.8h +} + +int16x4_t test_vqmovun_s32(int32x4_t a) { + // CHECK: test_vqmovun_s32 + return vqmovun_s32(a); + // CHECK: sqxtun v{{[0-9]+}}.4h, v{{[0-9]+}}.4s +} + +int32x2_t test_vqmovun_s64(int64x2_t a) { + // CHECK: test_vqmovun_s64 + return vqmovun_s64(a); + // CHECK: sqxtun v{{[0-9]+}}.2s, v{{[0-9]+}}.2d +} + +int8x16_t test_vqmovun_high_s16(int8x8_t a, int16x8_t b) { + // CHECK: test_vqmovun_high_s16 + return vqmovun_high_s16(a, b); + // CHECK: sqxtun2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h +} + +int16x8_t test_vqmovun_high_s32(int16x4_t a, int32x4_t b) { + // CHECK: test_vqmovun_high_s32 + return vqmovun_high_s32(a, b); + // CHECK: sqxtun2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s +} + +int32x4_t test_vqmovun_high_s64(int32x2_t a, int64x2_t b) { + // CHECK: test_vqmovun_high_s64 + return vqmovun_high_s64(a, b); + // CHECK: sqxtun2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d +} + +int8x8_t test_vqmovn_s16(int16x8_t a) { + // CHECK: test_vqmovn_s16 + return vqmovn_s16(a); + // CHECK: sqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h +} + +int16x4_t test_vqmovn_s32(int32x4_t a) { + // CHECK: test_vqmovn_s32 + return vqmovn_s32(a); + // CHECK: sqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s +} + +int32x2_t test_vqmovn_s64(int64x2_t a) { + // CHECK: test_vqmovn_s64 + return vqmovn_s64(a); + // CHECK: sqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d +} + +int8x16_t test_vqmovn_high_s16(int8x8_t a, int16x8_t b) { + // CHECK: test_vqmovn_high_s16 + return vqmovn_high_s16(a, b); + // CHECK: sqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h +} + +int16x8_t test_vqmovn_high_s32(int16x4_t a, int32x4_t b) { + // CHECK: test_vqmovn_high_s32 + return vqmovn_high_s32(a, b); + // CHECK: sqxtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s +} + +int32x4_t test_vqmovn_high_s64(int32x2_t a, int64x2_t b) { + // CHECK: test_vqmovn_high_s64 + return vqmovn_high_s64(a, b); + // CHECK: sqxtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d +} + +uint8x8_t test_vqmovn_u16(uint16x8_t a) { + // CHECK: test_vqmovn_u16 + return vqmovn_u16(a); + // CHECK: uqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h +} + +uint16x4_t test_vqmovn_u32(uint32x4_t a) { + // CHECK: test_vqmovn_u32 + return vqmovn_u32(a); + // CHECK: uqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s +} + +uint32x2_t test_vqmovn_u64(uint64x2_t a) { + // CHECK: test_vqmovn_u64 + return vqmovn_u64(a); + // CHECK: uqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d +} + +uint8x16_t test_vqmovn_high_u16(uint8x8_t a, uint16x8_t b) { + // CHECK: test_vqmovn_high_u16 + return vqmovn_high_u16(a, b); + // CHECK: uqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h +} + +uint16x8_t test_vqmovn_high_u32(uint16x4_t a, uint32x4_t b) { + // CHECK: test_vqmovn_high_u32 + return vqmovn_high_u32(a, b); + // CHECK: uqxtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s +} + +uint32x4_t test_vqmovn_high_u64(uint32x2_t a, uint64x2_t b) { + // CHECK: test_vqmovn_high_u64 + return vqmovn_high_u64(a, b); + // CHECK: uqxtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d +} + +int16x8_t test_vshll_n_s8(int8x8_t a) { + // CHECK: test_vshll_n_s8 + return vshll_n_s8(a, 8); + // CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8 +} + +int32x4_t test_vshll_n_s16(int16x4_t a) { + // CHECK: test_vshll_n_s16 + return vshll_n_s16(a, 16); + // CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16 +} + 
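For reference, a minimal standalone sketch of the behaviour the sqxtn/sqxtun checks above are matching, i.e. signed and unsigned saturating narrowing. It assumes an AArch64 toolchain with <arm_neon.h> and the compiler's vector-subscript extension; the values and the main() wrapper are illustrative and not part of the patch itself:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  /* 300 and -300 do not fit in int8_t, so vqmovn_s16 (sqxtn) clamps them to
     127/-128, while vqmovun_s16 (sqxtun) clamps to the unsigned range 0..255. */
  int16x8_t wide = {300, -300, 5, -5, 127, -128, 0, 42};
  int8x8_t  s = vqmovn_s16(wide);   /* expected lanes: 127 -128 5 -5 127 -128 0 42 */
  uint8x8_t u = vqmovun_s16(wide);  /* expected lanes: 255 0 5 0 127 0 0 42 */

  for (int i = 0; i < 8; ++i) printf("%d ", (int)s[i]);
  printf("\n");
  for (int i = 0; i < 8; ++i) printf("%u ", (unsigned)u[i]);
  printf("\n");
  return 0;
}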
+int64x2_t test_vshll_n_s32(int32x2_t a) { + // CHECK: test_vshll_n_s32 + return vshll_n_s32(a, 32); + // CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32 +} + +uint16x8_t test_vshll_n_u8(uint8x8_t a) { + // CHECK: test_vshll_n_u8 + return vshll_n_u8(a, 8); + // CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8 +} + +uint32x4_t test_vshll_n_u16(uint16x4_t a) { + // CHECK: test_vshll_n_u16 + return vshll_n_u16(a, 16); + // CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16 +} + +uint64x2_t test_vshll_n_u32(uint32x2_t a) { + // CHECK: test_vshll_n_u32 + return vshll_n_u32(a, 32); + // CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32 +} + +int16x8_t test_vshll_high_n_s8(int8x16_t a) { + // CHECK: test_vshll_high_n_s8 + return vshll_high_n_s8(a, 8); + // CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8 +} + +int32x4_t test_vshll_high_n_s16(int16x8_t a) { + // CHECK: test_vshll_high_n_s16 + return vshll_high_n_s16(a, 16); + // CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16 +} + +int64x2_t test_vshll_high_n_s32(int32x4_t a) { + // CHECK: test_vshll_high_n_s32 + return vshll_high_n_s32(a, 32); + // CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32 +} + +uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { + // CHECK: test_vshll_high_n_u8 + return vshll_high_n_u8(a, 8); + // CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8 +} + +uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { + // CHECK: test_vshll_high_n_u16 + return vshll_high_n_u16(a, 16); + // CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16 +} + +uint64x2_t test_vshll_high_n_u32(uint32x4_t a) { + // CHECK: test_vshll_high_n_u32 + return vshll_high_n_u32(a, 32); + // CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32 +} + +float16x4_t test_vcvt_f16_f32(float32x4_t a) { + //CHECK: test_vcvt_f16_f32 + return vcvt_f16_f32(a); + // CHECK: fcvtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s +} + +float16x8_t test_vcvt_high_f16_f32(float16x4_t a, float32x4_t b) { + //CHECK: test_vcvt_high_f16_f32 + return vcvt_high_f16_f32(a, b); + // CHECK: fcvtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s +} + +float32x2_t test_vcvt_f32_f64(float64x2_t a) { + //CHECK: test_vcvt_f32_f64 + return vcvt_f32_f64(a); + // CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d +} + +float32x4_t test_vcvt_high_f32_f64(float32x2_t a, float64x2_t b) { + //CHECK: test_vcvt_high_f32_f64 + return vcvt_high_f32_f64(a, b); + // CHECK: fcvtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d +} + +float32x2_t test_vcvtx_f32_f64(float64x2_t a) { + //CHECK: test_vcvtx_f32_f64 + return vcvtx_f32_f64(a); + // CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d +} + +float32x4_t test_vcvtx_high_f32_f64(float32x2_t a, float64x2_t b) { + //CHECK: test_vcvtx_high_f32_f64 + return vcvtx_high_f32_f64(a, b); + // CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d +} + +float32x4_t test_vcvt_f32_f16(float16x4_t a) { + //CHECK: test_vcvt_f32_f16 + return vcvt_f32_f16(a); + // CHECK: fcvtl v{{[0-9]+}}.4s, v{{[0-9]+}}.4h +} + +float32x4_t test_vcvt_high_f32_f16(float16x8_t a) { + //CHECK: test_vcvt_high_f32_f16 + return vcvt_high_f32_f16(a); + // CHECK: fcvtl2 v{{[0-9]+}}.4s, v{{[0-9]+}}.8h +} + +float64x2_t test_vcvt_f64_f32(float32x2_t a) { + //CHECK: test_vcvt_f64_f32 + return vcvt_f64_f32(a); + // CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s +} + +float64x2_t test_vcvt_high_f64_f32(float32x4_t a) { + //CHECK: test_vcvt_high_f64_f32 + return vcvt_high_f64_f32(a); + // CHECK: fcvtl2 v{{[0-9]+}}.2d, v{{[0-9]+}}.4s +} + +float32x2_t test_vrndn_f32(float32x2_t a) { + //CHECK: test_vrndn_f32 + return vrndn_f32(a); + // CHECK: frintn v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + 
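For context, the fcvtn/fcvtn2 pair exercised just above (vcvt_f32_f64 and vcvt_high_f32_f64) is the usual idiom for narrowing four doubles into one float32x4_t. A minimal sketch, assuming <arm_neon.h> on AArch64; the helper name narrow4 is illustrative, not from the patch:

#include <arm_neon.h>

/* Narrow two float64x2_t vectors (four doubles total) into one float32x4_t. */
static float32x4_t narrow4(float64x2_t lo, float64x2_t hi) {
  float32x2_t lo32 = vcvt_f32_f64(lo); /* fcvtn  v.2s, v.2d : low half  */
  return vcvt_high_f32_f64(lo32, hi);  /* fcvtn2 v.4s, v.2d : high half */
}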
+float32x4_t test_vrndnq_f32(float32x4_t a) { + //CHECK: test_vrndnq_f32 + return vrndnq_f32(a); + // CHECK: frintn v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +float64x2_t test_vrndnq_f64(float64x2_t a) { + //CHECK: test_vrndnq_f64 + return vrndnq_f64(a); + // CHECK: frintn v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +float32x2_t test_vrnda_f32(float32x2_t a) { + //CHECK: test_vrnda_f32 + return vrnda_f32(a); + // CHECK: frinta v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +float32x4_t test_vrndaq_f32(float32x4_t a) { + //CHECK: test_vrndaq_f32 + return vrndaq_f32(a); + // CHECK: frinta v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +float64x2_t test_vrndaq_f64(float64x2_t a) { + //CHECK: test_vrndaq_f64 + return vrndaq_f64(a); + // CHECK: frinta v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +float32x2_t test_vrndp_f32(float32x2_t a) { + //CHECK: test_vrndp_f32 + return vrndp_f32(a); + // CHECK: frintp v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +float32x4_t test_vrndpq_f32(float32x4_t a) { + //CHECK: test_vrndpq_f32 + return vrndpq_f32(a); + // CHECK: frintp v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +float64x2_t test_vrndpq_f64(float64x2_t a) { + //CHECK: test_vrndpq_f64 + return vrndpq_f64(a); + // CHECK: frintp v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +float32x2_t test_vrndm_f32(float32x2_t a) { + //CHECK: test_vrndm_f32 + return vrndm_f32(a); + // CHECK: frintm v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +float32x4_t test_vrndmq_f32(float32x4_t a) { + //CHECK: test_vrndmq_f32 + return vrndmq_f32(a); + // CHECK: frintm v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +float64x2_t test_vrndmq_f64(float64x2_t a) { + //CHECK: test_vrndmq_f64 + return vrndmq_f64(a); + // CHECK: frintm v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +float32x2_t test_vrndx_f32(float32x2_t a) { + //CHECK: test_vrndx_f32 + return vrndx_f32(a); + // CHECK: frintx v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +float32x4_t test_vrndxq_f32(float32x4_t a) { + //CHECK: test_vrndxq_f32 + return vrndxq_f32(a); + // CHECK: frintx v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +float64x2_t test_vrndxq_f64(float64x2_t a) { + //CHECK: test_vrndxq_f64 + return vrndxq_f64(a); + // CHECK: frintx v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +float32x2_t test_vrnd_f32(float32x2_t a) { + //CHECK: test_vrnd_f32 + return vrnd_f32(a); + // CHECK: frintz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +float32x4_t test_vrndq_f32(float32x4_t a) { + //CHECK: test_vrndq_f32 + return vrndq_f32(a); + // CHECK: frintz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +float64x2_t test_vrndq_f64(float64x2_t a) { + //CHECK: test_vrndq_f64 + return vrndq_f64(a); + // CHECK: frintz v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +float32x2_t test_vrndi_f32(float32x2_t a) { + //CHECK: test_vrndi_f32 + return vrndi_f32(a); + // CHECK: frinti v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +float32x4_t test_vrndiq_f32(float32x4_t a) { + //CHECK: test_vrndiq_f32 + return vrndiq_f32(a); + // CHECK: frinti v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +float64x2_t test_vrndiq_f64(float64x2_t a) { + //CHECK: test_vrndiq_f64 + return vrndiq_f64(a); + // CHECK: frinti v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +int32x2_t test_vcvt_s32_f32(float32x2_t a) { + //CHECK: test_vcvt_s32_f32 + return vcvt_s32_f32(a); + // CHECK: fcvtzs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +int32x4_t test_vcvtq_s32_f32(float32x4_t a) { + //CHECK: test_vcvtq_s32_f32 + return vcvtq_s32_f32(a); + // CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +int64x2_t test_vcvtq_s64_f64(float64x2_t a) { + //CHECK: test_vcvtq_s64_f64 + return vcvtq_s64_f64(a); + // CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +uint32x2_t test_vcvt_u32_f32(float32x2_t a) { + //CHECK: 
test_vcvt_u32_f32 + return vcvt_u32_f32(a); + // CHECK: fcvtzu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +uint32x4_t test_vcvtq_u32_f32(float32x4_t a) { + //CHECK: test_vcvtq_u32_f32 + return vcvtq_u32_f32(a); + // CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +uint64x2_t test_vcvtq_u64_f64(float64x2_t a) { + //CHECK: test_vcvtq_u64_f64 + return vcvtq_u64_f64(a); + // CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +int32x2_t test_vcvtn_s32_f32(float32x2_t a) { + //CHECK: test_vcvtn_s32_f32 + return vcvtn_s32_f32(a); + // CHECK: fcvtns v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +int32x4_t test_vcvtnq_s32_f32(float32x4_t a) { + //CHECK: test_vcvtnq_s32_f32 + return vcvtnq_s32_f32(a); + // CHECK: fcvtns v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +int64x2_t test_vcvtnq_s64_f64(float64x2_t a) { + //CHECK: test_vcvtnq_s64_f64 + return vcvtnq_s64_f64(a); + // CHECK: fcvtns v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +uint32x2_t test_vcvtn_u32_f32(float32x2_t a) { + //CHECK: test_vcvtn_u32_f32 + return vcvtn_u32_f32(a); + // CHECK: fcvtnu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) { + //CHECK: test_vcvtnq_u32_f32 + return vcvtnq_u32_f32(a); + // CHECK: fcvtnu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +uint64x2_t test_vcvtnq_u64_f64(float64x2_t a) { + //CHECK: test_vcvtnq_u64_f64 + return vcvtnq_u64_f64(a); + // CHECK: fcvtnu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +int32x2_t test_vcvtp_s32_f32(float32x2_t a) { + //CHECK: test_vcvtp_s32_f32 + return vcvtp_s32_f32(a); + // CHECK: fcvtps v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +int32x4_t test_vcvtpq_s32_f32(float32x4_t a) { + //CHECK: test_vcvtpq_s32_f32 + return vcvtpq_s32_f32(a); + // CHECK: fcvtps v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +int64x2_t test_vcvtpq_s64_f64(float64x2_t a) { + //CHECK: test_vcvtpq_s64_f64 + return vcvtpq_s64_f64(a); + // CHECK: fcvtps v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +uint32x2_t test_vcvtp_u32_f32(float32x2_t a) { + //CHECK: test_vcvtp_u32_f32 + return vcvtp_u32_f32(a); + // CHECK: fcvtpu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) { + //CHECK: test_vcvtpq_u32_f32 + return vcvtpq_u32_f32(a); + // CHECK: fcvtpu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +uint64x2_t test_vcvtpq_u64_f64(float64x2_t a) { + //CHECK: test_vcvtpq_u64_f64 + return vcvtpq_u64_f64(a); + // CHECK: fcvtpu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +int32x2_t test_vcvtm_s32_f32(float32x2_t a) { + //CHECK: test_vcvtm_s32_f32 + return vcvtm_s32_f32(a); + // CHECK: fcvtms v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +int32x4_t test_vcvtmq_s32_f32(float32x4_t a) { + //CHECK: test_vcvtmq_s32_f32 + return vcvtmq_s32_f32(a); + // CHECK: fcvtms v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +int64x2_t test_vcvtmq_s64_f64(float64x2_t a) { + //CHECK: test_vcvtmq_s64_f64 + return vcvtmq_s64_f64(a); + // CHECK: fcvtms v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +uint32x2_t test_vcvtm_u32_f32(float32x2_t a) { + //CHECK: test_vcvtm_u32_f32 + return vcvtm_u32_f32(a); + // CHECK: fcvtmu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +uint32x4_t test_vcvtmq_u32_f32(float32x4_t a) { + //CHECK: test_vcvtmq_u32_f32 + return vcvtmq_u32_f32(a); + // CHECK: fcvtmu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +uint64x2_t test_vcvtmq_u64_f64(float64x2_t a) { + //CHECK: test_vcvtmq_u64_f64 + return vcvtmq_u64_f64(a); + // CHECK: fcvtmu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +int32x2_t test_vcvta_s32_f32(float32x2_t a) { + //CHECK: test_vcvta_s32_f32 + return vcvta_s32_f32(a); + // CHECK: fcvtas v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +int32x4_t test_vcvtaq_s32_f32(float32x4_t a) { + //CHECK: 
test_vcvtaq_s32_f32 + return vcvtaq_s32_f32(a); + // CHECK: fcvtas v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +int64x2_t test_vcvtaq_s64_f64(float64x2_t a) { + //CHECK: test_vcvtaq_s64_f64 + return vcvtaq_s64_f64(a); + // CHECK: fcvtas v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +uint32x2_t test_vcvta_u32_f32(float32x2_t a) { + //CHECK: test_vcvta_u32_f32 + return vcvta_u32_f32(a); + // CHECK: fcvtau v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) { + //CHECK: test_vcvtaq_u32_f32 + return vcvtaq_u32_f32(a); + // CHECK: fcvtau v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +uint64x2_t test_vcvtaq_u64_f64(float64x2_t a) { + //CHECK: test_vcvtaq_u64_f64 + return vcvtaq_u64_f64(a); + // CHECK: fcvtau v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +float32x2_t test_vrsqrte_f32(float32x2_t a) { + //CHECK: test_vrsqrte_f32 + return vrsqrte_f32(a); + // CHECK: frsqrte v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +float32x4_t test_vrsqrteq_f32(float32x4_t a) { + //CHECK: test_vrsqrteq_f32 + return vrsqrteq_f32(a); + // CHECK: frsqrte v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +float64x2_t test_vrsqrteq_f64(float64x2_t a) { + //CHECK: test_vrsqrteq_f64 + return vrsqrteq_f64(a); + // CHECK: frsqrte v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +float32x2_t test_vrecpe_f32(float32x2_t a) { + //CHECK: test_vrecpe_f32 + return vrecpe_f32(a); + // CHECK: frecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +float32x4_t test_vrecpeq_f32(float32x4_t a) { + //CHECK: test_vrecpeq_f32 + return vrecpeq_f32(a); + // CHECK: frecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +float64x2_t test_vrecpeq_f64(float64x2_t a) { + //CHECK: test_vrecpeq_f64 + return vrecpeq_f64(a); + // CHECK: frecpe v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +uint32x2_t test_vrecpe_u32(uint32x2_t a) { + //CHECK: test_vrecpe_u32 + return vrecpe_u32(a); + // CHECK: urecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +uint32x4_t test_vrecpeq_u32(uint32x4_t a) { + //CHECK: test_vrecpeq_u32 + return vrecpeq_u32(a); + // CHECK: urecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +float32x2_t test_vsqrt_f32(float32x2_t a) { + //CHECK: test_vsqrt_f32 + return vsqrt_f32(a); + // CHECK: fsqrt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +float32x4_t test_vsqrtq_f32(float32x4_t a) { + //CHECK: test_vsqrtq_f32 + return vsqrtq_f32(a); + // CHECK: fsqrt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +float64x2_t test_vsqrtq_f64(float64x2_t a) { + //CHECK: test_vsqrtq_f64 + return vsqrtq_f64(a); + // CHECK: fsqrt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +float32x2_t test_vcvt_f32_s32(int32x2_t a) { + //CHECK: test_vcvt_f32_s32 + return vcvt_f32_s32(a); + //CHECK: scvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +float32x2_t test_vcvt_f32_u32(uint32x2_t a) { + //CHECK: test_vcvt_f32_u32 + return vcvt_f32_u32(a); + //CHECK: ucvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +} + +float32x4_t test_vcvtq_f32_s32(int32x4_t a) { + //CHECK: test_vcvtq_f32_s32 + return vcvtq_f32_s32(a); + //CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +float32x4_t test_vcvtq_f32_u32(uint32x4_t a) { + //CHECK: test_vcvtq_f32_u32 + return vcvtq_f32_u32(a); + //CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +} + +float64x2_t test_vcvtq_f64_s64(int64x2_t a) { + //CHECK: test_vcvtq_f64_s64 + return vcvtq_f64_s64(a); + //CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} + +float64x2_t test_vcvtq_f64_u64(uint64x2_t a) { + //CHECK: test_vcvtq_f64_u64 + return vcvtq_f64_u64(a); + //CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +} diff --git a/test/CodeGen/aarch64-neon-perm.c b/test/CodeGen/aarch64-neon-perm.c new file mode 100644 index 0000000..903570b --- /dev/null +++ b/test/CodeGen/aarch64-neon-perm.c 
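The new aarch64-neon-perm.c test below checks that the uzp/zip permute intrinsics lower to single uzp1/uzp2/zip1/zip2 instructions (or to a single ins for the two-lane cases). As a rough scalar reference for the lane order those checks encode, assuming Clang's vector subscripting; the *_reference helper names are illustrative only:

#include <arm_neon.h>

/* zip1 interleaves the low halves of a and b: a0 b0 a1 b1 a2 b2 a3 b3 */
static int8x8_t zip1_reference(int8x8_t a, int8x8_t b) {
  int8x8_t r = {a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3]};
  return r;
}

/* uzp1 keeps the even-numbered lanes of a, then of b: a0 a2 a4 a6 b0 b2 b4 b6 */
static int8x8_t uzp1_reference(int8x8_t a, int8x8_t b) {
  int8x8_t r = {a[0], a[2], a[4], a[6], b[0], b[2], b[4], b[6]};
  return r;
}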
@@ -0,0 +1,1093 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +int8x8_t test_vuzp1_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vuzp1_s8 + return vuzp1_s8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vuzp1q_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vuzp1q_s8 + return vuzp1q_s8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vuzp1_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vuzp1_s16 + return vuzp1_s16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vuzp1q_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vuzp1q_s16 + return vuzp1q_s16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vuzp1_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vuzp1_s32 + return vuzp1_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +int32x4_t test_vuzp1q_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vuzp1q_s32 + return vuzp1q_s32(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vuzp1q_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vuzp1q_s64 + return vuzp1q_s64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +uint8x8_t test_vuzp1_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vuzp1_u8 + return vuzp1_u8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vuzp1q_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vuzp1q_u8 + return vuzp1q_u8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vuzp1_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vuzp1_u16 + return vuzp1_u16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vuzp1q_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vuzp1q_u16 + return vuzp1q_u16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vuzp1_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vuzp1_u32 + return vuzp1_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vuzp1q_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vuzp1q_u32 + return vuzp1q_u32(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vuzp1q_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vuzp1q_u64 + return vuzp1q_u64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +float32x2_t test_vuzp1_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vuzp1_f32 + return vuzp1_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +float32x4_t test_vuzp1q_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vuzp1q_f32 + return vuzp1q_f32(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vuzp1q_f64(float64x2_t a, float64x2_t b) { + // CHECK: test_vuzp1q_f64 + return vuzp1q_f64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +poly8x8_t test_vuzp1_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vuzp1_p8 + return vuzp1_p8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x16_t test_vuzp1q_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vuzp1q_p8 + return vuzp1q_p8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly16x4_t test_vuzp1_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vuzp1_p16 + return vuzp1_p16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +poly16x8_t 
test_vuzp1q_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vuzp1q_p16 + return vuzp1q_p16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8_t test_vuzp2_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vuzp2_s8 + return vuzp2_s8(a, b); + // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vuzp2q_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vuzp2q_s8 + return vuzp2q_s8(a, b); + // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vuzp2_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vuzp2_s16 + return vuzp2_s16(a, b); + // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vuzp2q_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vuzp2q_s16 + return vuzp2q_s16(a, b); + // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vuzp2_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vuzp2_s32 + return vuzp2_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +int32x4_t test_vuzp2q_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vuzp2q_s32 + return vuzp2q_s32(a, b); + // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vuzp2q_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vuzp2q_s64 + return vuzp2q_s64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +uint8x8_t test_vuzp2_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vuzp2_u8 + return vuzp2_u8(a, b); + // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vuzp2q_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vuzp2q_u8 + return vuzp2q_u8(a, b); + // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vuzp2_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vuzp2_u16 + return vuzp2_u16(a, b); + // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vuzp2q_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vuzp2q_u16 + return vuzp2q_u16(a, b); + // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vuzp2_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vuzp2_u32 + return vuzp2_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +uint32x4_t test_vuzp2q_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vuzp2q_u32 + return vuzp2q_u32(a, b); + // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vuzp2q_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vuzp2q_u64 + return vuzp2q_u64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +float32x2_t test_vuzp2_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vuzp2_f32 + return vuzp2_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +float32x4_t test_vuzp2q_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vuzp2q_f32 + return vuzp2q_f32(a, b); + // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vuzp2q_f64(float64x2_t a, float64x2_t b) { + // CHECK: test_vuzp2q_f64 + return vuzp2q_f64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +poly8x8_t test_vuzp2_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vuzp2_p8 + return vuzp2_p8(a, b); + // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x16_t test_vuzp2q_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vuzp2q_p8 + return vuzp2q_p8(a, b); + // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly16x4_t test_vuzp2_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vuzp2_p16 + return vuzp2_p16(a, b); + // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +poly16x8_t test_vuzp2q_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vuzp2q_p16 + return vuzp2q_p16(a, b); + // 
CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8_t test_vzip1_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vzip1_s8 + return vzip1_s8(a, b); + // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vzip1q_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vzip1q_s8 + return vzip1q_s8(a, b); + // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vzip1_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vzip1_s16 + return vzip1_s16(a, b); + // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vzip1q_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vzip1q_s16 + return vzip1q_s16(a, b); + // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vzip1_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vzip1_s32 + return vzip1_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +int32x4_t test_vzip1q_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vzip1q_s32 + return vzip1q_s32(a, b); + // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vzip1q_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vzip1q_s64 + return vzip1q_s64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +uint8x8_t test_vzip1_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vzip1_u8 + return vzip1_u8(a, b); + // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vzip1q_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vzip1q_u8 + return vzip1q_u8(a, b); + // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vzip1_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vzip1_u16 + return vzip1_u16(a, b); + // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vzip1q_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vzip1q_u16 + return vzip1q_u16(a, b); + // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vzip1_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vzip1_u32 + return vzip1_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vzip1q_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vzip1q_u32 + return vzip1q_u32(a, b); + // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vzip1q_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vzip1q_u64 + return vzip1q_u64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +float32x2_t test_vzip1_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vzip1_f32 + return vzip1_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +float32x4_t test_vzip1q_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vzip1q_f32 + return vzip1q_f32(a, b); + // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vzip1q_f64(float64x2_t a, float64x2_t b) { + // CHECK: test_vzip1q_f64 + return vzip1q_f64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +poly8x8_t test_vzip1_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vzip1_p8 + return vzip1_p8(a, b); + // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x16_t test_vzip1q_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vzip1q_p8 + return vzip1q_p8(a, b); + // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly16x4_t test_vzip1_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vzip1_p16 + return vzip1_p16(a, b); + // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +poly16x8_t test_vzip1q_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vzip1q_p16 + return vzip1q_p16(a, b); + // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8_t test_vzip2_s8(int8x8_t a, int8x8_t b) { + // CHECK: 
test_vzip2_s8 + return vzip2_s8(a, b); + // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vzip2q_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vzip2q_s8 + return vzip2q_s8(a, b); + // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vzip2_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vzip2_s16 + return vzip2_s16(a, b); + // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vzip2q_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vzip2q_s16 + return vzip2q_s16(a, b); + // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vzip2_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vzip2_s32 + return vzip2_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +int32x4_t test_vzip2q_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vzip2q_s32 + return vzip2q_s32(a, b); + // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vzip2q_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vzip2q_s64 + return vzip2q_s64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +uint8x8_t test_vzip2_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vzip2_u8 + return vzip2_u8(a, b); + // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vzip2q_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vzip2q_u8 + return vzip2q_u8(a, b); + // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vzip2_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vzip2_u16 + return vzip2_u16(a, b); + // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vzip2q_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vzip2q_u16 + return vzip2q_u16(a, b); + // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vzip2_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vzip2_u32 + return vzip2_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +uint32x4_t test_vzip2q_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vzip2q_u32 + return vzip2q_u32(a, b); + // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vzip2q_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vzip2q_u64 + return vzip2q_u64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +float32x2_t test_vzip2_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vzip2_f32 + return vzip2_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +float32x4_t test_vzip2q_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vzip2q_f32 + return vzip2q_f32(a, b); + // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vzip2q_f64(float64x2_t a, float64x2_t b) { + // CHECK: test_vzip2q_f64 + return vzip2q_f64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +poly8x8_t test_vzip2_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vzip2_p8 + return vzip2_p8(a, b); + // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x16_t test_vzip2q_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vzip2q_p8 + return vzip2q_p8(a, b); + // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly16x4_t test_vzip2_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vzip2_p16 + return vzip2_p16(a, b); + // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +poly16x8_t test_vzip2q_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vzip2q_p16 + return vzip2q_p16(a, b); + // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8_t test_vtrn1_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vtrn1_s8 + return vtrn1_s8(a, b); + // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t 
test_vtrn1q_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vtrn1q_s8 + return vtrn1q_s8(a, b); + // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vtrn1_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vtrn1_s16 + return vtrn1_s16(a, b); + // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vtrn1q_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vtrn1q_s16 + return vtrn1q_s16(a, b); + // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vtrn1_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vtrn1_s32 + return vtrn1_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +int32x4_t test_vtrn1q_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vtrn1q_s32 + return vtrn1q_s32(a, b); + // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vtrn1q_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vtrn1q_s64 + return vtrn1q_s64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +uint8x8_t test_vtrn1_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vtrn1_u8 + return vtrn1_u8(a, b); + // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vtrn1q_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vtrn1q_u8 + return vtrn1q_u8(a, b); + // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vtrn1_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vtrn1_u16 + return vtrn1_u16(a, b); + // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vtrn1q_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vtrn1q_u16 + return vtrn1q_u16(a, b); + // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vtrn1_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vtrn1_u32 + return vtrn1_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vtrn1q_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vtrn1q_u32 + return vtrn1q_u32(a, b); + // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vtrn1q_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vtrn1q_u64 + return vtrn1q_u64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +float32x2_t test_vtrn1_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vtrn1_f32 + return vtrn1_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +} + +float32x4_t test_vtrn1q_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vtrn1q_f32 + return vtrn1q_f32(a, b); + // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vtrn1q_f64(float64x2_t a, float64x2_t b) { + // CHECK: test_vtrn1q_f64 + return vtrn1q_f64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +poly8x8_t test_vtrn1_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vtrn1_p8 + return vtrn1_p8(a, b); + // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x16_t test_vtrn1q_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vtrn1q_p8 + return vtrn1q_p8(a, b); + // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly16x4_t test_vtrn1_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vtrn1_p16 + return vtrn1_p16(a, b); + // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +poly16x8_t test_vtrn1q_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vtrn1q_p16 + return vtrn1q_p16(a, b); + // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8_t test_vtrn2_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vtrn2_s8 + return vtrn2_s8(a, b); + // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vtrn2q_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vtrn2q_s8 + return vtrn2q_s8(a, b); + // CHECK: 
trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vtrn2_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vtrn2_s16 + return vtrn2_s16(a, b); + // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vtrn2q_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vtrn2q_s16 + return vtrn2q_s16(a, b); + // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vtrn2_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vtrn2_s32 + return vtrn2_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +int32x4_t test_vtrn2q_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vtrn2q_s32 + return vtrn2q_s32(a, b); + // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vtrn2q_s64(int64x2_t a, int64x2_t b) { + // CHECK: test_vtrn2q_s64 + return vtrn2q_s64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +uint8x8_t test_vtrn2_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vtrn2_u8 + return vtrn2_u8(a, b); + // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vtrn2q_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vtrn2q_u8 + return vtrn2q_u8(a, b); + // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vtrn2_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vtrn2_u16 + return vtrn2_u16(a, b); + // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vtrn2q_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vtrn2q_u16 + return vtrn2q_u16(a, b); + // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vtrn2_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vtrn2_u32 + return vtrn2_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +uint32x4_t test_vtrn2q_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vtrn2q_u32 + return vtrn2q_u32(a, b); + // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vtrn2q_u64(uint64x2_t a, uint64x2_t b) { + // CHECK: test_vtrn2q_u64 + return vtrn2q_u64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +float32x2_t test_vtrn2_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vtrn2_f32 + return vtrn2_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} + +float32x4_t test_vtrn2q_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vtrn2q_f32 + return vtrn2q_f32(a, b); + // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vtrn2q_f64(float64x2_t a, float64x2_t b) { + // CHECK: test_vtrn2q_f64 + return vtrn2q_f64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +poly8x8_t test_vtrn2_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vtrn2_p8 + return vtrn2_p8(a, b); + // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x16_t test_vtrn2q_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vtrn2q_p8 + return vtrn2q_p8(a, b); + // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly16x4_t test_vtrn2_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vtrn2_p16 + return vtrn2_p16(a, b); + // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +poly16x8_t test_vtrn2q_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vtrn2q_p16 + return vtrn2q_p16(a, b); + // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vuzp_s8 + return vuzp_s8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vuzp_s16 + return vuzp_s16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: uzp2 
{{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vuzp_s32 + return vuzp_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vuzp_u8 + return vuzp_u8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vuzp_u16 + return vuzp_u16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vuzp_u32 + return vuzp_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vuzp_f32 + return vuzp_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vuzp_p8 + return vuzp_p8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vuzp_p16 + return vuzp_p16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vuzpq_s8 + return vuzpq_s8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vuzpq_s16 + return vuzpq_s16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vuzpq_s32 + return vuzpq_s32(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vuzpq_u8 + return vuzpq_u8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vuzpq_u16 + return vuzpq_u16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vuzpq_u32 + return vuzpq_u32(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vuzpq_f32 + return vuzpq_f32(a, b); + // CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vuzpq_p8 + return vuzpq_p8(a, b); + // CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vuzpq_p16 + return vuzpq_p16(a, b); + // CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vzip_s8 + return vzip_s8(a, b); + // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: 
zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vzip_s16 + return vzip_s16(a, b); + // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vzip_s32 + return vzip_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vzip_u8 + return vzip_u8(a, b); + // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vzip_u16 + return vzip_u16(a, b); + // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vzip_u32 + return vzip_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vzip_f32 + return vzip_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vzip_p8 + return vzip_p8(a, b); + // CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vzip_p16 + return vzip_p16(a, b); + // CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vzipq_s8 + return vzipq_s8(a, b); + // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vzipq_s16 + return vzipq_s16(a, b); + // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vzipq_s32 + return vzipq_s32(a, b); + // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vzipq_u8 + return vzipq_u8(a, b); + // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vzipq_u16 + return vzipq_u16(a, b); + // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vzipq_u32 + return vzipq_u32(a, b); + // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vzipq_f32 + return vzipq_f32(a, b); + // CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vzipq_p8 + return vzipq_p8(a, b); + // CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vzipq_p16 + return vzipq_p16(a, b); + // CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + 
// CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vtrn_s8 + return vtrn_s8(a, b); + // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) { + // CHECK: test_vtrn_s16 + return vtrn_s16(a, b); + // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) { + // CHECK: test_vtrn_s32 + return vtrn_s32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vtrn_u8 + return vtrn_u8(a, b); + // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { + // CHECK: test_vtrn_u16 + return vtrn_u16(a, b); + // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) { + // CHECK: test_vtrn_u32 + return vtrn_u32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) { + // CHECK: test_vtrn_f32 + return vtrn_f32(a, b); + // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] + // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +} +poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) { + // CHECK: test_vtrn_p8 + return vtrn_p8(a, b); + // CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) { + // CHECK: test_vtrn_p16 + return vtrn_p16(a, b); + // CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + // CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vtrnq_s8 + return vtrnq_s8(a, b); + // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { + // CHECK: test_vtrnq_s16 + return vtrnq_s16(a, b); + // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { + // CHECK: test_vtrnq_s32 + return vtrnq_s32(a, b); + // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vtrnq_u8 + return vtrnq_u8(a, b); + // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + // CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { + // CHECK: test_vtrnq_u16 + return vtrnq_u16(a, b); + // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { + // CHECK: test_vtrnq_u32 + return vtrnq_u32(a, b); + // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { + // CHECK: test_vtrnq_f32 + return vtrnq_f32(a, b); + // CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { + // CHECK: test_vtrnq_p8 + return vtrnq_p8(a, b); + // CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + 
// CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) { + // CHECK: test_vtrnq_p16 + return vtrnq_p16(a, b); + // CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + // CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} diff --git a/test/CodeGen/aarch64-neon-scalar-copy.c b/test/CodeGen/aarch64-neon-scalar-copy.c new file mode 100644 index 0000000..33e97c7 --- /dev/null +++ b/test/CodeGen/aarch64-neon-scalar-copy.c @@ -0,0 +1,173 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + + +#include <arm_neon.h> + +// CHECK: test_vdups_lane_f32 +float32_t test_vdups_lane_f32(float32x2_t a) { + return vdups_lane_f32(a, 1); +// CHECK: ret +// CHECK-NOT: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1] +} + + +// CHECK: test_vdupd_lane_f64 +float64_t test_vdupd_lane_f64(float64x1_t a) { + return vdupd_lane_f64(a, 0); +// CHECK: ret +// CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + + +// CHECK: test_vdups_laneq_f32 +float32_t test_vdups_laneq_f32(float32x4_t a) { + return vdups_laneq_f32(a, 3); +// CHECK: ret +// CHECK-NOT: dup {{s[0-9]+}}, {{v[0-9]+}}.s[3] +} + + +// CHECK: test_vdupd_laneq_f64 +float64_t test_vdupd_laneq_f64(float64x2_t a) { + return vdupd_laneq_f64(a, 1); +// CHECK: ret +// CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] +} + + +// CHECK: test_vdupb_lane_s8 +int8_t test_vdupb_lane_s8(int8x8_t a) { + return vdupb_lane_s8(a, 7); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7] +} + + +// CHECK: test_vduph_lane_s16 +int16_t test_vduph_lane_s16(int16x4_t a) { + return vduph_lane_s16(a, 3); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[3] +} + + +// CHECK: test_vdups_lane_s32 +int32_t test_vdups_lane_s32(int32x2_t a) { + return vdups_lane_s32(a, 1); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[1] +} + + +// CHECK: test_vdupd_lane_s64 +int64_t test_vdupd_lane_s64(int64x1_t a) { + return vdupd_lane_s64(a, 0); +// CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} +} + + +// CHECK: test_vdupb_lane_u8 +uint8_t test_vdupb_lane_u8(uint8x8_t a) { + return vdupb_lane_u8(a, 7); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7] +} + + +// CHECK: test_vduph_lane_u16 +uint16_t test_vduph_lane_u16(uint16x4_t a) { + return vduph_lane_u16(a, 3); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[3] +} + + +// CHECK: test_vdups_lane_u32 +uint32_t test_vdups_lane_u32(uint32x2_t a) { + return vdups_lane_u32(a, 1); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[1] +} + + +// CHECK: test_vdupd_lane_u64 +uint64_t test_vdupd_lane_u64(uint64x1_t a) { + return vdupd_lane_u64(a, 0); +// CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} +} + +// CHECK: test_vdupb_laneq_s8 +int8_t test_vdupb_laneq_s8(int8x16_t a) { + return vdupb_laneq_s8(a, 15); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[15] +} + + +// CHECK: test_vduph_laneq_s16 +int16_t test_vduph_laneq_s16(int16x8_t a) { + return vduph_laneq_s16(a, 7); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[7] +} + + +// CHECK: test_vdups_laneq_s32 +int32_t test_vdups_laneq_s32(int32x4_t a) { + return vdups_laneq_s32(a, 3); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[3] +} + + +// CHECK: test_vdupd_laneq_s64 +int64_t test_vdupd_laneq_s64(int64x2_t a) { + return vdupd_laneq_s64(a, 1); +// CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[1] +} + + +// CHECK: test_vdupb_laneq_u8 +uint8_t test_vdupb_laneq_u8(uint8x16_t a) { + return vdupb_laneq_u8(a, 15); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[15] +} + + +// CHECK: test_vduph_laneq_u16 +uint16_t test_vduph_laneq_u16(uint16x8_t a) 
{ + return vduph_laneq_u16(a, 7); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[7] +} + + +// CHECK: test_vdups_laneq_u32 +uint32_t test_vdups_laneq_u32(uint32x4_t a) { + return vdups_laneq_u32(a, 3); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[3] +} + + +// CHECK: test_vdupd_laneq_u64 +uint64_t test_vdupd_laneq_u64(uint64x2_t a) { + return vdupd_laneq_u64(a, 1); +// CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[1] +} + +// CHECK: test_vdupb_lane_p8 +poly8_t test_vdupb_lane_p8(poly8x8_t a) { + return vdupb_lane_p8(a, 7); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7] +} + +// CHECK: test_vduph_lane_p16 +poly16_t test_vduph_lane_p16(poly16x4_t a) { + return vduph_lane_p16(a, 3); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[3] +} + +// CHECK: test_vdupb_laneq_p8 +poly8_t test_vdupb_laneq_p8(poly8x16_t a) { + return vdupb_laneq_p8(a, 15); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[15] +} + +// CHECK: test_vduph_laneq_p16 +poly16_t test_vduph_laneq_p16(poly16x8_t a) { + return vduph_laneq_p16(a, 7); +// CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[7] +} + diff --git a/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c b/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c new file mode 100644 index 0000000..4f0771a --- /dev/null +++ b/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c @@ -0,0 +1,255 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + + +float32_t test_vmuls_lane_f32(float32_t a, float32x2_t b) { + // CHECK: test_vmuls_lane_f32 + return vmuls_lane_f32(a, b, 1); + // CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +} + +float64_t test_vmuld_lane_f64(float64_t a, float64x1_t b) { + // CHECK: test_vmuld_lane_f64 + return vmuld_lane_f64(a, b, 0); + // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + +float32_t test_vmuls_laneq_f32(float32_t a, float32x4_t b) { + // CHECK: test_vmuls_laneq_f32 + return vmuls_laneq_f32(a, b, 3); + // CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +} + +float64_t test_vmuld_laneq_f64(float64_t a, float64x2_t b) { + // CHECK: test_vmuld_laneq_f64 + return vmuld_laneq_f64(a, b, 1); + // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +} + +float64x1_t test_vmul_n_f64(float64x1_t a, float64_t b) { + // CHECK: test_vmul_n_f64 + return vmul_n_f64(a, b); + // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + +float32_t test_vmulxs_lane_f32(float32_t a, float32x2_t b) { +// CHECK: test_vmulxs_lane_f32 + return vmulxs_lane_f32(a, b, 1); +// CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +} + +float32_t test_vmulxs_laneq_f32(float32_t a, float32x4_t b) { +// CHECK: test_vmulxs_laneq_f32 + return vmulxs_laneq_f32(a, b, 3); +// CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +} + +float64_t test_vmulxd_lane_f64(float64_t a, float64x1_t b) { +// CHECK: test_vmulxd_lane_f64 + return vmulxd_lane_f64(a, b, 0); +// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + +float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) { +// CHECK: test_vmulxd_laneq_f64 + return vmulxd_laneq_f64(a, b, 1); +// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +} + +// CHECK: test_vmulx_lane_f64 +float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) { + return vmulx_lane_f64(a, b, 0); + // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + + +// CHECK: test_vmulx_laneq_f64_0 +float64x1_t 
test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) { + return vmulx_laneq_f64(a, b, 0); + // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + +// CHECK: test_vmulx_laneq_f64_1 +float64x1_t test_vmulx_laneq_f64_1(float64x1_t a, float64x2_t b) { + return vmulx_laneq_f64(a, b, 1); + // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +} + + +// CHECK: test_vfmas_lane_f32 +float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) { + return vfmas_lane_f32(a, b, c, 1); + // CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +} + +// CHECK: test_vfmad_lane_f64 +float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) { + return vfmad_lane_f64(a, b, c, 0); + // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + +// CHECK: test_vfmad_laneq_f64 +float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) { + return vfmad_laneq_f64(a, b, c, 1); + // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +} + +// CHECK: test_vfmss_lane_f32 +float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) { + return vfmss_lane_f32(a, b, c, 1); + // CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +} + +// CHECK: test_vfma_lane_f64 +float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) { + return vfma_lane_f64(a, b, v, 0); + // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + +// CHECK: test_vfms_lane_f64 +float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) { + return vfms_lane_f64(a, b, v, 0); + // CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + +// CHECK: test_vfma_laneq_f64 +float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) { + return vfma_laneq_f64(a, b, v, 0); + // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + +// CHECK: test_vfms_laneq_f64 +float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) { + return vfms_laneq_f64(a, b, v, 0); + // CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + +// CHECK: test_vqdmullh_lane_s16 +int32_t test_vqdmullh_lane_s16(int16_t a, int16x4_t b) { + return vqdmullh_lane_s16(a, b, 3); + // CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3] +} + +// CHECK: test_vqdmulls_lane_s32 +int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) { + return vqdmulls_lane_s32(a, b, 1); + // CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +} + +// CHECK: test_vqdmullh_laneq_s16 +int32_t test_vqdmullh_laneq_s16(int16_t a, int16x8_t b) { + return vqdmullh_laneq_s16(a, b, 7); + // CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7] +} + +// CHECK: test_vqdmulls_laneq_s32 +int64_t test_vqdmulls_laneq_s32(int32_t a, int32x4_t b) { + return vqdmulls_laneq_s32(a, b, 3); + // CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +} + +// CHECK: test_vqdmulhh_lane_s16 +int16_t test_vqdmulhh_lane_s16(int16_t a, int16x4_t b) { + return vqdmulhh_lane_s16(a, b, 3); +// CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3] +} + +// CHECK: test_vqdmulhs_lane_s32 +int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) { + return vqdmulhs_lane_s32(a, b, 1); +// CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +} + + +// CHECK: test_vqdmulhh_laneq_s16 +int16_t test_vqdmulhh_laneq_s16(int16_t a, int16x8_t b) { + return vqdmulhh_laneq_s16(a, b, 7); +// CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7] +} + + +// CHECK: test_vqdmulhs_laneq_s32 +int32_t test_vqdmulhs_laneq_s32(int32_t a, int32x4_t b) { + return vqdmulhs_laneq_s32(a, b, 
3); +// CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +} + +// CHECK: test_vqrdmulhh_lane_s16 +int16_t test_vqrdmulhh_lane_s16(int16_t a, int16x4_t b) { + return vqrdmulhh_lane_s16(a, b, 3); +// CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3] +} + +// CHECK: test_vqrdmulhs_lane_s32 +int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) { + return vqrdmulhs_lane_s32(a, b, 1); +// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +} + + +// CHECK: test_vqrdmulhh_laneq_s16 +int16_t test_vqrdmulhh_laneq_s16(int16_t a, int16x8_t b) { + return vqrdmulhh_laneq_s16(a, b, 7); +// CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7] +} + + +// CHECK: test_vqrdmulhs_laneq_s32 +int32_t test_vqrdmulhs_laneq_s32(int32_t a, int32x4_t b) { + return vqrdmulhs_laneq_s32(a, b, 3); +// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +} + +// CHECK: test_vqdmlalh_lane_s16 +int32_t test_vqdmlalh_lane_s16(int32_t a, int16_t b, int16x4_t c) { + return vqdmlalh_lane_s16(a, b, c, 3); +// CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3] +} + +// CHECK: test_vqdmlals_lane_s32 +int64_t test_vqdmlals_lane_s32(int64_t a, int32_t b, int32x2_t c) { + return vqdmlals_lane_s32(a, b, c, 1); +// CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +} + +// CHECK: test_vqdmlalh_laneq_s16 +int32_t test_vqdmlalh_laneq_s16(int32_t a, int16_t b, int16x8_t c) { + return vqdmlalh_laneq_s16(a, b, c, 7); +// CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7] +} + +// CHECK: test_vqdmlals_laneq_s32 +int64_t test_vqdmlals_laneq_s32(int64_t a, int32_t b, int32x4_t c) { + return vqdmlals_laneq_s32(a, b, c, 3); +// CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +} + +// CHECK: test_vqdmlslh_lane_s16 +int32_t test_vqdmlslh_lane_s16(int32_t a, int16_t b, int16x4_t c) { + return vqdmlslh_lane_s16(a, b, c, 3); +// CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3] +} + +// CHECK: test_vqdmlsls_lane_s32 +int64_t test_vqdmlsls_lane_s32(int64_t a, int32_t b, int32x2_t c) { + return vqdmlsls_lane_s32(a, b, c, 1); +// CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +} + +// CHECK: test_vqdmlslh_laneq_s16 +int32_t test_vqdmlslh_laneq_s16(int32_t a, int16_t b, int16x8_t c) { + return vqdmlslh_laneq_s16(a, b, c, 7); +// CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7] +} + +// CHECK: test_vqdmlsls_laneq_s32 +int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) { + return vqdmlsls_laneq_s32(a, b, c, 3); +// CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +} + diff --git a/test/CodeGen/aarch64-neon-shifts.c b/test/CodeGen/aarch64-neon-shifts.c new file mode 100644 index 0000000..4777f18 --- /dev/null +++ b/test/CodeGen/aarch64-neon-shifts.c @@ -0,0 +1,43 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -emit-llvm -O1 -o - %s | FileCheck %s + +#include <arm_neon.h> + +uint8x8_t test_shift_vshr(uint8x8_t a) { + // CHECK-LABEL: test_shift_vshr + // CHECK: %{{.*}} = lshr <8 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5> + return vshr_n_u8(a, 5); +} + +int8x8_t test_shift_vshr_smax(int8x8_t a) { + // CHECK-LABEL: test_shift_vshr_smax + // CHECK: %{{.*}} = ashr <8 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + return vshr_n_s8(a, 8); +} + +uint8x8_t test_shift_vshr_umax(uint8x8_t a) { + // CHECK-LABEL: test_shift_vshr_umax + // CHECK: ret <8 x i8> zeroinitializer + return vshr_n_u8(a, 8); +} + +uint8x8_t 
test_shift_vsra(uint8x8_t a, uint8x8_t b) { + // CHECK-LABEL: test_shift_vsra + // CHECK: %[[SHR:.*]] = lshr <8 x i8> %b, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5> + // CHECK: %{{.*}} = add <8 x i8> %[[SHR]], %a + return vsra_n_u8(a, b, 5); +} + +int8x8_t test_shift_vsra_smax(int8x8_t a, int8x8_t b) { + // CHECK-LABEL: test_shift_vsra_smax + // CHECK: %[[SHR:.*]] = ashr <8 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + // CHECK: %{{.*}} = add <8 x i8> %[[SHR]], %a + return vsra_n_s8(a, b, 8); +} + +uint8x8_t test_shift_vsra_umax(uint8x8_t a, uint8x8_t b) { + // CHECK-LABEL: test_shift_vsra_umax + // CHECK: ret <8 x i8> %a + return vsra_n_u8(a, b, 8); +} diff --git a/test/CodeGen/aarch64-neon-tbl.c b/test/CodeGen/aarch64-neon-tbl.c new file mode 100644 index 0000000..db78a7a --- /dev/null +++ b/test/CodeGen/aarch64-neon-tbl.c @@ -0,0 +1,463 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) { + // CHECK: test_vtbl1_s8 + return vtbl1_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbl1_s8(int8x16_t a, int8x8_t b) { + // CHECK: test_vqtbl1_s8 + return vqtbl1_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) { + // CHECK: test_vtbl2_s8 + return vtbl2_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbl2_s8(int8x16x2_t a, int8x8_t b) { + // CHECK: test_vqtbl2_s8 + return vqtbl2_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) { + // CHECK: test_vtbl3_s8 + return vtbl3_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbl3_s8(int8x16x3_t a, int8x8_t b) { + // CHECK: test_vqtbl3_s8 + return vqtbl3_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) { + // CHECK: test_vtbl4_s8 + return vtbl4_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbl4_s8(int8x16x4_t a, int8x8_t b) { + // CHECK: test_vqtbl4_s8 + return vqtbl4_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) { + // CHECK: test_vqtbl1q_s8 + return vqtbl1q_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) { + // CHECK: test_vqtbl2q_s8 + return vqtbl2q_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) { + // CHECK: test_vqtbl3q_s8 + return vqtbl3q_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) { + // CHECK: test_vqtbl4q_s8 + return vqtbl4q_s8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, 
int8x8_t c) { + // CHECK: test_vtbx1_s8 + return vtbx1_s8(a, b, c); + // CHECK: movi {{v[0-9]+}}.8b, #0 + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b + // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) { + // CHECK: test_vtbx2_s8 + return vtbx2_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) { + // CHECK: test_vtbx3_s8 + return vtbx3_s8(a, b, c); + // CHECK: movi {{v[0-9]+}}.8b, #0 + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b + // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) { + // CHECK: test_vtbx4_s8 + return vtbx4_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, int8x8_t c) { + // CHECK: test_vqtbx1_s8 + return vqtbx1_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, int8x8_t c) { + // CHECK: test_vqtbx2_s8 + return vqtbx2_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, int8x8_t c) { + // CHECK: test_vqtbx3_s8 + return vqtbx3_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, int8x8_t c) { + // CHECK: test_vqtbx4_s8 + return vqtbx4_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, int8x16_t c) { + // CHECK: test_vqtbx1q_s8 + return vqtbx1q_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) { + // CHECK: test_vqtbx2q_s8 + return vqtbx2q_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) { + // CHECK: test_vqtbx3q_s8 + return vqtbx3q_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) { + // CHECK: test_vqtbx4q_s8 + return vqtbx4q_s8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) { + // CHECK: test_vtbl1_u8 + return vtbl1_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) { + // CHECK: test_vqtbl1_u8 + return vqtbl1_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) { + // CHECK: test_vtbl2_u8 + return vtbl2_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) { + // CHECK: 
test_vqtbl2_u8 + return vqtbl2_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) { + // CHECK: test_vtbl3_u8 + return vtbl3_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) { + // CHECK: test_vqtbl3_u8 + return vqtbl3_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) { + // CHECK: test_vtbl4_u8 + return vtbl4_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) { + // CHECK: test_vqtbl4_u8 + return vqtbl4_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) { + // CHECK: test_vqtbl1q_u8 + return vqtbl1q_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) { + // CHECK: test_vqtbl2q_u8 + return vqtbl2q_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) { + // CHECK: test_vqtbl3q_u8 + return vqtbl3q_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) { + // CHECK: test_vqtbl4q_u8 + return vqtbl4q_u8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { + // CHECK: test_vtbx1_u8 + return vtbx1_u8(a, b, c); + // CHECK: movi {{v[0-9]+}}.8b, #0 + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b + // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) { + // CHECK: test_vtbx2_u8 + return vtbx2_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) { + // CHECK: test_vtbx3_u8 + return vtbx3_u8(a, b, c); + // CHECK: movi {{v[0-9]+}}.8b, #0 + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b + // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) { + // CHECK: test_vtbx4_u8 + return vtbx4_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) { + // CHECK: test_vqtbx1_u8 + return vqtbx1_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) { + // CHECK: test_vqtbx2_u8 + return vqtbx2_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) { + // CHECK: test_vqtbx3_u8 
+ return vqtbx3_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) { + // CHECK: test_vqtbx4_u8 + return vqtbx4_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { + // CHECK: test_vqtbx1q_u8 + return vqtbx1q_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) { + // CHECK: test_vqtbx2q_u8 + return vqtbx2q_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) { + // CHECK: test_vqtbx3q_u8 + return vqtbx3q_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) { + // CHECK: test_vqtbx4q_u8 + return vqtbx4q_u8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) { + // CHECK: test_vtbl1_p8 + return vtbl1_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) { + // CHECK: test_vqtbl1_p8 + return vqtbl1_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) { + // CHECK: test_vtbl2_p8 + return vtbl2_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) { + // CHECK: test_vqtbl2_p8 + return vqtbl2_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) { + // CHECK: test_vtbl3_p8 + return vtbl3_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) { + // CHECK: test_vqtbl3_p8 + return vqtbl3_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) { + // CHECK: test_vtbl4_p8 + return vtbl4_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) { + // CHECK: test_vqtbl4_p8 + return vqtbl4_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) { + // CHECK: test_vqtbl1q_p8 + return vqtbl1q_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) { + // CHECK: test_vqtbl2q_p8 + return vqtbl2q_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) { + // CHECK: test_vqtbl3q_p8 + return vqtbl3q_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) { + // CHECK: 
test_vqtbl4q_p8 + return vqtbl4q_p8(a, b); + // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) { + // CHECK: test_vtbx1_p8 + return vtbx1_p8(a, b, c); + // CHECK: movi {{v[0-9]+}}.8b, #0 + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b + // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) { + // CHECK: test_vtbx2_p8 + return vtbx2_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) { + // CHECK: test_vtbx3_p8 + return vtbx3_p8(a, b, c); + // CHECK: movi {{v[0-9]+}}.8b, #0 + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] + // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b + // CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) { + // CHECK: test_vtbx4_p8 + return vtbx4_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) { + // CHECK: test_vqtbx1_p8 + return vqtbx1_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) { + // CHECK: test_vqtbx2_p8 + return vqtbx2_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) { + // CHECK: test_vqtbx3_p8 + return vqtbx3_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) { + // CHECK: test_vqtbx4_p8 + return vqtbx4_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +} + +poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) { + // CHECK: test_vqtbx1q_p8 + return vqtbx1q_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) { + // CHECK: test_vqtbx2q_p8 + return vqtbx2q_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) { + // CHECK: test_vqtbx3q_p8 + return vqtbx3q_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} + +poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) { + // CHECK: test_vqtbx4q_p8 + return vqtbx4q_p8(a, b, c); + // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +} diff --git a/test/CodeGen/aarch64-neon-vcombine.c b/test/CodeGen/aarch64-neon-vcombine.c new file mode 100644 index 0000000..3e170c8 --- /dev/null +++ b/test/CodeGen/aarch64-neon-vcombine.c @@ -0,0 +1,91 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -S -O3 -o - %s | 
FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +int8x16_t test_vcombine_s8(int8x8_t low, int8x8_t high) { + // CHECK-LABEL: test_vcombine_s8: + return vcombine_s8(low, high); + // CHECK: ins v0.d[1], v1.d[0] +} + +int16x8_t test_vcombine_s16(int16x4_t low, int16x4_t high) { + // CHECK-LABEL: test_vcombine_s16: + return vcombine_s16(low, high); + // CHECK: ins v0.d[1], v1.d[0] +} + +int32x4_t test_vcombine_s32(int32x2_t low, int32x2_t high) { + // CHECK-LABEL: test_vcombine_s32: + return vcombine_s32(low, high); + // CHECK: ins v0.d[1], v1.d[0] +} + +int64x2_t test_vcombine_s64(int64x1_t low, int64x1_t high) { + // CHECK-LABEL: test_vcombine_s64: + return vcombine_s64(low, high); + // CHECK: ins v0.d[1], v1.d[0] +} + +uint8x16_t test_vcombine_u8(uint8x8_t low, uint8x8_t high) { + // CHECK-LABEL: test_vcombine_u8: + return vcombine_u8(low, high); + // CHECK: ins v0.d[1], v1.d[0] +} + +uint16x8_t test_vcombine_u16(uint16x4_t low, uint16x4_t high) { + // CHECK-LABEL: test_vcombine_u16: + return vcombine_u16(low, high); + // CHECK: ins v0.d[1], v1.d[0] +} + +uint32x4_t test_vcombine_u32(uint32x2_t low, uint32x2_t high) { + // CHECK-LABEL: test_vcombine_u32: + return vcombine_u32(low, high); + // CHECK: ins v0.d[1], v1.d[0] +} + +uint64x2_t test_vcombine_u64(uint64x1_t low, uint64x1_t high) { + // CHECK-LABEL: test_vcombine_u64: + return vcombine_u64(low, high); + // CHECK: ins v0.d[1], v1.d[0] +} + +poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) { + // CHECK-LABEL: test_vcombine_p64: + return vcombine_p64(low, high); + // CHECK: ins v0.d[1], v1.d[0] +} + +float16x8_t test_vcombine_f16(float16x4_t low, float16x4_t high) { + // CHECK-LABEL: test_vcombine_f16: + return vcombine_f16(low, high); + // CHECK: ins v0.d[1], v1.d[0] +} + +float32x4_t test_vcombine_f32(float32x2_t low, float32x2_t high) { + // CHECK-LABEL: test_vcombine_f32: + return vcombine_f32(low, high); + // CHECK: ins v0.d[1], v1.d[0] +} + +poly8x16_t test_vcombine_p8(poly8x8_t low, poly8x8_t high) { + // CHECK-LABEL: test_vcombine_p8: + return vcombine_p8(low, high); + // CHECK: ins v0.d[1], v1.d[0] +} + +poly16x8_t test_vcombine_p16(poly16x4_t low, poly16x4_t high) { + // CHECK-LABEL: test_vcombine_p16: + return vcombine_p16(low, high); + // CHECK: ins v0.d[1], v1.d[0] +} + +float64x2_t test_vcombine_f64(float64x1_t low, float64x1_t high) { + // CHECK-LABEL: test_vcombine_f64: + return vcombine_f64(low, high); + // CHECK: ins v0.d[1], v1.d[0] +} diff --git a/test/CodeGen/aarch64-neon-vget-hilo.c b/test/CodeGen/aarch64-neon-vget-hilo.c new file mode 100644 index 0000000..012b0bb --- /dev/null +++ b/test/CodeGen/aarch64-neon-vget-hilo.c @@ -0,0 +1,176 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +int8x8_t test_vget_high_s8(int8x16_t a) { + // CHECK-LABEL: test_vget_high_s8: + return vget_high_s8(a); + // CHECK: dup d0, {{v[0-9]+}}.d[1] +} + +int16x4_t test_vget_high_s16(int16x8_t a) { + // CHECK-LABEL: test_vget_high_s16: + return vget_high_s16(a); + // CHECK: dup d0, {{v[0-9]+}}.d[1] +} + +int32x2_t test_vget_high_s32(int32x4_t a) { + // CHECK-LABEL: test_vget_high_s32: + return vget_high_s32(a); + // CHECK: dup d0, {{v[0-9]+}}.d[1] +} + +int64x1_t test_vget_high_s64(int64x2_t a) { + // CHECK-LABEL: test_vget_high_s64: + return vget_high_s64(a); + // CHECK: dup d0, {{v[0-9]+}}.d[1] 
+} + +uint8x8_t test_vget_high_u8(uint8x16_t a) { + // CHECK-LABEL: test_vget_high_u8: + return vget_high_u8(a); + // CHECK: dup d0, {{v[0-9]+}}.d[1] +} + +uint16x4_t test_vget_high_u16(uint16x8_t a) { + // CHECK-LABEL: test_vget_high_u16: + return vget_high_u16(a); + // CHECK: dup d0, {{v[0-9]+}}.d[1] +} + +uint32x2_t test_vget_high_u32(uint32x4_t a) { + // CHECK-LABEL: test_vget_high_u32: + return vget_high_u32(a); + // CHECK: dup d0, {{v[0-9]+}}.d[1] +} + +uint64x1_t test_vget_high_u64(uint64x2_t a) { + // CHECK-LABEL: test_vget_high_u64: + return vget_high_u64(a); + // CHECK: dup d0, {{v[0-9]+}}.d[1] +} + +poly64x1_t test_vget_high_p64(poly64x2_t a) { + // CHECK-LABEL: test_vget_high_p64: + return vget_high_p64(a); + // CHECK: dup d0, {{v[0-9]+}}.d[1] +} + +float16x4_t test_vget_high_f16(float16x8_t a) { + // CHECK-LABEL: test_vget_high_f16: + return vget_high_f16(a); + // CHECK: dup d0, {{v[0-9]+}}.d[1] +} + +float32x2_t test_vget_high_f32(float32x4_t a) { + // CHECK-LABEL: test_vget_high_f32: + return vget_high_f32(a); + // CHECK: dup d0, {{v[0-9]+}}.d[1] +} + +poly8x8_t test_vget_high_p8(poly8x16_t a) { + // CHECK-LABEL: test_vget_high_p8: + return vget_high_p8(a); + // CHECK: dup d0, {{v[0-9]+}}.d[1] +} + +poly16x4_t test_vget_high_p16(poly16x8_t a) { + // CHECK-LABEL: test_vget_high_p16 + return vget_high_p16(a); + // CHECK: dup d0, {{v[0-9]+}}.d[1] +} + +float64x1_t test_vget_high_f64(float64x2_t a) { + // CHECK-LABEL: test_vget_high_f64 + return vget_high_f64(a); + // CHECK: dup d0, {{v[0-9]+}}.d[1] +} + +int8x8_t test_vget_low_s8(int8x16_t a) { + // CHECK-LABEL: test_vget_low_s8: + return vget_low_s8(a); + // CHECK-NEXT: ret +} + +int16x4_t test_vget_low_s16(int16x8_t a) { + // CHECK-LABEL: test_vget_low_s16: + return vget_low_s16(a); + // CHECK-NEXT: ret +} + +int32x2_t test_vget_low_s32(int32x4_t a) { + // CHECK-LABEL: test_vget_low_s32: + return vget_low_s32(a); + // CHECK-NEXT: ret +} + +int64x1_t test_vget_low_s64(int64x2_t a) { + // CHECK-LABEL: test_vget_low_s64: + return vget_low_s64(a); + // CHECK-NEXT: ret +} + +uint8x8_t test_vget_low_u8(uint8x16_t a) { + // CHECK-LABEL: test_vget_low_u8: + return vget_low_u8(a); + // CHECK-NEXT: ret +} + +uint16x4_t test_vget_low_u16(uint16x8_t a) { + // CHECK-LABEL: test_vget_low_u16: + return vget_low_u16(a); + // CHECK-NEXT: ret +} + +uint32x2_t test_vget_low_u32(uint32x4_t a) { + // CHECK-LABEL: test_vget_low_u32: + return vget_low_u32(a); + // CHECK-NEXT: ret +} + +uint64x1_t test_vget_low_u64(uint64x2_t a) { + // CHECK-LABEL: test_vget_low_u64: + return vget_low_u64(a); + // CHECK-NEXT: ret +} + +poly64x1_t test_vget_low_p64(poly64x2_t a) { + // CHECK-LABEL: test_vget_low_p64: + return vget_low_p64(a); + // CHECK-NEXT: ret +} + +float16x4_t test_vget_low_f16(float16x8_t a) { + // CHECK-LABEL: test_vget_low_f16: + return vget_low_f16(a); + // CHECK-NEXT: ret +} + +float32x2_t test_vget_low_f32(float32x4_t a) { + // CHECK-LABEL: test_vget_low_f32: + return vget_low_f32(a); + // CHECK-NEXT: ret +} + +poly8x8_t test_vget_low_p8(poly8x16_t a) { + // CHECK-LABEL: test_vget_low_p8: + return vget_low_p8(a); + // CHECK-NEXT: ret +} + +poly16x4_t test_vget_low_p16(poly16x8_t a) { + // CHECK-LABEL: test_vget_low_p16: + return vget_low_p16(a); + // CHECK-NEXT: ret +} + +float64x1_t test_vget_low_f64(float64x2_t a) { + // CHECK-LABEL: test_vget_low_f64: + return vget_low_f64(a); + // CHECK-NEXT: ret +} + diff --git a/test/CodeGen/aarch64-poly64.c b/test/CodeGen/aarch64-poly64.c new file mode 100644 index 0000000..3e19501 --- /dev/null +++ 
b/test/CodeGen/aarch64-poly64.c @@ -0,0 +1,283 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics with poly64 + +#include <arm_neon.h> + +uint64x1_t test_vceq_p64(poly64x1_t a, poly64x1_t b) { + // CHECK: test_vceq_p64 + return vceq_p64(a, b); + // CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x2_t test_vceqq_p64(poly64x2_t a, poly64x2_t b) { + // CHECK: test_vceqq_p64 + return vceqq_p64(a, b); + // CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint64x1_t test_vtst_p64(poly64x1_t a, poly64x1_t b) { + // CHECK: test_vtst_p64 + return vtst_p64(a, b); + // CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) { + // CHECK: test_vtstq_p64 + return vtstq_p64(a, b); + // CHECK: cmtst {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) { + // CHECK: test_vbsl_p64 + return vbsl_p64(a, b, c); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) { + // CHECK: test_vbslq_p64 + return vbslq_p64(a, b, c); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly64_t test_vget_lane_p64(poly64x1_t v) { + // CHECK: test_vget_lane_p64 + return vget_lane_p64(v, 0); + // CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} +} + +poly64_t test_vgetq_lane_p64(poly64x2_t v) { + // CHECK: test_vgetq_lane_p64 + return vgetq_lane_p64(v, 1); + // CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[1] +} + +poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) { + // CHECK: test_vset_lane_p64 + return vset_lane_p64(a, v, 0); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) { + // CHECK: test_vsetq_lane_p64 + return vsetq_lane_p64(a, v, 1); + // CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}} +} + +poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) { + // CHECK: test_vcopy_lane_p64 + return vcopy_lane_p64(a, 0, b, 0); + // CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}} +} + +poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) { + // CHECK: test_vcopyq_lane_p64 + return vcopyq_lane_p64(a, 1, b, 0); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) { + // CHECK: test_vcopyq_laneq_p64 + return vcopyq_laneq_p64(a, 1, b, 1); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1] +} + +poly64x1_t test_vcreate_p64(uint64_t a) { + // CHECK: test_vcreate_p64 + return vcreate_p64(a); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} + +poly64x1_t test_vdup_n_p64(poly64_t a) { + // CHECK: test_vdup_n_p64 + return vdup_n_p64(a); + // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +} +poly64x2_t test_vdupq_n_p64(poly64_t a) { + // CHECK: test_vdup_n_p64 + return vdupq_n_p64(a); + // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}} +} + +poly64x1_t test_vdup_lane_p64(poly64x1_t vec) { + // CHECK: test_vdup_lane_p64 + return vdup_lane_p64(vec, 0); + // CHECK: ret +} + +poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) { + // CHECK: test_vdupq_lane_p64 + return vdupq_lane_p64(vec, 0); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) { + // CHECK: test_vdupq_laneq_p64 + return vdupq_laneq_p64(vec, 1); + // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +} + +poly64x2_t test_vcombine_p64(poly64x1_t low, 
poly64x1_t high) { + // CHECK: test_vcombine_p64 + return vcombine_p64(low, high); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +poly64x1_t test_vld1_p64(poly64_t const * ptr) { + // CHECK: test_vld1_p64 + return vld1_p64(ptr); + // CHECK: ld1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +poly64x2_t test_vld1q_p64(poly64_t const * ptr) { + // CHECK: test_vld1q_p64 + return vld1q_p64(ptr); + // CHECK: ld1 {{{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst1_p64(poly64_t * ptr, poly64x1_t val) { + // CHECK: test_vst1_p64 + return vst1_p64(ptr, val); + // CHECK: st1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) { + // CHECK: test_vst1q_p64 + return vst1q_p64(ptr, val); + // CHECK: st1 {{{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +poly64x1x2_t test_vld2_p64(poly64_t const * ptr) { + // CHECK: test_vld2_p64 + return vld2_p64(ptr); + // CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) { + // CHECK: test_vld2q_p64 + return vld2q_p64(ptr); + // CHECK: ld2 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +poly64x1x3_t test_vld3_p64(poly64_t const * ptr) { + // CHECK: test_vld3_p64 + return vld3_p64(ptr); + // CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) { + // CHECK: test_vld3q_p64 + return vld3q_p64(ptr); + // CHECK: ld3 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +poly64x1x4_t test_vld4_p64(poly64_t const * ptr) { + // CHECK: test_vld4_p64 + return vld4_p64(ptr); + // CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) { + // CHECK: test_vld4q_p64 + return vld4q_p64(ptr); + // CHECK: ld4 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) { + // CHECK: test_vst2_p64 + return vst2_p64(ptr, val); + // CHECK: st1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) { + // CHECK: test_vst2q_p64 + return vst2q_p64(ptr, val); + // CHECK: st2 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) { + // CHECK: test_vst3_p64 + return vst3_p64(ptr, val); + // CHECK: st1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) { + // CHECK: test_vst3q_p64 + return vst3q_p64(ptr, val); + // CHECK: st3 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) { + // CHECK: test_vst4_p64 + return vst4_p64(ptr, val); + // CHECK: st1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] +} + +void test_vst4q_p64(poly64_t * ptr, poly64x2x4_t val) { + // CHECK: test_vst4q_p64 + return vst4q_p64(ptr, val); + // CHECK: st4 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +} + +poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) { + // CHECK: test_vext_p64 + return vext_u64(a, b, 0); + +} + +poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) { + // CHECK: test_vextq_p64 + return vextq_p64(a, b, 1); + // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8 +} + +poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) { + // CHECK: test_vzip1q_p64 + return vzip1q_p64(a, 
b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) { + // CHECK: test_vzip2q_p64 + return vzip2q_u64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) { + // CHECK: test_vuzp1q_p64 + return vuzp1q_p64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) { + // CHECK: test_vuzp2q_p64 + return vuzp2q_u64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) { + // CHECK: test_vtrn1q_p64 + return vtrn1q_p64(a, b); + // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +} + +poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) { + // CHECK: test_vtrn2q_p64 + return vtrn2q_u64(a, b); + // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +} + +poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) { + // CHECK: test_vsri_n_p64 + return vsri_n_p64(a, b, 33); + // CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #33 +} + +poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) { + // CHECK: test_vsriq_n_p64 + return vsriq_n_p64(a, b, 64); + // CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #64 +} + diff --git a/test/CodeGen/aarch64-varargs.c b/test/CodeGen/aarch64-varargs.c index 324a070..3d9cd86 100644 --- a/test/CodeGen/aarch64-varargs.c +++ b/test/CodeGen/aarch64-varargs.c @@ -7,7 +7,7 @@ va_list the_list; int simple_int(void) { -// CHECK: define i32 @simple_int +// CHECK-LABEL: define i32 @simple_int return va_arg(the_list, int); // CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 @@ -39,7 +39,7 @@ int simple_int(void) { } __int128 aligned_int(void) { -// CHECK: define i128 @aligned_int +// CHECK-LABEL: define i128 @aligned_int return va_arg(the_list, __int128); // CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 @@ -81,7 +81,7 @@ struct bigstruct { }; struct bigstruct simple_indirect(void) { -// CHECK: define void @simple_indirect +// CHECK-LABEL: define void @simple_indirect return va_arg(the_list, struct bigstruct); // CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 @@ -119,7 +119,7 @@ struct aligned_bigstruct { }; struct aligned_bigstruct simple_aligned_indirect(void) { -// CHECK: define void @simple_aligned_indirect +// CHECK-LABEL: define void @simple_aligned_indirect return va_arg(the_list, struct aligned_bigstruct); // CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 @@ -150,7 +150,7 @@ struct aligned_bigstruct simple_aligned_indirect(void) { } double simple_double(void) { -// CHECK: define double @simple_double +// CHECK-LABEL: define double @simple_double return va_arg(the_list, double); // CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[VR_OFFS]], 0 @@ -186,7 +186,7 @@ struct hfa { }; struct hfa simple_hfa(void) { -// CHECK: define %struct.hfa @simple_hfa +// CHECK-LABEL: define %struct.hfa @simple_hfa 
return va_arg(the_list, struct hfa); // CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[VR_OFFS]], 0 @@ -226,7 +226,7 @@ struct hfa simple_hfa(void) { } void check_start(int n, ...) { -// CHECK: define void @check_start(i32 %n, ...) +// CHECK-LABEL: define void @check_start(i32 %n, ...) va_list the_list; va_start(the_list, n); diff --git a/test/CodeGen/address-space.c b/test/CodeGen/address-space.c index 9de0670..3e865fd 100644 --- a/test/CodeGen/address-space.c +++ b/test/CodeGen/address-space.c @@ -9,11 +9,11 @@ int foo __attribute__((address_space(1))); // CHECK: @ban = common addrspace(1) global int ban[10] __attribute__((address_space(1))); -// CHECK: define i32 @test1() +// CHECK-LABEL: define i32 @test1() // CHECK: load i32 addrspace(1)* @foo int test1() { return foo; } -// CHECK: define i32 @test2(i32 %i) +// CHECK-LABEL: define i32 @test2(i32 %i) // CHECK: load i32 addrspace(1)* // CHECK-NEXT: ret i32 int test2(int i) { return ban[i]; } @@ -21,7 +21,7 @@ int test2(int i) { return ban[i]; } // Both A and B point into addrspace(2). __attribute__((address_space(2))) int *A, *B; -// CHECK: define void @test3() +// CHECK-LABEL: define void @test3() // CHECK: load i32 addrspace(2)** @B // CHECK: load i32 addrspace(2)* // CHECK: load i32 addrspace(2)** @A @@ -35,7 +35,7 @@ typedef struct { float aData[1]; } MyStruct; -// CHECK: define void @test4( +// CHECK-LABEL: define void @test4( // CHECK: call void @llvm.memcpy.p0i8.p2i8 // CHECK: call void @llvm.memcpy.p2i8.p0i8 void test4(MyStruct __attribute__((address_space(2))) *pPtr) { diff --git a/test/CodeGen/alias.c b/test/CodeGen/alias.c index a8380a3..efa94b3 100644 --- a/test/CodeGen/alias.c +++ b/test/CodeGen/alias.c @@ -8,18 +8,18 @@ static int bar1 = 42; extern int g1; extern int g1 __attribute((alias("g0"))); -// CHECKBASIC: @g1 = alias i32* @g0 +// CHECKBASIC-DAG: @g1 = alias i32* @g0 void f0(void) { } extern void f1(void); extern void f1(void) __attribute((alias("f0"))); -// CHECKBASIC: @f1 = alias void ()* @f0 +// CHECKBASIC-DAG: @f1 = alias void ()* @f0 // CHECKBASIC: define void @f0() [[NUW:#[0-9]+]] { // Make sure that aliases cause referenced values to be emitted. 
// PR3200 static inline int foo1() { return 0; } -// CHECKBASIC: define internal i32 @foo1() +// CHECKBASIC-LABEL: define internal i32 @foo1() int foo() __attribute__((alias("foo1"))); int bar() __attribute__((alias("bar1"))); diff --git a/test/CodeGen/align-param.c b/test/CodeGen/align-param.c index 8907f66..78e57b5 100644 --- a/test/CodeGen/align-param.c +++ b/test/CodeGen/align-param.c @@ -5,7 +5,7 @@ int test (long long x) { return (int)x; } -// CHECK: define i32 @test +// CHECK-LABEL: define i32 @test // CHECK: alloca i64, align 8 @@ -14,5 +14,5 @@ struct X { int x,y,z,a; }; int test2(struct X x __attribute((aligned(16)))) { return x.z; } -// CHECK: define i32 @test2 +// CHECK-LABEL: define i32 @test2 // CHECK: alloca %struct.X, align 16 diff --git a/test/CodeGen/align-x68_64.c b/test/CodeGen/align-x68_64.c new file mode 100644 index 0000000..cf128b4 --- /dev/null +++ b/test/CodeGen/align-x68_64.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s +// PR5599 + +void test1_f(void *); + +void test1_g(void) { + float x[4]; + test1_f(x); +} +// CHECK: @test1_g +// CHECK: alloca [4 x float], align 16 diff --git a/test/CodeGen/alignment.c b/test/CodeGen/alignment.c index 98ea01b..04d6aac 100644 --- a/test/CodeGen/alignment.c +++ b/test/CodeGen/alignment.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s __attribute((aligned(16))) float a[128]; union {int a[4]; __attribute((aligned(16))) float b[4];} b; @@ -6,7 +6,8 @@ union {int a[4]; __attribute((aligned(16))) float b[4];} b; // CHECK: @a = {{.*}}zeroinitializer, align 16 // CHECK: @b = {{.*}}zeroinitializer, align 16 - +long long int test5[1024]; +// CHECK-DAG: @test5 = common global [1024 x i64] zeroinitializer, align 8 // PR5279 - Reduced alignment on typedef. 
typedef int myint __attribute__((aligned(1))); diff --git a/test/CodeGen/annotations-var.c b/test/CodeGen/annotations-var.c index b8ada9f..da9e0b6 100644 --- a/test/CodeGen/annotations-var.c +++ b/test/CodeGen/annotations-var.c @@ -31,7 +31,7 @@ int foo(int v __attribute__((annotate("param_ann_0"))) __attribute__((annotate(" void local(void) { int localvar __attribute__((annotate("localvar_ann_0"))) __attribute__((annotate("localvar_ann_1"))) = 3; -// LOCAL: define void @local() +// LOCAL-LABEL: define void @local() // LOCAL: [[LOCALVAR:%.*]] = alloca i32, // LOCAL-NEXT: [[T0:%.*]] = bitcast i32* [[LOCALVAR]] to i8* // LOCAL-NEXT: call void @llvm.var.annotation(i8* [[T0]], i8* getelementptr inbounds ([15 x i8]* @{{.*}}), i8* getelementptr inbounds ({{.*}}), i32 33) @@ -41,7 +41,7 @@ void local(void) { void undef(void) { int undefvar __attribute__((annotate("undefvar_ann_0"))); -// UNDEF: define void @undef() +// UNDEF-LABEL: define void @undef() // UNDEF: [[UNDEFVAR:%.*]] = alloca i32, // UNDEF-NEXT: [[T0:%.*]] = bitcast i32* [[UNDEFVAR]] to i8* // UNDEF-NEXT: call void @llvm.var.annotation(i8* [[T0]], i8* getelementptr inbounds ([15 x i8]* @{{.*}}), i8* getelementptr inbounds ({{.*}}), i32 43) diff --git a/test/CodeGen/arm-aapcs-vfp.c b/test/CodeGen/arm-aapcs-vfp.c index 7210229..0e102f3 100644 --- a/test/CodeGen/arm-aapcs-vfp.c +++ b/test/CodeGen/arm-aapcs-vfp.c @@ -95,6 +95,6 @@ void test_neon(struct neon_struct arg) { neon_callee(arg); } -// CHECK: define arm_aapcs_vfpcc void @f33(%struct.s33* byval %s) +// CHECK-LABEL: define arm_aapcs_vfpcc void @f33(%struct.s33* byval %s) struct s33 { char buf[32*32]; }; void f33(struct s33 s) { } diff --git a/test/CodeGen/arm-arguments.c b/test/CodeGen/arm-arguments.c index 63ecd4c..b6bac9a 100644 --- a/test/CodeGen/arm-arguments.c +++ b/test/CodeGen/arm-arguments.c @@ -1,78 +1,78 @@ // REQUIRES: arm-registered-target -// RUN: %clang_cc1 -triple armv7-apple-darwin9 -target-abi apcs-gnu -emit-llvm -w -o - %s | FileCheck -check-prefix=APCS-GNU %s -// RUN: %clang_cc1 -triple armv7-apple-darwin9 -target-abi aapcs -emit-llvm -w -o - %s | FileCheck -check-prefix=AAPCS %s +// RUN: %clang_cc1 -triple armv7-apple-darwin9 -target-feature +neon -target-abi apcs-gnu -emit-llvm -w -o - %s | FileCheck -check-prefix=APCS-GNU %s +// RUN: %clang_cc1 -triple armv7-apple-darwin9 -target-feature +neon -target-abi aapcs -emit-llvm -w -o - %s | FileCheck -check-prefix=AAPCS %s -// APCS-GNU: define signext i8 @f0() -// AAPCS: define arm_aapcscc signext i8 @f0() +// APCS-GNU-LABEL: define signext i8 @f0() +// AAPCS-LABEL: define arm_aapcscc signext i8 @f0() char f0(void) { return 0; } -// APCS-GNU: define i8 @f1() -// AAPCS: define arm_aapcscc i8 @f1() +// APCS-GNU-LABEL: define i8 @f1() +// AAPCS-LABEL: define arm_aapcscc i8 @f1() struct s1 { char f0; }; struct s1 f1(void) {} -// APCS-GNU: define i16 @f2() -// AAPCS: define arm_aapcscc i16 @f2() +// APCS-GNU-LABEL: define i16 @f2() +// AAPCS-LABEL: define arm_aapcscc i16 @f2() struct s2 { short f0; }; struct s2 f2(void) {} -// APCS-GNU: define i32 @f3() -// AAPCS: define arm_aapcscc i32 @f3() +// APCS-GNU-LABEL: define i32 @f3() +// AAPCS-LABEL: define arm_aapcscc i32 @f3() struct s3 { int f0; }; struct s3 f3(void) {} -// APCS-GNU: define i32 @f4() -// AAPCS: define arm_aapcscc i32 @f4() +// APCS-GNU-LABEL: define i32 @f4() +// AAPCS-LABEL: define arm_aapcscc i32 @f4() struct s4 { struct s4_0 { int f0; } f0; }; struct s4 f4(void) {} -// APCS-GNU: define void @f5( +// APCS-GNU-LABEL: define void @f5( // APCS-GNU: 
struct.s5* noalias sret -// AAPCS: define arm_aapcscc i32 @f5() +// AAPCS-LABEL: define arm_aapcscc i32 @f5() struct s5 { struct { } f0; int f1; }; struct s5 f5(void) {} -// APCS-GNU: define void @f6( +// APCS-GNU-LABEL: define void @f6( // APCS-GNU: struct.s6* noalias sret -// AAPCS: define arm_aapcscc i32 @f6() +// AAPCS-LABEL: define arm_aapcscc i32 @f6() struct s6 { int f0[1]; }; struct s6 f6(void) {} -// APCS-GNU: define void @f7() -// AAPCS: define arm_aapcscc void @f7() +// APCS-GNU-LABEL: define void @f7() +// AAPCS-LABEL: define arm_aapcscc void @f7() struct s7 { struct { int : 0; } f0; }; struct s7 f7(void) {} -// APCS-GNU: define void @f8( +// APCS-GNU-LABEL: define void @f8( // APCS-GNU: struct.s8* noalias sret -// AAPCS: define arm_aapcscc void @f8() +// AAPCS-LABEL: define arm_aapcscc void @f8() struct s8 { struct { int : 0; } f0[1]; }; struct s8 f8(void) {} -// APCS-GNU: define i32 @f9() -// AAPCS: define arm_aapcscc i32 @f9() +// APCS-GNU-LABEL: define i32 @f9() +// AAPCS-LABEL: define arm_aapcscc i32 @f9() struct s9 { int f0; int : 0; }; struct s9 f9(void) {} -// APCS-GNU: define i32 @f10() -// AAPCS: define arm_aapcscc i32 @f10() +// APCS-GNU-LABEL: define i32 @f10() +// AAPCS-LABEL: define arm_aapcscc i32 @f10() struct s10 { int f0; int : 0; int : 0; }; struct s10 f10(void) {} -// APCS-GNU: define void @f11( +// APCS-GNU-LABEL: define void @f11( // APCS-GNU: struct.s11* noalias sret -// AAPCS: define arm_aapcscc i32 @f11() +// AAPCS-LABEL: define arm_aapcscc i32 @f11() struct s11 { int : 0; int f0; }; struct s11 f11(void) {} -// APCS-GNU: define i32 @f12() -// AAPCS: define arm_aapcscc i32 @f12() +// APCS-GNU-LABEL: define i32 @f12() +// AAPCS-LABEL: define arm_aapcscc i32 @f12() union u12 { char f0; short f1; int f2; }; union u12 f12(void) {} -// APCS-GNU: define void @f13( +// APCS-GNU-LABEL: define void @f13( // APCS-GNU: struct.s13* noalias sret // FIXME: This should return a float. 
@@ -80,55 +80,55 @@ union u12 f12(void) {} struct s13 { float f0; }; struct s13 f13(void) {} -// APCS-GNU: define void @f14( +// APCS-GNU-LABEL: define void @f14( // APCS-GNU: union.u14* noalias sret -// AAPCS: define arm_aapcscc i32 @f14() +// AAPCS-LABEL: define arm_aapcscc i32 @f14() union u14 { float f0; }; union u14 f14(void) {} -// APCS-GNU: define void @f15() -// AAPCS: define arm_aapcscc void @f15() +// APCS-GNU-LABEL: define void @f15() +// AAPCS-LABEL: define arm_aapcscc void @f15() void f15(struct s7 a0) {} -// APCS-GNU: define void @f16() -// AAPCS: define arm_aapcscc void @f16() +// APCS-GNU-LABEL: define void @f16() +// AAPCS-LABEL: define arm_aapcscc void @f16() void f16(struct s8 a0) {} -// APCS-GNU: define i32 @f17() -// AAPCS: define arm_aapcscc i32 @f17() +// APCS-GNU-LABEL: define i32 @f17() +// AAPCS-LABEL: define arm_aapcscc i32 @f17() struct s17 { short f0 : 13; char f1 : 4; }; struct s17 f17(void) {} -// APCS-GNU: define i32 @f18() -// AAPCS: define arm_aapcscc i32 @f18() +// APCS-GNU-LABEL: define i32 @f18() +// AAPCS-LABEL: define arm_aapcscc i32 @f18() struct s18 { short f0; char f1 : 4; }; struct s18 f18(void) {} -// APCS-GNU: define void @f19( +// APCS-GNU-LABEL: define void @f19( // APCS-GNU: struct.s19* noalias sret -// AAPCS: define arm_aapcscc i32 @f19() +// AAPCS-LABEL: define arm_aapcscc i32 @f19() struct s19 { int f0; struct s8 f1; }; struct s19 f19(void) {} -// APCS-GNU: define void @f20( +// APCS-GNU-LABEL: define void @f20( // APCS-GNU: struct.s20* noalias sret -// AAPCS: define arm_aapcscc i32 @f20() +// AAPCS-LABEL: define arm_aapcscc i32 @f20() struct s20 { struct s8 f1; int f0; }; struct s20 f20(void) {} -// APCS-GNU: define i8 @f21() -// AAPCS: define arm_aapcscc i32 @f21() +// APCS-GNU-LABEL: define i8 @f21() +// AAPCS-LABEL: define arm_aapcscc i32 @f21() struct s21 { struct {} f1; int f0 : 4; }; struct s21 f21(void) {} -// APCS-GNU: define i16 @f22() -// APCS-GNU: define i32 @f23() -// APCS-GNU: define i64 @f24() -// APCS-GNU: define i128 @f25() -// APCS-GNU: define i64 @f26() -// APCS-GNU: define i128 @f27() -// AAPCS: define arm_aapcscc i16 @f22() -// AAPCS: define arm_aapcscc i32 @f23() +// APCS-GNU-LABEL: define i16 @f22() +// APCS-GNU-LABEL: define i32 @f23() +// APCS-GNU-LABEL: define i64 @f24() +// APCS-GNU-LABEL: define i128 @f25() +// APCS-GNU-LABEL: define i64 @f26() +// APCS-GNU-LABEL: define i128 @f27() +// AAPCS-LABEL: define arm_aapcscc i16 @f22() +// AAPCS-LABEL: define arm_aapcscc i32 @f23() // AAPCS: define arm_aapcscc void @f24({{.*}} noalias sret // AAPCS: define arm_aapcscc void @f25({{.*}} noalias sret // AAPCS: define arm_aapcscc void @f26({{.*}} noalias sret @@ -140,13 +140,13 @@ _Complex long long f25(void) {} _Complex float f26(void) {} _Complex double f27(void) {} -// APCS-GNU: define i16 @f28() -// AAPCS: define arm_aapcscc i16 @f28() +// APCS-GNU-LABEL: define i16 @f28() +// AAPCS-LABEL: define arm_aapcscc i16 @f28() struct s28 { _Complex char f0; }; struct s28 f28() {} -// APCS-GNU: define i32 @f29() -// AAPCS: define arm_aapcscc i32 @f29() +// APCS-GNU-LABEL: define i32 @f29() +// AAPCS-LABEL: define arm_aapcscc i32 @f29() struct s29 { _Complex short f0; }; struct s29 f29() {} @@ -176,8 +176,8 @@ void f32(struct s32 s) { } // PR13350 struct s33 { char buf[32*32]; }; void f33(struct s33 s) { } -// APCS-GNU: define void @f33(%struct.s33* byval %s) -// AAPCS: define arm_aapcscc void @f33(%struct.s33* byval %s) +// APCS-GNU-LABEL: define void @f33(%struct.s33* byval %s) +// AAPCS-LABEL: define arm_aapcscc void 
@f33(%struct.s33* byval %s) // PR14048 struct s34 { char c; }; @@ -209,14 +209,14 @@ float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) { *(float32x4_t *)&s2); return v; } -// APCS-GNU: define <4 x float> @f35(i32 %i, %struct.s35* byval, %struct.s35* byval) +// APCS-GNU-LABEL: define <4 x float> @f35(i32 %i, %struct.s35* byval, %struct.s35* byval) // APCS-GNU: %[[a:.*]] = alloca %struct.s35, align 16 // APCS-GNU: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8* // APCS-GNU: %[[c:.*]] = bitcast %struct.s35* %0 to i8* // APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]] // APCS-GNU: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>* // APCS-GNU: load <4 x float>* %[[d]], align 16 -// AAPCS: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval, %struct.s35* byval) +// AAPCS-LABEL: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval, %struct.s35* byval) // AAPCS: %[[a:.*]] = alloca %struct.s35, align 16 // AAPCS: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8* // AAPCS: %[[c:.*]] = bitcast %struct.s35* %0 to i8* diff --git a/test/CodeGen/arm-asm-diag.c b/test/CodeGen/arm-asm-diag.c index eea7920..944a271 100644 --- a/test/CodeGen/arm-asm-diag.c +++ b/test/CodeGen/arm-asm-diag.c @@ -1,5 +1,5 @@ // REQUIRES: arm-registered-target -// RUN: %clang_cc1 -triple armv7 %s -S -o /dev/null 2>&1 | FileCheck %s +// RUN: not %clang_cc1 -triple armv7 -target-feature +neon %s -S -o /dev/null 2>&1 | FileCheck %s // rdar://13446483 typedef __attribute__((neon_vector_type(2))) long long int64x2_t; @@ -9,10 +9,10 @@ typedef struct int64x2x4_t { int64x2x4_t t1(const long long a[]) { int64x2x4_t r; __asm__("vldm %[a], { %q[r0], %q[r1], %q[r2], %q[r3] }" - : [r0] "=r"(r.val[0]), // expected-warning {{the value is truncated when put into register, use a modifier to specify the size}} - [r1] "=r"(r.val[1]), // expected-warning {{the value is truncated when put into register, use a modifier to specify the size}} - [r2] "=r"(r.val[2]), // expected-warning {{the value is truncated when put into register, use a modifier to specify the size}} - [r3] "=r"(r.val[3]) // expected-warning {{the value is truncated when put into register, use a modifier to specify the size}} + : [r0] "=r"(r.val[0]), // expected-warning {{value size does not match register size specified by the constraint and modifier}} + [r1] "=r"(r.val[1]), // expected-warning {{value size does not match register size specified by the constraint and modifier}} + [r2] "=r"(r.val[2]), // expected-warning {{value size does not match register size specified by the constraint and modifier}} + [r3] "=r"(r.val[3]) // expected-warning {{value size does not match register size specified by the constraint and modifier}} : [a] "r"(a)); return r; } diff --git a/test/CodeGen/arm-asm-warn.c b/test/CodeGen/arm-asm-warn.c index 9b52dd6..a5807006 100644 --- a/test/CodeGen/arm-asm-warn.c +++ b/test/CodeGen/arm-asm-warn.c @@ -1,5 +1,5 @@ // REQUIRES: arm-registered-target -// RUN: %clang_cc1 -triple armv7 %s -emit-llvm -o /dev/null +// RUN: %clang_cc1 -triple armv7 -target-feature +neon %s -emit-llvm -o /dev/null char bar(); @@ -12,6 +12,7 @@ void t1(int x, char y) { : "+r" (x), "+r" (y) :); + __asm__ volatile("ldrb %0, [%1]" : "=r" (y) : "r" (x)); // no warning } // <rdar://problem/12284092> @@ -22,10 +23,10 @@ typedef struct int64x2x4_t { int64x2x4_t t2(const long long a[]) { int64x2x4_t r; __asm__("vldm %[a], { %q[r0], %q[r1], %q[r2], %q[r3] }" - : [r0] "=r"(r.val[0]), // expected-warning {{the value is truncated when put 
into register, use a modifier to specify the size}} - [r1] "=r"(r.val[1]), // expected-warning {{the value is truncated when put into register, use a modifier to specify the size}} - [r2] "=r"(r.val[2]), // expected-warning {{the value is truncated when put into register, use a modifier to specify the size}} - [r3] "=r"(r.val[3]) // expected-warning {{the value is truncated when put into register, use a modifier to specify the size}} + : [r0] "=r"(r.val[0]), // expected-warning {{value size does not match register size specified by the constraint and modifier}} + [r1] "=r"(r.val[1]), // expected-warning {{value size does not match register size specified by the constraint and modifier}} + [r2] "=r"(r.val[2]), // expected-warning {{value size does not match register size specified by the constraint and modifier}} + [r3] "=r"(r.val[3]) // expected-warning {{value size does not match register size specified by the constraint and modifier}} : [a] "r"(a)); return r; } diff --git a/test/CodeGen/arm-cc.c b/test/CodeGen/arm-cc.c index 80ebe68..8e6aae7 100644 --- a/test/CodeGen/arm-cc.c +++ b/test/CodeGen/arm-cc.c @@ -5,13 +5,13 @@ // RUN: %clang_cc1 -triple arm-none-linux-gnueabi -target-abi aapcs -emit-llvm -w -o - %s | FileCheck -check-prefix=LINUX-AAPCS %s -// DARWIN-APCS: define void @f() +// DARWIN-APCS-LABEL: define void @f() // DARWIN-APCS: call void @g -// DARWIN-AAPCS: define arm_aapcscc void @f() +// DARWIN-AAPCS-LABEL: define arm_aapcscc void @f() // DARWIN-AAPCS: call arm_aapcscc void @g -// LINUX-APCS: define arm_apcscc void @f() +// LINUX-APCS-LABEL: define arm_apcscc void @f() // LINUX-APCS: call arm_apcscc void @g -// LINUX-AAPCS: define void @f() +// LINUX-AAPCS-LABEL: define void @f() // LINUX-AAPCS: call void @g void g(void); void f(void) { diff --git a/test/CodeGen/arm-clear.c b/test/CodeGen/arm-clear.c index 51506df..8ef36756 100644 --- a/test/CodeGen/arm-clear.c +++ b/test/CodeGen/arm-clear.c @@ -1,21 +1,8 @@ // REQUIRES: arm-registered-target // RUN: %clang_cc1 -triple armv7-apple-darwin9 -emit-llvm -w -o - %s | FileCheck %s -void clear0(void *ptr) { - // CHECK: clear0 - // CHECK-NOT: load i8** - __clear_cache(); -} - -void clear1(void *ptr) { - // CHECK: clear1 - // CHECK: load i8** - // CHECK-NOT: load i8** - __clear_cache(ptr); -} - -void clear2(void *ptr, void *ptr2) { - // CHECK: clear2 +void clear(void *ptr, void *ptr2) { + // CHECK: clear // CHECK: load i8** // CHECK: load i8** __clear_cache(ptr, ptr2); diff --git a/test/CodeGen/arm-crc32.c b/test/CodeGen/arm-crc32.c new file mode 100644 index 0000000..d49f20e --- /dev/null +++ b/test/CodeGen/arm-crc32.c @@ -0,0 +1,63 @@ +// REQUIRES: arm-registered-target +// RUN: %clang_cc1 -triple armv8-none-linux-gnueabi \ +// RUN: -O3 -S -emit-llvm -o - %s | FileCheck %s + +int crc32b(int a, char b) +{ + return __builtin_arm_crc32b(a,b); +// CHECK: [[T0:%[0-9]+]] = zext i8 %b to i32 +// CHECK: call i32 @llvm.arm.crc32b(i32 %a, i32 [[T0]]) +} + +int crc32cb(int a, char b) +{ + return __builtin_arm_crc32cb(a,b); +// CHECK: [[T0:%[0-9]+]] = zext i8 %b to i32 +// CHECK: call i32 @llvm.arm.crc32cb(i32 %a, i32 [[T0]]) +} + +int crc32h(int a, short b) +{ + return __builtin_arm_crc32h(a,b); +// CHECK: [[T0:%[0-9]+]] = zext i16 %b to i32 +// CHECK: call i32 @llvm.arm.crc32h(i32 %a, i32 [[T0]]) +} + +int crc32ch(int a, short b) +{ + return __builtin_arm_crc32ch(a,b); +// CHECK: [[T0:%[0-9]+]] = zext i16 %b to i32 +// CHECK: call i32 @llvm.arm.crc32ch(i32 %a, i32 [[T0]]) +} + +int crc32w(int a, int b) +{ + return __builtin_arm_crc32w(a,b); 
+// CHECK: call i32 @llvm.arm.crc32w(i32 %a, i32 %b) +} + +int crc32cw(int a, int b) +{ + return __builtin_arm_crc32cw(a,b); +// CHECK: call i32 @llvm.arm.crc32cw(i32 %a, i32 %b) +} + +int crc32d(int a, long long b) +{ + return __builtin_arm_crc32d(a,b); +// CHECK: [[T0:%[0-9]+]] = trunc i64 %b to i32 +// CHECK: [[T1:%[0-9]+]] = lshr i64 %b, 32 +// CHECK: [[T2:%[0-9]+]] = trunc i64 [[T1]] to i32 +// CHECK: [[T3:%[0-9]+]] = tail call i32 @llvm.arm.crc32w(i32 %a, i32 [[T0]]) +// CHECK: call i32 @llvm.arm.crc32w(i32 [[T3]], i32 [[T2]]) +} + +int crc32cd(int a, long long b) +{ + return __builtin_arm_crc32cd(a,b); +// CHECK: [[T0:%[0-9]+]] = trunc i64 %b to i32 +// CHECK: [[T1:%[0-9]+]] = lshr i64 %b, 32 +// CHECK: [[T2:%[0-9]+]] = trunc i64 [[T1]] to i32 +// CHECK: [[T3:%[0-9]+]] = tail call i32 @llvm.arm.crc32cw(i32 %a, i32 [[T0]]) +// CHECK: call i32 @llvm.arm.crc32cw(i32 [[T3]], i32 [[T2]]) +} diff --git a/test/CodeGen/arm-interrupt-attr.c b/test/CodeGen/arm-interrupt-attr.c new file mode 100644 index 0000000..73f1cfe --- /dev/null +++ b/test/CodeGen/arm-interrupt-attr.c @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -triple thumb-apple-darwin -target-abi aapcs -target-cpu cortex-m3 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple arm-apple-darwin -target-abi apcs-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-APCS + +__attribute__((interrupt)) void test_generic_interrupt() { + // CHECK: define arm_aapcscc void @test_generic_interrupt() [[GENERIC_ATTR:#[0-9]+]] + + // CHECK-APCS: define void @test_generic_interrupt() [[GENERIC_ATTR:#[0-9]+]] +} + +__attribute__((interrupt("IRQ"))) void test_irq_interrupt() { + // CHECK: define arm_aapcscc void @test_irq_interrupt() [[IRQ_ATTR:#[0-9]+]] +} + +__attribute__((interrupt("FIQ"))) void test_fiq_interrupt() { + // CHECK: define arm_aapcscc void @test_fiq_interrupt() [[FIQ_ATTR:#[0-9]+]] +} + +__attribute__((interrupt("SWI"))) void test_swi_interrupt() { + // CHECK: define arm_aapcscc void @test_swi_interrupt() [[SWI_ATTR:#[0-9]+]] +} + +__attribute__((interrupt("ABORT"))) void test_abort_interrupt() { + // CHECK: define arm_aapcscc void @test_abort_interrupt() [[ABORT_ATTR:#[0-9]+]] +} + + +__attribute__((interrupt("UNDEF"))) void test_undef_interrupt() { + // CHECK: define arm_aapcscc void @test_undef_interrupt() [[UNDEF_ATTR:#[0-9]+]] +} + +// CHECK: attributes [[GENERIC_ATTR]] = { nounwind alignstack=8 {{"interrupt"[^=]}} +// CHECK: attributes [[IRQ_ATTR]] = { nounwind alignstack=8 "interrupt"="IRQ" +// CHECK: attributes [[FIQ_ATTR]] = { nounwind alignstack=8 "interrupt"="FIQ" +// CHECK: attributes [[SWI_ATTR]] = { nounwind alignstack=8 "interrupt"="SWI" +// CHECK: attributes [[ABORT_ATTR]] = { nounwind alignstack=8 "interrupt"="ABORT" +// CHECK: attributes [[UNDEF_ATTR]] = { nounwind alignstack=8 "interrupt"="UNDEF" + +// CHECK-APCS: attributes [[GENERIC_ATTR]] = { nounwind "interrupt" diff --git a/test/CodeGen/arm-neon-shifts.c b/test/CodeGen/arm-neon-shifts.c new file mode 100644 index 0000000..7acfb89 --- /dev/null +++ b/test/CodeGen/arm-neon-shifts.c @@ -0,0 +1,45 @@ +// REQUIRES: arm-registered-target +// RUN: %clang_cc1 -triple thumbv7-apple-darwin \ +// RUN: -target-cpu cortex-a8 \ +// RUN: -ffreestanding \ +// RUN: -emit-llvm -w -O1 -o - %s | FileCheck %s + +#include <arm_neon.h> + +uint8x8_t test_shift_vshr(uint8x8_t a) { + // CHECK-LABEL: test_shift_vshr + // CHECK: %{{.*}} = lshr <8 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5> + return vshr_n_u8(a, 5); +} + +int8x8_t test_shift_vshr_smax(int8x8_t a) { + 
// CHECK-LABEL: test_shift_vshr_smax + // CHECK: %{{.*}} = ashr <8 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + return vshr_n_s8(a, 8); +} + +uint8x8_t test_shift_vshr_umax(uint8x8_t a) { + // CHECK-LABEL: test_shift_vshr_umax + // CHECK: ret <8 x i8> zeroinitializer + return vshr_n_u8(a, 8); +} + +uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) { + // CHECK-LABEL: test_shift_vsra + // CHECK: %[[SHR:.*]] = lshr <8 x i8> %b, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5> + // CHECK: %{{.*}} = add <8 x i8> %[[SHR]], %a + return vsra_n_u8(a, b, 5); +} + +int8x8_t test_shift_vsra_smax(int8x8_t a, int8x8_t b) { + // CHECK-LABEL: test_shift_vsra_smax + // CHECK: %[[SHR:.*]] = ashr <8 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + // CHECK: %{{.*}} = add <8 x i8> %[[SHR]], %a + return vsra_n_s8(a, b, 8); +} + +uint8x8_t test_shift_vsra_umax(uint8x8_t a, uint8x8_t b) { + // CHECK-LABEL: test_shift_vsra_umax + // CHECK: ret <8 x i8> %a + return vsra_n_u8(a, b, 8); +} diff --git a/test/CodeGen/arm-neon-vget.c b/test/CodeGen/arm-neon-vget.c new file mode 100644 index 0000000..4a710a2 --- /dev/null +++ b/test/CodeGen/arm-neon-vget.c @@ -0,0 +1,124 @@ +// REQUIRES: arm-registered-target +// RUN: %clang_cc1 -triple thumbv7-apple-darwin \ +// RUN: -target-abi apcs-gnu \ +// RUN: -target-cpu cortex-a8 \ +// RUN: -mfloat-abi soft \ +// RUN: -target-feature +soft-float-abi \ +// RUN: -ffreestanding \ +// RUN: -emit-llvm -w -O1 -o - %s | FileCheck %s + +#include <arm_neon.h> + +// Check that the vget_low/vget_high intrinsics generate a single shuffle +// without any bitcasting. +int8x8_t low_s8(int8x16_t a) { +// CHECK: shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + return vget_low_s8(a); +} + +uint8x8_t low_u8 (uint8x16_t a) { +// CHECK: shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + return vget_low_u8(a); +} + +int16x4_t low_s16( int16x8_t a) { +// CHECK: shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + return vget_low_s16(a); +} + +uint16x4_t low_u16(uint16x8_t a) { +// CHECK: shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + return vget_low_u16(a); +} + +int32x2_t low_s32( int32x4_t a) { +// CHECK: shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1> + return vget_low_s32(a); +} + +uint32x2_t low_u32(uint32x4_t a) { +// CHECK: shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1> + return vget_low_u32(a); +} + +int64x1_t low_s64( int64x2_t a) { +// CHECK: shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer + return vget_low_s64(a); +} + +uint64x1_t low_u64(uint64x2_t a) { +// CHECK: shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer + return vget_low_u64(a); +} + +poly8x8_t low_p8 (poly8x16_t a) { +// CHECK: shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + return vget_low_p8(a); +} + +poly16x4_t low_p16(poly16x8_t a) { +// CHECK: shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + return vget_low_p16(a); +} + +float32x2_t low_f32(float32x4_t a) { +// CHECK: shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1> + return vget_low_f32(a); +} + + +int8x8_t high_s8(int8x16_t a) { +// CHECK: shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, 
i32 13, i32 14, i32 15> + return vget_high_s8(a); +} + +uint8x8_t high_u8 (uint8x16_t a) { +// CHECK: shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + return vget_high_u8(a); +} + +int16x4_t high_s16( int16x8_t a) { +// CHECK: shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + return vget_high_s16(a); +} + +uint16x4_t high_u16(uint16x8_t a) { +// CHECK: shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + return vget_high_u16(a); +} + +int32x2_t high_s32( int32x4_t a) { +// CHECK: shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + return vget_high_s32(a); +} + +uint32x2_t high_u32(uint32x4_t a) { +// CHECK: shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + return vget_high_u32(a); +} + +int64x1_t high_s64( int64x2_t a) { +// CHECK: shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1> + return vget_high_s64(a); +} + +uint64x1_t high_u64(uint64x2_t a) { +// CHECK: shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1> + return vget_high_u64(a); +} + +poly8x8_t high_p8 (poly8x16_t a) { +// CHECK: shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + return vget_high_p8(a); +} + +poly16x4_t high_p16(poly16x8_t a) { +// CHECK: shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + return vget_high_p16(a); +} + +float32x2_t high_f32(float32x4_t a) { +// CHECK: shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3> + return vget_high_f32(a); +} + diff --git a/test/CodeGen/arm-pcs.c b/test/CodeGen/arm-pcs.c index fc658c3..269f01e 100644 --- a/test/CodeGen/arm-pcs.c +++ b/test/CodeGen/arm-pcs.c @@ -6,7 +6,7 @@ typedef int __attribute__((pcs("aapcs-vfp"))) (*aapcs_vfp_fn)(void); aapcs_fn bar; int foo(aapcs_vfp_fn baz) { -// CHECK: define i32 @foo +// CHECK-LABEL: define i32 @foo // CHECK: call arm_aapcscc // CHECK: call arm_aapcs_vfpcc return bar() + baz(); diff --git a/test/CodeGen/arm-pnaclcall.c b/test/CodeGen/arm-pnaclcall.c index 5025995..2faac1c 100644 --- a/test/CodeGen/arm-pnaclcall.c +++ b/test/CodeGen/arm-pnaclcall.c @@ -9,10 +9,10 @@ typedef struct { int a; int b; } s1; -// CHECK: define i32 @f48(%struct.s1* byval %s) +// CHECK-LABEL: define i32 @f48(%struct.s1* byval %s) int __attribute__((pnaclcall)) f48(s1 s) { return s.a; } -// CHECK: define void @f49(%struct.s1* noalias sret %agg.result) +// CHECK-LABEL: define void @f49(%struct.s1* noalias sret %agg.result) s1 __attribute__((pnaclcall)) f49() { s1 s; s.a = s.b = 1; return s; } union simple_union { @@ -20,7 +20,7 @@ union simple_union { char b; }; // Unions should be passed as byval structs -// CHECK: define void @f50(%union.simple_union* byval %s) +// CHECK-LABEL: define void @f50(%union.simple_union* byval %s) void __attribute__((pnaclcall)) f50(union simple_union s) {} typedef struct { @@ -29,5 +29,5 @@ typedef struct { int b8 : 8; } bitfield1; // Bitfields should be passed as byval structs -// CHECK: define void @f51(%struct.bitfield1* byval %bf1) +// CHECK-LABEL: define void @f51(%struct.bitfield1* byval %bf1) void __attribute__((pnaclcall)) f51(bitfield1 bf1) {} diff --git a/test/CodeGen/arm_neon_intrinsics.c b/test/CodeGen/arm_neon_intrinsics.c new file mode 100644 index 0000000..1d76e8a --- /dev/null +++ b/test/CodeGen/arm_neon_intrinsics.c @@ -0,0 +1,11636 @@ +// RUN: %clang_cc1 -triple thumbv7s-apple-darwin 
-target-abi apcs-gnu\ +// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\ +// RUN: | FileCheck %s + +// REQUIRES: long_tests + +#include <arm_neon.h> + +// CHECK: test_vaba_s8 +// CHECK: vaba.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) { + return vaba_s8(a, b, c); +} + +// CHECK: test_vaba_s16 +// CHECK: vaba.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c) { + return vaba_s16(a, b, c); +} + +// CHECK: test_vaba_s32 +// CHECK: vaba.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c) { + return vaba_s32(a, b, c); +} + +// CHECK: test_vaba_u8 +// CHECK: vaba.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { + return vaba_u8(a, b, c); +} + +// CHECK: test_vaba_u16 +// CHECK: vaba.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { + return vaba_u16(a, b, c); +} + +// CHECK: test_vaba_u32 +// CHECK: vaba.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { + return vaba_u32(a, b, c); +} + +// CHECK: test_vabaq_s8 +// CHECK: vaba.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) { + return vabaq_s8(a, b, c); +} + +// CHECK: test_vabaq_s16 +// CHECK: vaba.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { + return vabaq_s16(a, b, c); +} + +// CHECK: test_vabaq_s32 +// CHECK: vaba.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { + return vabaq_s32(a, b, c); +} + +// CHECK: test_vabaq_u8 +// CHECK: vaba.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { + return vabaq_u8(a, b, c); +} + +// CHECK: test_vabaq_u16 +// CHECK: vaba.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) { + return vabaq_u16(a, b, c); +} + +// CHECK: test_vabaq_u32 +// CHECK: vaba.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { + return vabaq_u32(a, b, c); +} + + +// CHECK: test_vabal_s8 +// CHECK: vabal.s8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { + return vabal_s8(a, b, c); +} + +// CHECK: test_vabal_s16 +// CHECK: vabal.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + return vabal_s16(a, b, c); +} + +// CHECK: test_vabal_s32 +// CHECK: vabal.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + return vabal_s32(a, b, c); +} + +// CHECK: test_vabal_u8 +// CHECK: vabal.u8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { + return vabal_u8(a, b, c); +} + +// CHECK: test_vabal_u16 +// CHECK: vabal.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { + return vabal_u16(a, b, c); +} + +// CHECK: test_vabal_u32 +// CHECK: vabal.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { + return vabal_u32(a, b, c); +} + + +// CHECK: test_vabd_s8 +// CHECK: vabd.s8 d{{[0-9]+}}, d{{[0-9]+}}, 
d{{[0-9]+}} +int8x8_t test_vabd_s8(int8x8_t a, int8x8_t b) { + return vabd_s8(a, b); +} + +// CHECK: test_vabd_s16 +// CHECK: vabd.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vabd_s16(int16x4_t a, int16x4_t b) { + return vabd_s16(a, b); +} + +// CHECK: test_vabd_s32 +// CHECK: vabd.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vabd_s32(int32x2_t a, int32x2_t b) { + return vabd_s32(a, b); +} + +// CHECK: test_vabd_u8 +// CHECK: vabd.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vabd_u8(uint8x8_t a, uint8x8_t b) { + return vabd_u8(a, b); +} + +// CHECK: test_vabd_u16 +// CHECK: vabd.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vabd_u16(uint16x4_t a, uint16x4_t b) { + return vabd_u16(a, b); +} + +// CHECK: test_vabd_u32 +// CHECK: vabd.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vabd_u32(uint32x2_t a, uint32x2_t b) { + return vabd_u32(a, b); +} + +// CHECK: test_vabd_f32 +// CHECK: vabd.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vabd_f32(float32x2_t a, float32x2_t b) { + return vabd_f32(a, b); +} + +// CHECK: test_vabdq_s8 +// CHECK: vabd.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) { + return vabdq_s8(a, b); +} + +// CHECK: test_vabdq_s16 +// CHECK: vabd.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vabdq_s16(int16x8_t a, int16x8_t b) { + return vabdq_s16(a, b); +} + +// CHECK: test_vabdq_s32 +// CHECK: vabd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vabdq_s32(int32x4_t a, int32x4_t b) { + return vabdq_s32(a, b); +} + +// CHECK: test_vabdq_u8 +// CHECK: vabd.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vabdq_u8(uint8x16_t a, uint8x16_t b) { + return vabdq_u8(a, b); +} + +// CHECK: test_vabdq_u16 +// CHECK: vabd.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vabdq_u16(uint16x8_t a, uint16x8_t b) { + return vabdq_u16(a, b); +} + +// CHECK: test_vabdq_u32 +// CHECK: vabd.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) { + return vabdq_u32(a, b); +} + +// CHECK: test_vabdq_f32 +// CHECK: vabd.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vabdq_f32(float32x4_t a, float32x4_t b) { + return vabdq_f32(a, b); +} + + +// CHECK: test_vabdl_s8 +// CHECK: vabdl.s8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) { + return vabdl_s8(a, b); +} + +// CHECK: test_vabdl_s16 +// CHECK: vabdl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) { + return vabdl_s16(a, b); +} + +// CHECK: test_vabdl_s32 +// CHECK: vabdl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) { + return vabdl_s32(a, b); +} + +// CHECK: test_vabdl_u8 +// CHECK: vabdl.u8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) { + return vabdl_u8(a, b); +} + +// CHECK: test_vabdl_u16 +// CHECK: vabdl.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) { + return vabdl_u16(a, b); +} + +// CHECK: test_vabdl_u32 +// CHECK: vabdl.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) { + return vabdl_u32(a, b); +} + + +// CHECK: test_vabs_s8 +// CHECK: vabs.s8 d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vabs_s8(int8x8_t a) { + return vabs_s8(a); +} + +// CHECK: test_vabs_s16 +// CHECK: vabs.s16 d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vabs_s16(int16x4_t a) { + return 
vabs_s16(a); +} + +// CHECK: test_vabs_s32 +// CHECK: vabs.s32 d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vabs_s32(int32x2_t a) { + return vabs_s32(a); +} + +// CHECK: test_vabs_f32 +// CHECK: vabs.f32 d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vabs_f32(float32x2_t a) { + return vabs_f32(a); +} + +// CHECK: test_vabsq_s8 +// CHECK: vabs.s8 q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vabsq_s8(int8x16_t a) { + return vabsq_s8(a); +} + +// CHECK: test_vabsq_s16 +// CHECK: vabs.s16 q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vabsq_s16(int16x8_t a) { + return vabsq_s16(a); +} + +// CHECK: test_vabsq_s32 +// CHECK: vabs.s32 q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vabsq_s32(int32x4_t a) { + return vabsq_s32(a); +} + +// CHECK: test_vabsq_f32 +// CHECK: vabs.f32 q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vabsq_f32(float32x4_t a) { + return vabsq_f32(a); +} + + +// CHECK: test_vadd_s8 +// CHECK: vadd.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vadd_s8(int8x8_t a, int8x8_t b) { + return vadd_s8(a, b); +} + +// CHECK: test_vadd_s16 +// CHECK: vadd.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vadd_s16(int16x4_t a, int16x4_t b) { + return vadd_s16(a, b); +} + +// CHECK: test_vadd_s32 +// CHECK: vadd.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vadd_s32(int32x2_t a, int32x2_t b) { + return vadd_s32(a, b); +} + +// CHECK: test_vadd_s64 +// CHECK: vadd.i64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_vadd_s64(int64x1_t a, int64x1_t b) { + return vadd_s64(a, b); +} + +// CHECK: test_vadd_f32 +// CHECK: vadd.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vadd_f32(float32x2_t a, float32x2_t b) { + return vadd_f32(a, b); +} + +// CHECK: test_vadd_u8 +// CHECK: vadd.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vadd_u8(uint8x8_t a, uint8x8_t b) { + return vadd_u8(a, b); +} + +// CHECK: test_vadd_u16 +// CHECK: vadd.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vadd_u16(uint16x4_t a, uint16x4_t b) { + return vadd_u16(a, b); +} + +// CHECK: test_vadd_u32 +// CHECK: vadd.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vadd_u32(uint32x2_t a, uint32x2_t b) { + return vadd_u32(a, b); +} + +// CHECK: test_vadd_u64 +// CHECK: vadd.i64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_vadd_u64(uint64x1_t a, uint64x1_t b) { + return vadd_u64(a, b); +} + +// CHECK: test_vaddq_s8 +// CHECK: vadd.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vaddq_s8(int8x16_t a, int8x16_t b) { + return vaddq_s8(a, b); +} + +// CHECK: test_vaddq_s16 +// CHECK: vadd.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vaddq_s16(int16x8_t a, int16x8_t b) { + return vaddq_s16(a, b); +} + +// CHECK: test_vaddq_s32 +// CHECK: vadd.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vaddq_s32(int32x4_t a, int32x4_t b) { + return vaddq_s32(a, b); +} + +// CHECK: test_vaddq_s64 +// CHECK: vadd.i64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_vaddq_s64(int64x2_t a, int64x2_t b) { + return vaddq_s64(a, b); +} + +// CHECK: test_vaddq_f32 +// CHECK: vadd.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vaddq_f32(float32x4_t a, float32x4_t b) { + return vaddq_f32(a, b); +} + +// CHECK: test_vaddq_u8 +// CHECK: vadd.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vaddq_u8(uint8x16_t a, uint8x16_t b) { + return vaddq_u8(a, b); +} + +// CHECK: test_vaddq_u16 +// CHECK: vadd.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vaddq_u16(uint16x8_t a, uint16x8_t b) { + return vaddq_u16(a, b); +} + +// CHECK: 
test_vaddq_u32 +// CHECK: vadd.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b) { + return vaddq_u32(a, b); +} + +// CHECK: test_vaddq_u64 +// CHECK: vadd.i64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_vaddq_u64(uint64x2_t a, uint64x2_t b) { + return vaddq_u64(a, b); +} + + +// CHECK: test_vaddhn_s16 +// CHECK: vaddhn.i16 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) { + return vaddhn_s16(a, b); +} + +// CHECK: test_vaddhn_s32 +// CHECK: vaddhn.i32 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { + return vaddhn_s32(a, b); +} + +// CHECK: test_vaddhn_s64 +// CHECK: vaddhn.i64 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) { + return vaddhn_s64(a, b); +} + +// CHECK: test_vaddhn_u16 +// CHECK: vaddhn.i16 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) { + return vaddhn_u16(a, b); +} + +// CHECK: test_vaddhn_u32 +// CHECK: vaddhn.i32 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) { + return vaddhn_u32(a, b); +} + +// CHECK: test_vaddhn_u64 +// CHECK: vaddhn.i64 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) { + return vaddhn_u64(a, b); +} + + +// CHECK: test_vaddl_s8 +// CHECK: vaddl.s8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) { + return vaddl_s8(a, b); +} + +// CHECK: test_vaddl_s16 +// CHECK: vaddl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) { + return vaddl_s16(a, b); +} + +// CHECK: test_vaddl_s32 +// CHECK: vaddl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) { + return vaddl_s32(a, b); +} + +// CHECK: test_vaddl_u8 +// CHECK: vaddl.u8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) { + return vaddl_u8(a, b); +} + +// CHECK: test_vaddl_u16 +// CHECK: vaddl.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) { + return vaddl_u16(a, b); +} + +// CHECK: test_vaddl_u32 +// CHECK: vaddl.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) { + return vaddl_u32(a, b); +} + + +// CHECK: test_vaddw_s8 +// CHECK: vaddw.s8 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}} +int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) { + return vaddw_s8(a, b); +} + +// CHECK: test_vaddw_s16 +// CHECK: vaddw.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) { + return vaddw_s16(a, b); +} + +// CHECK: test_vaddw_s32 +// CHECK: vaddw.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) { + return vaddw_s32(a, b); +} + +// CHECK: test_vaddw_u8 +// CHECK: vaddw.u8 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}} +uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) { + return vaddw_u8(a, b); +} + +// CHECK: test_vaddw_u16 +// CHECK: vaddw.u16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}} +uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) { + return vaddw_u16(a, b); +} + +// CHECK: test_vaddw_u32 +// CHECK: vaddw.u32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}} +uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) { + return vaddw_u32(a, b); +} + + +// CHECK: test_vand_s8 +// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vand_s8(int8x8_t a, int8x8_t b) { 
+ return vand_s8(a, b); +} + +// CHECK: test_vand_s16 +// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vand_s16(int16x4_t a, int16x4_t b) { + return vand_s16(a, b); +} + +// CHECK: test_vand_s32 +// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vand_s32(int32x2_t a, int32x2_t b) { + return vand_s32(a, b); +} + +// CHECK: test_vand_s64 +// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_vand_s64(int64x1_t a, int64x1_t b) { + return vand_s64(a, b); +} + +// CHECK: test_vand_u8 +// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vand_u8(uint8x8_t a, uint8x8_t b) { + return vand_u8(a, b); +} + +// CHECK: test_vand_u16 +// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vand_u16(uint16x4_t a, uint16x4_t b) { + return vand_u16(a, b); +} + +// CHECK: test_vand_u32 +// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vand_u32(uint32x2_t a, uint32x2_t b) { + return vand_u32(a, b); +} + +// CHECK: test_vand_u64 +// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_vand_u64(uint64x1_t a, uint64x1_t b) { + return vand_u64(a, b); +} + +// CHECK: test_vandq_s8 +// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) { + return vandq_s8(a, b); +} + +// CHECK: test_vandq_s16 +// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) { + return vandq_s16(a, b); +} + +// CHECK: test_vandq_s32 +// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) { + return vandq_s32(a, b); +} + +// CHECK: test_vandq_s64 +// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) { + return vandq_s64(a, b); +} + +// CHECK: test_vandq_u8 +// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) { + return vandq_u8(a, b); +} + +// CHECK: test_vandq_u16 +// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) { + return vandq_u16(a, b); +} + +// CHECK: test_vandq_u32 +// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) { + return vandq_u32(a, b); +} + +// CHECK: test_vandq_u64 +// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) { + return vandq_u64(a, b); +} + + +// CHECK: test_vbic_s8 +// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) { + return vbic_s8(a, b); +} + +// CHECK: test_vbic_s16 +// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) { + return vbic_s16(a, b); +} + +// CHECK: test_vbic_s32 +// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) { + return vbic_s32(a, b); +} + +// CHECK: test_vbic_s64 +// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) { + return vbic_s64(a, b); +} + +// CHECK: test_vbic_u8 +// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) { + return vbic_u8(a, b); +} + +// CHECK: test_vbic_u16 +// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) { + return vbic_u16(a, b); +} + +// CHECK: test_vbic_u32 +// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t 
test_vbic_u32(uint32x2_t a, uint32x2_t b) { + return vbic_u32(a, b); +} + +// CHECK: test_vbic_u64 +// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) { + return vbic_u64(a, b); +} + +// CHECK: test_vbicq_s8 +// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) { + return vbicq_s8(a, b); +} + +// CHECK: test_vbicq_s16 +// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) { + return vbicq_s16(a, b); +} + +// CHECK: test_vbicq_s32 +// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) { + return vbicq_s32(a, b); +} + +// CHECK: test_vbicq_s64 +// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) { + return vbicq_s64(a, b); +} + +// CHECK: test_vbicq_u8 +// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) { + return vbicq_u8(a, b); +} + +// CHECK: test_vbicq_u16 +// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) { + return vbicq_u16(a, b); +} + +// CHECK: test_vbicq_u32 +// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) { + return vbicq_u32(a, b); +} + +// CHECK: test_vbicq_u64 +// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) { + return vbicq_u64(a, b); +} + + +// CHECK: test_vbsl_s8 +// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c) { + return vbsl_s8(a, b, c); +} + +// CHECK: test_vbsl_s16 +// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c) { + return vbsl_s16(a, b, c); +} + +// CHECK: test_vbsl_s32 +// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c) { + return vbsl_s32(a, b, c); +} + +// CHECK: test_vbsl_s64 +// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c) { + return vbsl_s64(a, b, c); +} + +// CHECK: test_vbsl_u8 +// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { + return vbsl_u8(a, b, c); +} + +// CHECK: test_vbsl_u16 +// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { + return vbsl_u16(a, b, c); +} + +// CHECK: test_vbsl_u32 +// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { + return vbsl_u32(a, b, c); +} + +// CHECK: test_vbsl_u64 +// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c) { + return vbsl_u64(a, b, c); +} + +// CHECK: test_vbsl_f32 +// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c) { + return vbsl_f32(a, b, c); +} + +// CHECK: test_vbsl_p8 +// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +poly8x8_t test_vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c) { + return vbsl_p8(a, b, c); +} + +// CHECK: test_vbsl_p16 +// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +poly16x4_t test_vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c) { + return vbsl_p16(a, b, c); +} + +// CHECK: 
test_vbslq_s8 +// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) { + return vbslq_s8(a, b, c); +} + +// CHECK: test_vbslq_s16 +// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) { + return vbslq_s16(a, b, c); +} + +// CHECK: test_vbslq_s32 +// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) { + return vbslq_s32(a, b, c); +} + +// CHECK: test_vbslq_s64 +// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) { + return vbslq_s64(a, b, c); +} + +// CHECK: test_vbslq_u8 +// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { + return vbslq_u8(a, b, c); +} + +// CHECK: test_vbslq_u16 +// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) { + return vbslq_u16(a, b, c); +} + +// CHECK: test_vbslq_u32 +// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { + return vbslq_u32(a, b, c); +} + +// CHECK: test_vbslq_u64 +// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) { + return vbslq_u64(a, b, c); +} + +// CHECK: test_vbslq_f32 +// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) { + return vbslq_f32(a, b, c); +} + +// CHECK: test_vbslq_p8 +// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +poly8x16_t test_vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c) { + return vbslq_p8(a, b, c); +} + +// CHECK: test_vbslq_p16 +// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +poly16x8_t test_vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c) { + return vbslq_p16(a, b, c); +} + + +// CHECK: test_vcage_f32 +// CHECK: vacge.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vcage_f32(float32x2_t a, float32x2_t b) { + return vcage_f32(a, b); +} + +// CHECK: test_vcageq_f32 +// CHECK: vacge.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcageq_f32(float32x4_t a, float32x4_t b) { + return vcageq_f32(a, b); +} + + +// CHECK: test_vcagt_f32 +// CHECK: vacgt.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vcagt_f32(float32x2_t a, float32x2_t b) { + return vcagt_f32(a, b); +} + +// CHECK: test_vcagtq_f32 +// CHECK: vacgt.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcagtq_f32(float32x4_t a, float32x4_t b) { + return vcagtq_f32(a, b); +} + + +// CHECK: test_vcale_f32 +// CHECK: vacge.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vcale_f32(float32x2_t a, float32x2_t b) { + return vcale_f32(a, b); +} + +// CHECK: test_vcaleq_f32 +// CHECK: vacge.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcaleq_f32(float32x4_t a, float32x4_t b) { + return vcaleq_f32(a, b); +} + + +// CHECK: test_vcalt_f32 +// CHECK: vacgt.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vcalt_f32(float32x2_t a, float32x2_t b) { + return vcalt_f32(a, b); +} + +// CHECK: test_vcaltq_f32 +// CHECK: vacgt.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcaltq_f32(float32x4_t a, float32x4_t b) { + return vcaltq_f32(a, b); +} + + +// CHECK: test_vceq_s8 +// CHECK: vceq.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vceq_s8(int8x8_t a, int8x8_t 
b) { + return vceq_s8(a, b); +} + +// CHECK: test_vceq_s16 +// CHECK: vceq.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vceq_s16(int16x4_t a, int16x4_t b) { + return vceq_s16(a, b); +} + +// CHECK: test_vceq_s32 +// CHECK: vceq.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vceq_s32(int32x2_t a, int32x2_t b) { + return vceq_s32(a, b); +} + +// CHECK: test_vceq_f32 +// CHECK: vceq.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vceq_f32(float32x2_t a, float32x2_t b) { + return vceq_f32(a, b); +} + +// CHECK: test_vceq_u8 +// CHECK: vceq.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vceq_u8(uint8x8_t a, uint8x8_t b) { + return vceq_u8(a, b); +} + +// CHECK: test_vceq_u16 +// CHECK: vceq.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vceq_u16(uint16x4_t a, uint16x4_t b) { + return vceq_u16(a, b); +} + +// CHECK: test_vceq_u32 +// CHECK: vceq.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vceq_u32(uint32x2_t a, uint32x2_t b) { + return vceq_u32(a, b); +} + +// CHECK: test_vceq_p8 +// CHECK: vceq.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vceq_p8(poly8x8_t a, poly8x8_t b) { + return vceq_p8(a, b); +} + +// CHECK: test_vceqq_s8 +// CHECK: vceq.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vceqq_s8(int8x16_t a, int8x16_t b) { + return vceqq_s8(a, b); +} + +// CHECK: test_vceqq_s16 +// CHECK: vceq.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vceqq_s16(int16x8_t a, int16x8_t b) { + return vceqq_s16(a, b); +} + +// CHECK: test_vceqq_s32 +// CHECK: vceq.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vceqq_s32(int32x4_t a, int32x4_t b) { + return vceqq_s32(a, b); +} + +// CHECK: test_vceqq_f32 +// CHECK: vceq.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vceqq_f32(float32x4_t a, float32x4_t b) { + return vceqq_f32(a, b); +} + +// CHECK: test_vceqq_u8 +// CHECK: vceq.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vceqq_u8(uint8x16_t a, uint8x16_t b) { + return vceqq_u8(a, b); +} + +// CHECK: test_vceqq_u16 +// CHECK: vceq.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vceqq_u16(uint16x8_t a, uint16x8_t b) { + return vceqq_u16(a, b); +} + +// CHECK: test_vceqq_u32 +// CHECK: vceq.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vceqq_u32(uint32x4_t a, uint32x4_t b) { + return vceqq_u32(a, b); +} + +// CHECK: test_vceqq_p8 +// CHECK: vceq.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vceqq_p8(poly8x16_t a, poly8x16_t b) { + return vceqq_p8(a, b); +} + + +// CHECK: test_vcge_s8 +// CHECK: vcge.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vcge_s8(int8x8_t a, int8x8_t b) { + return vcge_s8(a, b); +} + +// CHECK: test_vcge_s16 +// CHECK: vcge.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vcge_s16(int16x4_t a, int16x4_t b) { + return vcge_s16(a, b); +} + +// CHECK: test_vcge_s32 +// CHECK: vcge.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vcge_s32(int32x2_t a, int32x2_t b) { + return vcge_s32(a, b); +} + +// CHECK: test_vcge_f32 +// CHECK: vcge.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vcge_f32(float32x2_t a, float32x2_t b) { + return vcge_f32(a, b); +} + +// CHECK: test_vcge_u8 +// CHECK: vcge.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vcge_u8(uint8x8_t a, uint8x8_t b) { + return vcge_u8(a, b); +} + +// CHECK: test_vcge_u16 +// CHECK: vcge.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vcge_u16(uint16x4_t a, uint16x4_t b) { + return vcge_u16(a, b); 
+} + +// CHECK: test_vcge_u32 +// CHECK: vcge.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vcge_u32(uint32x2_t a, uint32x2_t b) { + return vcge_u32(a, b); +} + +// CHECK: test_vcgeq_s8 +// CHECK: vcge.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vcgeq_s8(int8x16_t a, int8x16_t b) { + return vcgeq_s8(a, b); +} + +// CHECK: test_vcgeq_s16 +// CHECK: vcge.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vcgeq_s16(int16x8_t a, int16x8_t b) { + return vcgeq_s16(a, b); +} + +// CHECK: test_vcgeq_s32 +// CHECK: vcge.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcgeq_s32(int32x4_t a, int32x4_t b) { + return vcgeq_s32(a, b); +} + +// CHECK: test_vcgeq_f32 +// CHECK: vcge.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcgeq_f32(float32x4_t a, float32x4_t b) { + return vcgeq_f32(a, b); +} + +// CHECK: test_vcgeq_u8 +// CHECK: vcge.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vcgeq_u8(uint8x16_t a, uint8x16_t b) { + return vcgeq_u8(a, b); +} + +// CHECK: test_vcgeq_u16 +// CHECK: vcge.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vcgeq_u16(uint16x8_t a, uint16x8_t b) { + return vcgeq_u16(a, b); +} + +// CHECK: test_vcgeq_u32 +// CHECK: vcge.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcgeq_u32(uint32x4_t a, uint32x4_t b) { + return vcgeq_u32(a, b); +} + + +// CHECK: test_vcgt_s8 +// CHECK: vcgt.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vcgt_s8(int8x8_t a, int8x8_t b) { + return vcgt_s8(a, b); +} + +// CHECK: test_vcgt_s16 +// CHECK: vcgt.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vcgt_s16(int16x4_t a, int16x4_t b) { + return vcgt_s16(a, b); +} + +// CHECK: test_vcgt_s32 +// CHECK: vcgt.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vcgt_s32(int32x2_t a, int32x2_t b) { + return vcgt_s32(a, b); +} + +// CHECK: test_vcgt_f32 +// CHECK: vcgt.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vcgt_f32(float32x2_t a, float32x2_t b) { + return vcgt_f32(a, b); +} + +// CHECK: test_vcgt_u8 +// CHECK: vcgt.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vcgt_u8(uint8x8_t a, uint8x8_t b) { + return vcgt_u8(a, b); +} + +// CHECK: test_vcgt_u16 +// CHECK: vcgt.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vcgt_u16(uint16x4_t a, uint16x4_t b) { + return vcgt_u16(a, b); +} + +// CHECK: test_vcgt_u32 +// CHECK: vcgt.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vcgt_u32(uint32x2_t a, uint32x2_t b) { + return vcgt_u32(a, b); +} + +// CHECK: test_vcgtq_s8 +// CHECK: vcgt.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vcgtq_s8(int8x16_t a, int8x16_t b) { + return vcgtq_s8(a, b); +} + +// CHECK: test_vcgtq_s16 +// CHECK: vcgt.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vcgtq_s16(int16x8_t a, int16x8_t b) { + return vcgtq_s16(a, b); +} + +// CHECK: test_vcgtq_s32 +// CHECK: vcgt.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcgtq_s32(int32x4_t a, int32x4_t b) { + return vcgtq_s32(a, b); +} + +// CHECK: test_vcgtq_f32 +// CHECK: vcgt.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcgtq_f32(float32x4_t a, float32x4_t b) { + return vcgtq_f32(a, b); +} + +// CHECK: test_vcgtq_u8 +// CHECK: vcgt.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vcgtq_u8(uint8x16_t a, uint8x16_t b) { + return vcgtq_u8(a, b); +} + +// CHECK: test_vcgtq_u16 +// CHECK: vcgt.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vcgtq_u16(uint16x8_t a, uint16x8_t b) { + return vcgtq_u16(a, b); +} + 
+// CHECK: test_vcgtq_u32 +// CHECK: vcgt.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcgtq_u32(uint32x4_t a, uint32x4_t b) { + return vcgtq_u32(a, b); +} + + +// CHECK: test_vcle_s8 +// CHECK: vcge.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vcle_s8(int8x8_t a, int8x8_t b) { + return vcle_s8(a, b); +} + +// CHECK: test_vcle_s16 +// CHECK: vcge.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vcle_s16(int16x4_t a, int16x4_t b) { + return vcle_s16(a, b); +} + +// CHECK: test_vcle_s32 +// CHECK: vcge.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vcle_s32(int32x2_t a, int32x2_t b) { + return vcle_s32(a, b); +} + +// CHECK: test_vcle_f32 +// CHECK: vcge.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vcle_f32(float32x2_t a, float32x2_t b) { + return vcle_f32(a, b); +} + +// CHECK: test_vcle_u8 +// CHECK: vcge.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vcle_u8(uint8x8_t a, uint8x8_t b) { + return vcle_u8(a, b); +} + +// CHECK: test_vcle_u16 +// CHECK: vcge.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vcle_u16(uint16x4_t a, uint16x4_t b) { + return vcle_u16(a, b); +} + +// CHECK: test_vcle_u32 +// CHECK: vcge.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vcle_u32(uint32x2_t a, uint32x2_t b) { + return vcle_u32(a, b); +} + +// CHECK: test_vcleq_s8 +// CHECK: vcge.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vcleq_s8(int8x16_t a, int8x16_t b) { + return vcleq_s8(a, b); +} + +// CHECK: test_vcleq_s16 +// CHECK: vcge.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vcleq_s16(int16x8_t a, int16x8_t b) { + return vcleq_s16(a, b); +} + +// CHECK: test_vcleq_s32 +// CHECK: vcge.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcleq_s32(int32x4_t a, int32x4_t b) { + return vcleq_s32(a, b); +} + +// CHECK: test_vcleq_f32 +// CHECK: vcge.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcleq_f32(float32x4_t a, float32x4_t b) { + return vcleq_f32(a, b); +} + +// CHECK: test_vcleq_u8 +// CHECK: vcge.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vcleq_u8(uint8x16_t a, uint8x16_t b) { + return vcleq_u8(a, b); +} + +// CHECK: test_vcleq_u16 +// CHECK: vcge.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vcleq_u16(uint16x8_t a, uint16x8_t b) { + return vcleq_u16(a, b); +} + +// CHECK: test_vcleq_u32 +// CHECK: vcge.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcleq_u32(uint32x4_t a, uint32x4_t b) { + return vcleq_u32(a, b); +} + + +// CHECK: test_vcls_s8 +// CHECK: vcls.s8 d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vcls_s8(int8x8_t a) { + return vcls_s8(a); +} + +// CHECK: test_vcls_s16 +// CHECK: vcls.s16 d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vcls_s16(int16x4_t a) { + return vcls_s16(a); +} + +// CHECK: test_vcls_s32 +// CHECK: vcls.s32 d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vcls_s32(int32x2_t a) { + return vcls_s32(a); +} + +// CHECK: test_vclsq_s8 +// CHECK: vcls.s8 q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vclsq_s8(int8x16_t a) { + return vclsq_s8(a); +} + +// CHECK: test_vclsq_s16 +// CHECK: vcls.s16 q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vclsq_s16(int16x8_t a) { + return vclsq_s16(a); +} + +// CHECK: test_vclsq_s32 +// CHECK: vcls.s32 q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vclsq_s32(int32x4_t a) { + return vclsq_s32(a); +} + + +// CHECK: test_vclt_s8 +// CHECK: vcgt.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vclt_s8(int8x8_t a, int8x8_t b) { + return vclt_s8(a, b); +} + +// CHECK: test_vclt_s16 +// CHECK: 
vcgt.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vclt_s16(int16x4_t a, int16x4_t b) { + return vclt_s16(a, b); +} + +// CHECK: test_vclt_s32 +// CHECK: vcgt.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vclt_s32(int32x2_t a, int32x2_t b) { + return vclt_s32(a, b); +} + +// CHECK: test_vclt_f32 +// CHECK: vcgt.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vclt_f32(float32x2_t a, float32x2_t b) { + return vclt_f32(a, b); +} + +// CHECK: test_vclt_u8 +// CHECK: vcgt.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vclt_u8(uint8x8_t a, uint8x8_t b) { + return vclt_u8(a, b); +} + +// CHECK: test_vclt_u16 +// CHECK: vcgt.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vclt_u16(uint16x4_t a, uint16x4_t b) { + return vclt_u16(a, b); +} + +// CHECK: test_vclt_u32 +// CHECK: vcgt.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vclt_u32(uint32x2_t a, uint32x2_t b) { + return vclt_u32(a, b); +} + +// CHECK: test_vcltq_s8 +// CHECK: vcgt.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vcltq_s8(int8x16_t a, int8x16_t b) { + return vcltq_s8(a, b); +} + +// CHECK: test_vcltq_s16 +// CHECK: vcgt.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vcltq_s16(int16x8_t a, int16x8_t b) { + return vcltq_s16(a, b); +} + +// CHECK: test_vcltq_s32 +// CHECK: vcgt.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcltq_s32(int32x4_t a, int32x4_t b) { + return vcltq_s32(a, b); +} + +// CHECK: test_vcltq_f32 +// CHECK: vcgt.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcltq_f32(float32x4_t a, float32x4_t b) { + return vcltq_f32(a, b); +} + +// CHECK: test_vcltq_u8 +// CHECK: vcgt.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vcltq_u8(uint8x16_t a, uint8x16_t b) { + return vcltq_u8(a, b); +} + +// CHECK: test_vcltq_u16 +// CHECK: vcgt.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vcltq_u16(uint16x8_t a, uint16x8_t b) { + return vcltq_u16(a, b); +} + +// CHECK: test_vcltq_u32 +// CHECK: vcgt.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcltq_u32(uint32x4_t a, uint32x4_t b) { + return vcltq_u32(a, b); +} + + +// CHECK: test_vclz_s8 +// CHECK: vclz.i8 d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vclz_s8(int8x8_t a) { + return vclz_s8(a); +} + +// CHECK: test_vclz_s16 +// CHECK: vclz.i16 d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vclz_s16(int16x4_t a) { + return vclz_s16(a); +} + +// CHECK: test_vclz_s32 +// CHECK: vclz.i32 d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vclz_s32(int32x2_t a) { + return vclz_s32(a); +} + +// CHECK: test_vclz_u8 +// CHECK: vclz.i8 d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vclz_u8(uint8x8_t a) { + return vclz_u8(a); +} + +// CHECK: test_vclz_u16 +// CHECK: vclz.i16 d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vclz_u16(uint16x4_t a) { + return vclz_u16(a); +} + +// CHECK: test_vclz_u32 +// CHECK: vclz.i32 d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vclz_u32(uint32x2_t a) { + return vclz_u32(a); +} + +// CHECK: test_vclzq_s8 +// CHECK: vclz.i8 q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vclzq_s8(int8x16_t a) { + return vclzq_s8(a); +} + +// CHECK: test_vclzq_s16 +// CHECK: vclz.i16 q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vclzq_s16(int16x8_t a) { + return vclzq_s16(a); +} + +// CHECK: test_vclzq_s32 +// CHECK: vclz.i32 q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vclzq_s32(int32x4_t a) { + return vclzq_s32(a); +} + +// CHECK: test_vclzq_u8 +// CHECK: vclz.i8 q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vclzq_u8(uint8x16_t a) { + return vclzq_u8(a); +} + +// CHECK: 
test_vclzq_u16 +// CHECK: vclz.i16 q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vclzq_u16(uint16x8_t a) { + return vclzq_u16(a); +} + +// CHECK: test_vclzq_u32 +// CHECK: vclz.i32 q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vclzq_u32(uint32x4_t a) { + return vclzq_u32(a); +} + + +// CHECK: test_vcnt_u8 +// CHECK: vcnt.8 d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vcnt_u8(uint8x8_t a) { + return vcnt_u8(a); +} + +// CHECK: test_vcnt_s8 +// CHECK: vcnt.8 d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vcnt_s8(int8x8_t a) { + return vcnt_s8(a); +} + +// CHECK: test_vcnt_p8 +// CHECK: vcnt.8 d{{[0-9]+}}, d{{[0-9]+}} +poly8x8_t test_vcnt_p8(poly8x8_t a) { + return vcnt_p8(a); +} + +// CHECK: test_vcntq_u8 +// CHECK: vcnt.8 q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vcntq_u8(uint8x16_t a) { + return vcntq_u8(a); +} + +// CHECK: test_vcntq_s8 +// CHECK: vcnt.8 q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vcntq_s8(int8x16_t a) { + return vcntq_s8(a); +} + +// CHECK: test_vcntq_p8 +// CHECK: vcnt.8 q{{[0-9]+}}, q{{[0-9]+}} +poly8x16_t test_vcntq_p8(poly8x16_t a) { + return vcntq_p8(a); +} + + +// CHECK: test_vcombine_s8 +int8x16_t test_vcombine_s8(int8x8_t a, int8x8_t b) { + return vcombine_s8(a, b); +} + +// CHECK: test_vcombine_s16 +int16x8_t test_vcombine_s16(int16x4_t a, int16x4_t b) { + return vcombine_s16(a, b); +} + +// CHECK: test_vcombine_s32 +int32x4_t test_vcombine_s32(int32x2_t a, int32x2_t b) { + return vcombine_s32(a, b); +} + +// CHECK: test_vcombine_s64 +int64x2_t test_vcombine_s64(int64x1_t a, int64x1_t b) { + return vcombine_s64(a, b); +} + +// CHECK: test_vcombine_f16 +float16x8_t test_vcombine_f16(float16x4_t a, float16x4_t b) { + return vcombine_f16(a, b); +} + +// CHECK: test_vcombine_f32 +float32x4_t test_vcombine_f32(float32x2_t a, float32x2_t b) { + return vcombine_f32(a, b); +} + +// CHECK: test_vcombine_u8 +uint8x16_t test_vcombine_u8(uint8x8_t a, uint8x8_t b) { + return vcombine_u8(a, b); +} + +// CHECK: test_vcombine_u16 +uint16x8_t test_vcombine_u16(uint16x4_t a, uint16x4_t b) { + return vcombine_u16(a, b); +} + +// CHECK: test_vcombine_u32 +uint32x4_t test_vcombine_u32(uint32x2_t a, uint32x2_t b) { + return vcombine_u32(a, b); +} + +// CHECK: test_vcombine_u64 +uint64x2_t test_vcombine_u64(uint64x1_t a, uint64x1_t b) { + return vcombine_u64(a, b); +} + +// CHECK: test_vcombine_p8 +poly8x16_t test_vcombine_p8(poly8x8_t a, poly8x8_t b) { + return vcombine_p8(a, b); +} + +// CHECK: test_vcombine_p16 +poly16x8_t test_vcombine_p16(poly16x4_t a, poly16x4_t b) { + return vcombine_p16(a, b); +} + + +// CHECK: test_vcreate_s8 +int8x8_t test_vcreate_s8(uint64_t a) { + return vcreate_s8(a); +} + +// CHECK: test_vcreate_s16 +int16x4_t test_vcreate_s16(uint64_t a) { + return vcreate_s16(a); +} + +// CHECK: test_vcreate_s32 +int32x2_t test_vcreate_s32(uint64_t a) { + return vcreate_s32(a); +} + +// CHECK: test_vcreate_f16 +float16x4_t test_vcreate_f16(uint64_t a) { + return vcreate_f16(a); +} + +// CHECK: test_vcreate_f32 +float32x2_t test_vcreate_f32(uint64_t a) { + return vcreate_f32(a); +} + +// CHECK: test_vcreate_u8 +uint8x8_t test_vcreate_u8(uint64_t a) { + return vcreate_u8(a); +} + +// CHECK: test_vcreate_u16 +uint16x4_t test_vcreate_u16(uint64_t a) { + return vcreate_u16(a); +} + +// CHECK: test_vcreate_u32 +uint32x2_t test_vcreate_u32(uint64_t a) { + return vcreate_u32(a); +} + +// CHECK: test_vcreate_u64 +uint64x1_t test_vcreate_u64(uint64_t a) { + return vcreate_u64(a); +} + +// CHECK: test_vcreate_p8 +poly8x8_t test_vcreate_p8(uint64_t a) { + return vcreate_p8(a); +} + +// CHECK: 
test_vcreate_p16 +poly16x4_t test_vcreate_p16(uint64_t a) { + return vcreate_p16(a); +} + +// CHECK: test_vcreate_s64 +int64x1_t test_vcreate_s64(uint64_t a) { + return vcreate_s64(a); +} + + +// CHECK: test_vcvt_f16_f32 +// CHECK: vcvt.f16.f32 d{{[0-9]+}}, q{{[0-9]+}} +float16x4_t test_vcvt_f16_f32(float32x4_t a) { + return vcvt_f16_f32(a); +} + + +// CHECK: test_vcvt_f32_s32 +// CHECK: vcvt.f32.s32 d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vcvt_f32_s32(int32x2_t a) { + return vcvt_f32_s32(a); +} + +// CHECK: test_vcvt_f32_u32 +// CHECK: vcvt.f32.u32 d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vcvt_f32_u32(uint32x2_t a) { + return vcvt_f32_u32(a); +} + +// CHECK: test_vcvtq_f32_s32 +// CHECK: vcvt.f32.s32 q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vcvtq_f32_s32(int32x4_t a) { + return vcvtq_f32_s32(a); +} + +// CHECK: test_vcvtq_f32_u32 +// CHECK: vcvt.f32.u32 q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vcvtq_f32_u32(uint32x4_t a) { + return vcvtq_f32_u32(a); +} + + +// CHECK: test_vcvt_f32_f16 +// CHECK: vcvt.f32.f16 +float32x4_t test_vcvt_f32_f16(float16x4_t a) { + return vcvt_f32_f16(a); +} + + +// CHECK: test_vcvt_n_f32_s32 +// CHECK: vcvt.f32.s32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +float32x2_t test_vcvt_n_f32_s32(int32x2_t a) { + return vcvt_n_f32_s32(a, 1); +} + +// CHECK: test_vcvt_n_f32_u32 +// CHECK: vcvt.f32.u32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) { + return vcvt_n_f32_u32(a, 1); +} + +// CHECK: test_vcvtq_n_f32_s32 +// CHECK: vcvt.f32.s32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) { + return vcvtq_n_f32_s32(a, 3); +} + +// CHECK: test_vcvtq_n_f32_u32 +// CHECK: vcvt.f32.u32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) { + return vcvtq_n_f32_u32(a, 3); +} + + +// CHECK: test_vcvt_n_s32_f32 +// CHECK: vcvt.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int32x2_t test_vcvt_n_s32_f32(float32x2_t a) { + return vcvt_n_s32_f32(a, 1); +} + +// CHECK: test_vcvtq_n_s32_f32 +// CHECK: vcvt.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) { + return vcvtq_n_s32_f32(a, 3); +} + + +// CHECK: test_vcvt_n_u32_f32 +// CHECK: vcvt.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) { + return vcvt_n_u32_f32(a, 1); +} + +// CHECK: test_vcvtq_n_u32_f32 +// CHECK: vcvt.u32.f32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) { + return vcvtq_n_u32_f32(a, 3); +} + + +// CHECK: test_vcvt_s32_f32 +// CHECK: vcvt.s32.f32 d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vcvt_s32_f32(float32x2_t a) { + return vcvt_s32_f32(a); +} + +// CHECK: test_vcvtq_s32_f32 +// CHECK: vcvt.s32.f32 q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vcvtq_s32_f32(float32x4_t a) { + return vcvtq_s32_f32(a); +} + + +// CHECK: test_vcvt_u32_f32 +// CHECK: vcvt.u32.f32 d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vcvt_u32_f32(float32x2_t a) { + return vcvt_u32_f32(a); +} + +// CHECK: test_vcvtq_u32_f32 +// CHECK: vcvt.u32.f32 q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vcvtq_u32_f32(float32x4_t a) { + return vcvtq_u32_f32(a); +} + + +// CHECK: test_vdup_lane_u8 +// CHECK: vdup.8 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint8x8_t test_vdup_lane_u8(uint8x8_t a) { + return vdup_lane_u8(a, 7); +} + +// CHECK: test_vdup_lane_u16 +// CHECK: vdup.16 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint16x4_t test_vdup_lane_u16(uint16x4_t a) { + return vdup_lane_u16(a, 3); +} + +// CHECK: test_vdup_lane_u32 +// CHECK: 
vdup.32 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint32x2_t test_vdup_lane_u32(uint32x2_t a) { + return vdup_lane_u32(a, 1); +} + +// CHECK: test_vdup_lane_s8 +// CHECK: vdup.8 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int8x8_t test_vdup_lane_s8(int8x8_t a) { + return vdup_lane_s8(a, 7); +} + +// CHECK: test_vdup_lane_s16 +// CHECK: vdup.16 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int16x4_t test_vdup_lane_s16(int16x4_t a) { + return vdup_lane_s16(a, 3); +} + +// CHECK: test_vdup_lane_s32 +// CHECK: vdup.32 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x2_t test_vdup_lane_s32(int32x2_t a) { + return vdup_lane_s32(a, 1); +} + +// CHECK: test_vdup_lane_p8 +// CHECK: vdup.8 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +poly8x8_t test_vdup_lane_p8(poly8x8_t a) { + return vdup_lane_p8(a, 7); +} + +// CHECK: test_vdup_lane_p16 +// CHECK: vdup.16 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +poly16x4_t test_vdup_lane_p16(poly16x4_t a) { + return vdup_lane_p16(a, 3); +} + +// CHECK: test_vdup_lane_f32 +// CHECK: vdup.32 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +float32x2_t test_vdup_lane_f32(float32x2_t a) { + return vdup_lane_f32(a, 1); +} + +// CHECK: test_vdupq_lane_u8 +// CHECK: vdup.8 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint8x16_t test_vdupq_lane_u8(uint8x8_t a) { + return vdupq_lane_u8(a, 7); +} + +// CHECK: test_vdupq_lane_u16 +// CHECK: vdup.16 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint16x8_t test_vdupq_lane_u16(uint16x4_t a) { + return vdupq_lane_u16(a, 3); +} + +// CHECK: test_vdupq_lane_u32 +// CHECK: vdup.32 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint32x4_t test_vdupq_lane_u32(uint32x2_t a) { + return vdupq_lane_u32(a, 1); +} + +// CHECK: test_vdupq_lane_s8 +// CHECK: vdup.8 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int8x16_t test_vdupq_lane_s8(int8x8_t a) { + return vdupq_lane_s8(a, 7); +} + +// CHECK: test_vdupq_lane_s16 +// CHECK: vdup.16 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int16x8_t test_vdupq_lane_s16(int16x4_t a) { + return vdupq_lane_s16(a, 3); +} + +// CHECK: test_vdupq_lane_s32 +// CHECK: vdup.32 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x4_t test_vdupq_lane_s32(int32x2_t a) { + return vdupq_lane_s32(a, 1); +} + +// CHECK: test_vdupq_lane_p8 +// CHECK: vdup.8 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +poly8x16_t test_vdupq_lane_p8(poly8x8_t a) { + return vdupq_lane_p8(a, 7); +} + +// CHECK: test_vdupq_lane_p16 +// CHECK: vdup.16 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +poly16x8_t test_vdupq_lane_p16(poly16x4_t a) { + return vdupq_lane_p16(a, 3); +} + +// CHECK: test_vdupq_lane_f32 +// CHECK: vdup.32 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +float32x4_t test_vdupq_lane_f32(float32x2_t a) { + return vdupq_lane_f32(a, 1); +} + +// CHECK: test_vdup_lane_s64 +int64x1_t test_vdup_lane_s64(int64x1_t a) { + return vdup_lane_s64(a, 0); +} + +// CHECK: test_vdup_lane_u64 +uint64x1_t test_vdup_lane_u64(uint64x1_t a) { + return vdup_lane_u64(a, 0); +} + +// CHECK: test_vdupq_lane_s64 +// CHECK: {{vmov|vdup}} +int64x2_t test_vdupq_lane_s64(int64x1_t a) { + return vdupq_lane_s64(a, 0); +} + +// CHECK: test_vdupq_lane_u64 +// CHECK: {{vmov|vdup}} +uint64x2_t test_vdupq_lane_u64(uint64x1_t a) { + return vdupq_lane_u64(a, 0); +} + + +// CHECK: test_vdup_n_u8 +// CHECK: vmov +uint8x8_t test_vdup_n_u8(uint8_t a) { + return vdup_n_u8(a); +} + +// CHECK: test_vdup_n_u16 +// CHECK: vmov +uint16x4_t test_vdup_n_u16(uint16_t a) { + return vdup_n_u16(a); +} + +// CHECK: test_vdup_n_u32 +// CHECK: vmov +uint32x2_t test_vdup_n_u32(uint32_t a) { + return vdup_n_u32(a); +} + +// CHECK: test_vdup_n_s8 +// CHECK: vmov +int8x8_t test_vdup_n_s8(int8_t a) { + return 
vdup_n_s8(a); +} + +// CHECK: test_vdup_n_s16 +// CHECK: vmov +int16x4_t test_vdup_n_s16(int16_t a) { + return vdup_n_s16(a); +} + +// CHECK: test_vdup_n_s32 +// CHECK: vmov +int32x2_t test_vdup_n_s32(int32_t a) { + return vdup_n_s32(a); +} + +// CHECK: test_vdup_n_p8 +// CHECK: vmov +poly8x8_t test_vdup_n_p8(poly8_t a) { + return vdup_n_p8(a); +} + +// CHECK: test_vdup_n_p16 +// CHECK: vmov +poly16x4_t test_vdup_n_p16(poly16_t a) { + return vdup_n_p16(a); +} + +// CHECK: test_vdup_n_f32 +// CHECK: vmov +float32x2_t test_vdup_n_f32(float32_t a) { + return vdup_n_f32(a); +} + +// CHECK: test_vdupq_n_u8 +// CHECK: vmov +uint8x16_t test_vdupq_n_u8(uint8_t a) { + return vdupq_n_u8(a); +} + +// CHECK: test_vdupq_n_u16 +// CHECK: vmov +uint16x8_t test_vdupq_n_u16(uint16_t a) { + return vdupq_n_u16(a); +} + +// CHECK: test_vdupq_n_u32 +// CHECK: vmov +uint32x4_t test_vdupq_n_u32(uint32_t a) { + return vdupq_n_u32(a); +} + +// CHECK: test_vdupq_n_s8 +// CHECK: vmov +int8x16_t test_vdupq_n_s8(int8_t a) { + return vdupq_n_s8(a); +} + +// CHECK: test_vdupq_n_s16 +// CHECK: vmov +int16x8_t test_vdupq_n_s16(int16_t a) { + return vdupq_n_s16(a); +} + +// CHECK: test_vdupq_n_s32 +// CHECK: vmov +int32x4_t test_vdupq_n_s32(int32_t a) { + return vdupq_n_s32(a); +} + +// CHECK: test_vdupq_n_p8 +// CHECK: vmov +poly8x16_t test_vdupq_n_p8(poly8_t a) { + return vdupq_n_p8(a); +} + +// CHECK: test_vdupq_n_p16 +// CHECK: vmov +poly16x8_t test_vdupq_n_p16(poly16_t a) { + return vdupq_n_p16(a); +} + +// CHECK: test_vdupq_n_f32 +// CHECK: vmov +float32x4_t test_vdupq_n_f32(float32_t a) { + return vdupq_n_f32(a); +} + +// CHECK: test_vdup_n_s64 +// CHECK: vmov +int64x1_t test_vdup_n_s64(int64_t a) { + return vdup_n_s64(a); +} + +// CHECK: test_vdup_n_u64 +// CHECK: vmov +uint64x1_t test_vdup_n_u64(uint64_t a) { + return vdup_n_u64(a); +} + +// CHECK: test_vdupq_n_s64 +// CHECK: vmov +int64x2_t test_vdupq_n_s64(int64_t a) { + return vdupq_n_s64(a); +} + +// CHECK: test_vdupq_n_u64 +// CHECK: vmov +uint64x2_t test_vdupq_n_u64(uint64_t a) { + return vdupq_n_u64(a); +} + + +// CHECK: test_veor_s8 +// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_veor_s8(int8x8_t a, int8x8_t b) { + return veor_s8(a, b); +} + +// CHECK: test_veor_s16 +// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_veor_s16(int16x4_t a, int16x4_t b) { + return veor_s16(a, b); +} + +// CHECK: test_veor_s32 +// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_veor_s32(int32x2_t a, int32x2_t b) { + return veor_s32(a, b); +} + +// CHECK: test_veor_s64 +// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_veor_s64(int64x1_t a, int64x1_t b) { + return veor_s64(a, b); +} + +// CHECK: test_veor_u8 +// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_veor_u8(uint8x8_t a, uint8x8_t b) { + return veor_u8(a, b); +} + +// CHECK: test_veor_u16 +// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_veor_u16(uint16x4_t a, uint16x4_t b) { + return veor_u16(a, b); +} + +// CHECK: test_veor_u32 +// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_veor_u32(uint32x2_t a, uint32x2_t b) { + return veor_u32(a, b); +} + +// CHECK: test_veor_u64 +// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_veor_u64(uint64x1_t a, uint64x1_t b) { + return veor_u64(a, b); +} + +// CHECK: test_veorq_s8 +// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) { + return veorq_s8(a, b); +} + +// CHECK: 
test_veorq_s16 +// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) { + return veorq_s16(a, b); +} + +// CHECK: test_veorq_s32 +// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) { + return veorq_s32(a, b); +} + +// CHECK: test_veorq_s64 +// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) { + return veorq_s64(a, b); +} + +// CHECK: test_veorq_u8 +// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) { + return veorq_u8(a, b); +} + +// CHECK: test_veorq_u16 +// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) { + return veorq_u16(a, b); +} + +// CHECK: test_veorq_u32 +// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) { + return veorq_u32(a, b); +} + +// CHECK: test_veorq_u64 +// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) { + return veorq_u64(a, b); +} + + +// CHECK: test_vext_s8 +// CHECK: vext.8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) { + return vext_s8(a, b, 7); +} + +// CHECK: test_vext_u8 +// CHECK: vext.8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vext_u8(uint8x8_t a, uint8x8_t b) { + return vext_u8(a, b, 7); +} + +// CHECK: test_vext_p8 +// CHECK: vext.8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +poly8x8_t test_vext_p8(poly8x8_t a, poly8x8_t b) { + return vext_p8(a, b, 7); +} + +// CHECK: test_vext_s16 +// CHECK: vext.16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) { + return vext_s16(a, b, 3); +} + +// CHECK: test_vext_u16 +// CHECK: vext.16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) { + return vext_u16(a, b, 3); +} + +// CHECK: test_vext_p16 +// CHECK: vext.16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +poly16x4_t test_vext_p16(poly16x4_t a, poly16x4_t b) { + return vext_p16(a, b, 3); +} + +// CHECK: test_vext_s32 +// CHECK: vext.32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) { + return vext_s32(a, b, 1); +} + +// CHECK: test_vext_u32 +// CHECK: vext.32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vext_u32(uint32x2_t a, uint32x2_t b) { + return vext_u32(a, b, 1); +} + +// CHECK: test_vext_s64 +int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) { + return vext_s64(a, b, 0); +} + +// CHECK: test_vext_u64 +uint64x1_t test_vext_u64(uint64x1_t a, uint64x1_t b) { + return vext_u64(a, b, 0); +} + +// CHECK: test_vext_f32 +// CHECK: vext.32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) { + return vext_f32(a, b, 1); +} + +// CHECK: test_vextq_s8 +// CHECK: vext.8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) { + return vextq_s8(a, b, 15); +} + +// CHECK: test_vextq_u8 +// CHECK: vext.8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x16_t test_vextq_u8(uint8x16_t a, uint8x16_t b) { + return vextq_u8(a, b, 15); +} + +// CHECK: test_vextq_p8 +// CHECK: vext.8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +poly8x16_t test_vextq_p8(poly8x16_t a, poly8x16_t b) { + return vextq_p8(a, b, 15); 
+} + +// CHECK: test_vextq_s16 +// CHECK: vext.16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) { + return vextq_s16(a, b, 7); +} + +// CHECK: test_vextq_u16 +// CHECK: vext.16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) { + return vextq_u16(a, b, 7); +} + +// CHECK: test_vextq_p16 +// CHECK: vext.16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +poly16x8_t test_vextq_p16(poly16x8_t a, poly16x8_t b) { + return vextq_p16(a, b, 7); +} + +// CHECK: test_vextq_s32 +// CHECK: vext.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) { + return vextq_s32(a, b, 3); +} + +// CHECK: test_vextq_u32 +// CHECK: vext.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x4_t test_vextq_u32(uint32x4_t a, uint32x4_t b) { + return vextq_u32(a, b, 3); +} + +// CHECK: test_vextq_s64 +// CHECK: {{vmov|vdup}} +int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) { + return vextq_s64(a, b, 1); +} + +// CHECK: test_vextq_u64 +// CHECK: {{vmov|vdup}} +uint64x2_t test_vextq_u64(uint64x2_t a, uint64x2_t b) { + return vextq_u64(a, b, 1); +} + +// CHECK: test_vextq_f32 +// CHECK: vext.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) { + return vextq_f32(a, b, 3); +} + + +// CHECK: test_vfma_f32 +// CHECK: vfma.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c) { + return vfma_f32(a, b, c); +} + +// CHECK: test_vfmaq_f32 +// CHECK: vfma.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { + return vfmaq_f32(a, b, c); +} + + +// CHECK: test_vget_high_s8 +int8x8_t test_vget_high_s8(int8x16_t a) { + return vget_high_s8(a); +} + +// CHECK: test_vget_high_s16 +int16x4_t test_vget_high_s16(int16x8_t a) { + return vget_high_s16(a); +} + +// CHECK: test_vget_high_s32 +int32x2_t test_vget_high_s32(int32x4_t a) { + return vget_high_s32(a); +} + +// CHECK: test_vget_high_s64 +int64x1_t test_vget_high_s64(int64x2_t a) { + return vget_high_s64(a); +} + +// CHECK: test_vget_high_f16 +float16x4_t test_vget_high_f16(float16x8_t a) { + return vget_high_f16(a); +} + +// CHECK: test_vget_high_f32 +float32x2_t test_vget_high_f32(float32x4_t a) { + return vget_high_f32(a); +} + +// CHECK: test_vget_high_u8 +uint8x8_t test_vget_high_u8(uint8x16_t a) { + return vget_high_u8(a); +} + +// CHECK: test_vget_high_u16 +uint16x4_t test_vget_high_u16(uint16x8_t a) { + return vget_high_u16(a); +} + +// CHECK: test_vget_high_u32 +uint32x2_t test_vget_high_u32(uint32x4_t a) { + return vget_high_u32(a); +} + +// CHECK: test_vget_high_u64 +uint64x1_t test_vget_high_u64(uint64x2_t a) { + return vget_high_u64(a); +} + +// CHECK: test_vget_high_p8 +poly8x8_t test_vget_high_p8(poly8x16_t a) { + return vget_high_p8(a); +} + +// CHECK: test_vget_high_p16 +poly16x4_t test_vget_high_p16(poly16x8_t a) { + return vget_high_p16(a); +} + + +// CHECK: test_vget_lane_u8 +// CHECK: vmov +uint8_t test_vget_lane_u8(uint8x8_t a) { + return vget_lane_u8(a, 7); +} + +// CHECK: test_vget_lane_u16 +// CHECK: vmov +uint16_t test_vget_lane_u16(uint16x4_t a) { + return vget_lane_u16(a, 3); +} + +// CHECK: test_vget_lane_u32 +// CHECK: vmov +uint32_t test_vget_lane_u32(uint32x2_t a) { + return vget_lane_u32(a, 1); +} + +// CHECK: test_vget_lane_s8 +// CHECK: vmov +int8_t test_vget_lane_s8(int8x8_t a) { + return 
vget_lane_s8(a, 7); +} + +// CHECK: test_vget_lane_s16 +// CHECK: vmov +int16_t test_vget_lane_s16(int16x4_t a) { + return vget_lane_s16(a, 3); +} + +// CHECK: test_vget_lane_s32 +// CHECK: vmov +int32_t test_vget_lane_s32(int32x2_t a) { + return vget_lane_s32(a, 1); +} + +// CHECK: test_vget_lane_p8 +// CHECK: vmov +poly8_t test_vget_lane_p8(poly8x8_t a) { + return vget_lane_p8(a, 7); +} + +// CHECK: test_vget_lane_p16 +// CHECK: vmov +poly16_t test_vget_lane_p16(poly16x4_t a) { + return vget_lane_p16(a, 3); +} + +// CHECK: test_vget_lane_f32 +// CHECK: vmov +float32_t test_vget_lane_f32(float32x2_t a) { + return vget_lane_f32(a, 1); +} + +// CHECK: test_vgetq_lane_u8 +// CHECK: vmov +uint8_t test_vgetq_lane_u8(uint8x16_t a) { + return vgetq_lane_u8(a, 15); +} + +// CHECK: test_vgetq_lane_u16 +// CHECK: vmov +uint16_t test_vgetq_lane_u16(uint16x8_t a) { + return vgetq_lane_u16(a, 7); +} + +// CHECK: test_vgetq_lane_u32 +// CHECK: vmov +uint32_t test_vgetq_lane_u32(uint32x4_t a) { + return vgetq_lane_u32(a, 3); +} + +// CHECK: test_vgetq_lane_s8 +// CHECK: vmov +int8_t test_vgetq_lane_s8(int8x16_t a) { + return vgetq_lane_s8(a, 15); +} + +// CHECK: test_vgetq_lane_s16 +// CHECK: vmov +int16_t test_vgetq_lane_s16(int16x8_t a) { + return vgetq_lane_s16(a, 7); +} + +// CHECK: test_vgetq_lane_s32 +// CHECK: vmov +int32_t test_vgetq_lane_s32(int32x4_t a) { + return vgetq_lane_s32(a, 3); +} + +// CHECK: test_vgetq_lane_p8 +// CHECK: vmov +poly8_t test_vgetq_lane_p8(poly8x16_t a) { + return vgetq_lane_p8(a, 15); +} + +// CHECK: test_vgetq_lane_p16 +// CHECK: vmov +poly16_t test_vgetq_lane_p16(poly16x8_t a) { + return vgetq_lane_p16(a, 7); +} + +// CHECK: test_vgetq_lane_f32 +// CHECK: vmov +float32_t test_vgetq_lane_f32(float32x4_t a) { + return vgetq_lane_f32(a, 3); +} + +// CHECK: test_vget_lane_s64 +// CHECK: vmov +int64_t test_vget_lane_s64(int64x1_t a) { + return vget_lane_s64(a, 0); +} + +// CHECK: test_vget_lane_u64 +// CHECK: vmov +uint64_t test_vget_lane_u64(uint64x1_t a) { + return vget_lane_u64(a, 0); +} + +// CHECK: test_vgetq_lane_s64 +// CHECK: vmov +int64_t test_vgetq_lane_s64(int64x2_t a) { + return vgetq_lane_s64(a, 1); +} + +// CHECK: test_vgetq_lane_u64 +// CHECK: vmov +uint64_t test_vgetq_lane_u64(uint64x2_t a) { + return vgetq_lane_u64(a, 1); +} + + +// CHECK: test_vget_low_s8 +int8x8_t test_vget_low_s8(int8x16_t a) { + return vget_low_s8(a); +} + +// CHECK: test_vget_low_s16 +int16x4_t test_vget_low_s16(int16x8_t a) { + return vget_low_s16(a); +} + +// CHECK: test_vget_low_s32 +int32x2_t test_vget_low_s32(int32x4_t a) { + return vget_low_s32(a); +} + +// CHECK: test_vget_low_s64 +int64x1_t test_vget_low_s64(int64x2_t a) { + return vget_low_s64(a); +} + +// CHECK: test_vget_low_f16 +float16x4_t test_vget_low_f16(float16x8_t a) { + return vget_low_f16(a); +} + +// CHECK: test_vget_low_f32 +float32x2_t test_vget_low_f32(float32x4_t a) { + return vget_low_f32(a); +} + +// CHECK: test_vget_low_u8 +uint8x8_t test_vget_low_u8(uint8x16_t a) { + return vget_low_u8(a); +} + +// CHECK: test_vget_low_u16 +uint16x4_t test_vget_low_u16(uint16x8_t a) { + return vget_low_u16(a); +} + +// CHECK: test_vget_low_u32 +uint32x2_t test_vget_low_u32(uint32x4_t a) { + return vget_low_u32(a); +} + +// CHECK: test_vget_low_u64 +uint64x1_t test_vget_low_u64(uint64x2_t a) { + return vget_low_u64(a); +} + +// CHECK: test_vget_low_p8 +poly8x8_t test_vget_low_p8(poly8x16_t a) { + return vget_low_p8(a); +} + +// CHECK: test_vget_low_p16 +poly16x4_t test_vget_low_p16(poly16x8_t a) { + return vget_low_p16(a); 
+} + + +// CHECK: test_vhadd_s8 +// CHECK: vhadd.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vhadd_s8(int8x8_t a, int8x8_t b) { + return vhadd_s8(a, b); +} + +// CHECK: test_vhadd_s16 +// CHECK: vhadd.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vhadd_s16(int16x4_t a, int16x4_t b) { + return vhadd_s16(a, b); +} + +// CHECK: test_vhadd_s32 +// CHECK: vhadd.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vhadd_s32(int32x2_t a, int32x2_t b) { + return vhadd_s32(a, b); +} + +// CHECK: test_vhadd_u8 +// CHECK: vhadd.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vhadd_u8(uint8x8_t a, uint8x8_t b) { + return vhadd_u8(a, b); +} + +// CHECK: test_vhadd_u16 +// CHECK: vhadd.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vhadd_u16(uint16x4_t a, uint16x4_t b) { + return vhadd_u16(a, b); +} + +// CHECK: test_vhadd_u32 +// CHECK: vhadd.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vhadd_u32(uint32x2_t a, uint32x2_t b) { + return vhadd_u32(a, b); +} + +// CHECK: test_vhaddq_s8 +// CHECK: vhadd.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vhaddq_s8(int8x16_t a, int8x16_t b) { + return vhaddq_s8(a, b); +} + +// CHECK: test_vhaddq_s16 +// CHECK: vhadd.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vhaddq_s16(int16x8_t a, int16x8_t b) { + return vhaddq_s16(a, b); +} + +// CHECK: test_vhaddq_s32 +// CHECK: vhadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vhaddq_s32(int32x4_t a, int32x4_t b) { + return vhaddq_s32(a, b); +} + +// CHECK: test_vhaddq_u8 +// CHECK: vhadd.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vhaddq_u8(uint8x16_t a, uint8x16_t b) { + return vhaddq_u8(a, b); +} + +// CHECK: test_vhaddq_u16 +// CHECK: vhadd.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vhaddq_u16(uint16x8_t a, uint16x8_t b) { + return vhaddq_u16(a, b); +} + +// CHECK: test_vhaddq_u32 +// CHECK: vhadd.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vhaddq_u32(uint32x4_t a, uint32x4_t b) { + return vhaddq_u32(a, b); +} + + +// CHECK: test_vhsub_s8 +// CHECK: vhsub.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vhsub_s8(int8x8_t a, int8x8_t b) { + return vhsub_s8(a, b); +} + +// CHECK: test_vhsub_s16 +// CHECK: vhsub.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vhsub_s16(int16x4_t a, int16x4_t b) { + return vhsub_s16(a, b); +} + +// CHECK: test_vhsub_s32 +// CHECK: vhsub.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vhsub_s32(int32x2_t a, int32x2_t b) { + return vhsub_s32(a, b); +} + +// CHECK: test_vhsub_u8 +// CHECK: vhsub.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vhsub_u8(uint8x8_t a, uint8x8_t b) { + return vhsub_u8(a, b); +} + +// CHECK: test_vhsub_u16 +// CHECK: vhsub.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vhsub_u16(uint16x4_t a, uint16x4_t b) { + return vhsub_u16(a, b); +} + +// CHECK: test_vhsub_u32 +// CHECK: vhsub.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vhsub_u32(uint32x2_t a, uint32x2_t b) { + return vhsub_u32(a, b); +} + +// CHECK: test_vhsubq_s8 +// CHECK: vhsub.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vhsubq_s8(int8x16_t a, int8x16_t b) { + return vhsubq_s8(a, b); +} + +// CHECK: test_vhsubq_s16 +// CHECK: vhsub.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vhsubq_s16(int16x8_t a, int16x8_t b) { + return vhsubq_s16(a, b); +} + +// CHECK: test_vhsubq_s32 +// CHECK: vhsub.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vhsubq_s32(int32x4_t a, 
int32x4_t b) { + return vhsubq_s32(a, b); +} + +// CHECK: test_vhsubq_u8 +// CHECK: vhsub.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vhsubq_u8(uint8x16_t a, uint8x16_t b) { + return vhsubq_u8(a, b); +} + +// CHECK: test_vhsubq_u16 +// CHECK: vhsub.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vhsubq_u16(uint16x8_t a, uint16x8_t b) { + return vhsubq_u16(a, b); +} + +// CHECK: test_vhsubq_u32 +// CHECK: vhsub.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vhsubq_u32(uint32x4_t a, uint32x4_t b) { + return vhsubq_u32(a, b); +} + + +// CHECK: test_vld1q_u8 +// CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint8x16_t test_vld1q_u8(uint8_t const * a) { + return vld1q_u8(a); +} + +// CHECK: test_vld1q_u16 +// CHECK: vld1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint16x8_t test_vld1q_u16(uint16_t const * a) { + return vld1q_u16(a); +} + +// CHECK: test_vld1q_u32 +// CHECK: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint32x4_t test_vld1q_u32(uint32_t const * a) { + return vld1q_u32(a); +} + +// CHECK: test_vld1q_u64 +// CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint64x2_t test_vld1q_u64(uint64_t const * a) { + return vld1q_u64(a); +} + +// CHECK: test_vld1q_s8 +// CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int8x16_t test_vld1q_s8(int8_t const * a) { + return vld1q_s8(a); +} + +// CHECK: test_vld1q_s16 +// CHECK: vld1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int16x8_t test_vld1q_s16(int16_t const * a) { + return vld1q_s16(a); +} + +// CHECK: test_vld1q_s32 +// CHECK: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int32x4_t test_vld1q_s32(int32_t const * a) { + return vld1q_s32(a); +} + +// CHECK: test_vld1q_s64 +// CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int64x2_t test_vld1q_s64(int64_t const * a) { + return vld1q_s64(a); +} + +// CHECK: test_vld1q_f16 +// CHECK: vld1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +float16x8_t test_vld1q_f16(float16_t const * a) { + return vld1q_f16(a); +} + +// CHECK: test_vld1q_f32 +// CHECK: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +float32x4_t test_vld1q_f32(float32_t const * a) { + return vld1q_f32(a); +} + +// CHECK: test_vld1q_p8 +// CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +poly8x16_t test_vld1q_p8(poly8_t const * a) { + return vld1q_p8(a); +} + +// CHECK: test_vld1q_p16 +// CHECK: vld1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +poly16x8_t test_vld1q_p16(poly16_t const * a) { + return vld1q_p16(a); +} + +// CHECK: test_vld1_u8 +// CHECK: vld1.8 {d{{[0-9]+}}}, [r{{[0-9]+}}] +uint8x8_t test_vld1_u8(uint8_t const * a) { + return vld1_u8(a); +} + +// CHECK: test_vld1_u16 +// CHECK: vld1.16 {d{{[0-9]+}}}, [r{{[0-9]+}}] +uint16x4_t test_vld1_u16(uint16_t const * a) { + return vld1_u16(a); +} + +// CHECK: test_vld1_u32 +// CHECK: vld1.32 {d{{[0-9]+}}}, [r{{[0-9]+}}] +uint32x2_t test_vld1_u32(uint32_t const * a) { + return vld1_u32(a); +} + +// CHECK: test_vld1_u64 +// CHECK: vld1.64 {d{{[0-9]+}}}, [r{{[0-9]+}}] +uint64x1_t test_vld1_u64(uint64_t const * a) { + return vld1_u64(a); +} + +// CHECK: test_vld1_s8 +// CHECK: vld1.8 {d{{[0-9]+}}}, [r{{[0-9]+}}] +int8x8_t test_vld1_s8(int8_t const * a) { + return vld1_s8(a); +} + +// CHECK: test_vld1_s16 +// CHECK: vld1.16 {d{{[0-9]+}}}, [r{{[0-9]+}}] +int16x4_t test_vld1_s16(int16_t const * a) { + return vld1_s16(a); +} + +// CHECK: test_vld1_s32 +// CHECK: vld1.32 {d{{[0-9]+}}}, [r{{[0-9]+}}] +int32x2_t test_vld1_s32(int32_t const * a) { + return vld1_s32(a); +} + +// CHECK: 
test_vld1_s64 +// CHECK: vld1.64 {d{{[0-9]+}}}, [r{{[0-9]+}}] +int64x1_t test_vld1_s64(int64_t const * a) { + return vld1_s64(a); +} + +// CHECK: test_vld1_f16 +// CHECK: vld1.16 {d{{[0-9]+}}}, [r{{[0-9]+}}] +float16x4_t test_vld1_f16(float16_t const * a) { + return vld1_f16(a); +} + +// CHECK: test_vld1_f32 +// CHECK: vld1.32 {d{{[0-9]+}}}, [r{{[0-9]+}}] +float32x2_t test_vld1_f32(float32_t const * a) { + return vld1_f32(a); +} + +// CHECK: test_vld1_p8 +// CHECK: vld1.8 {d{{[0-9]+}}}, [r{{[0-9]+}}] +poly8x8_t test_vld1_p8(poly8_t const * a) { + return vld1_p8(a); +} + +// CHECK: test_vld1_p16 +// CHECK: vld1.16 {d{{[0-9]+}}}, [r{{[0-9]+}}] +poly16x4_t test_vld1_p16(poly16_t const * a) { + return vld1_p16(a); +} + + +// CHECK: test_vld1q_dup_u8 +// CHECK: vld1.8 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +uint8x16_t test_vld1q_dup_u8(uint8_t const * a) { + return vld1q_dup_u8(a); +} + +// CHECK: test_vld1q_dup_u16 +// CHECK: vld1.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}:16] +uint16x8_t test_vld1q_dup_u16(uint16_t const * a) { + return vld1q_dup_u16(a); +} + +// CHECK: test_vld1q_dup_u32 +// CHECK: vld1.32 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}:32] +uint32x4_t test_vld1q_dup_u32(uint32_t const * a) { + return vld1q_dup_u32(a); +} + +// CHECK: test_vld1q_dup_u64 +// CHECK: {{ldr|vldr|vmov}} +uint64x2_t test_vld1q_dup_u64(uint64_t const * a) { + return vld1q_dup_u64(a); +} + +// CHECK: test_vld1q_dup_s8 +// CHECK: vld1.8 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +int8x16_t test_vld1q_dup_s8(int8_t const * a) { + return vld1q_dup_s8(a); +} + +// CHECK: test_vld1q_dup_s16 +// CHECK: vld1.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}:16] +int16x8_t test_vld1q_dup_s16(int16_t const * a) { + return vld1q_dup_s16(a); +} + +// CHECK: test_vld1q_dup_s32 +// CHECK: vld1.32 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}:32] +int32x4_t test_vld1q_dup_s32(int32_t const * a) { + return vld1q_dup_s32(a); +} + +// CHECK: test_vld1q_dup_s64 +// CHECK: {{ldr|vldr|vmov}} +int64x2_t test_vld1q_dup_s64(int64_t const * a) { + return vld1q_dup_s64(a); +} + +// CHECK: test_vld1q_dup_f16 +// CHECK: vld1.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}:16] +float16x8_t test_vld1q_dup_f16(float16_t const * a) { + return vld1q_dup_f16(a); +} + +// CHECK: test_vld1q_dup_f32 +// CHECK: vld1.32 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}:32] +float32x4_t test_vld1q_dup_f32(float32_t const * a) { + return vld1q_dup_f32(a); +} + +// CHECK: test_vld1q_dup_p8 +// CHECK: vld1.8 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +poly8x16_t test_vld1q_dup_p8(poly8_t const * a) { + return vld1q_dup_p8(a); +} + +// CHECK: test_vld1q_dup_p16 +// CHECK: vld1.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}:16] +poly16x8_t test_vld1q_dup_p16(poly16_t const * a) { + return vld1q_dup_p16(a); +} + +// CHECK: test_vld1_dup_u8 +// CHECK: vld1.8 {d{{[0-9]+}}[]}, [r{{[0-9]+}}] +uint8x8_t test_vld1_dup_u8(uint8_t const * a) { + return vld1_dup_u8(a); +} + +// CHECK: test_vld1_dup_u16 +// CHECK: vld1.16 {d{{[0-9]+}}[]}, [r{{[0-9]+}}:16] +uint16x4_t test_vld1_dup_u16(uint16_t const * a) { + return vld1_dup_u16(a); +} + +// CHECK: test_vld1_dup_u32 +// CHECK: vld1.32 {d{{[0-9]+}}[]}, [r{{[0-9]+}}:32] +uint32x2_t test_vld1_dup_u32(uint32_t const * a) { + return vld1_dup_u32(a); +} + +// CHECK: test_vld1_dup_u64 +// CHECK: {{ldr|vldr|vmov}} +uint64x1_t test_vld1_dup_u64(uint64_t const * a) { + return vld1_dup_u64(a); +} + +// CHECK: test_vld1_dup_s8 +// CHECK: vld1.8 {d{{[0-9]+}}[]}, [r{{[0-9]+}}] +int8x8_t test_vld1_dup_s8(int8_t 
const * a) { + return vld1_dup_s8(a); +} + +// CHECK: test_vld1_dup_s16 +// CHECK: vld1.16 {d{{[0-9]+}}[]}, [r{{[0-9]+}}:16] +int16x4_t test_vld1_dup_s16(int16_t const * a) { + return vld1_dup_s16(a); +} + +// CHECK: test_vld1_dup_s32 +// CHECK: vld1.32 {d{{[0-9]+}}[]}, [r{{[0-9]+}}:32] +int32x2_t test_vld1_dup_s32(int32_t const * a) { + return vld1_dup_s32(a); +} + +// CHECK: test_vld1_dup_s64 +// CHECK: {{ldr|vldr|vmov}} +int64x1_t test_vld1_dup_s64(int64_t const * a) { + return vld1_dup_s64(a); +} + +// CHECK: test_vld1_dup_f16 +// CHECK: vld1.16 {d{{[0-9]+}}[]}, [r{{[0-9]+}}:16] +float16x4_t test_vld1_dup_f16(float16_t const * a) { + return vld1_dup_f16(a); +} + +// CHECK: test_vld1_dup_f32 +// CHECK: vld1.32 {d{{[0-9]+}}[]}, [r{{[0-9]+}}:32] +float32x2_t test_vld1_dup_f32(float32_t const * a) { + return vld1_dup_f32(a); +} + +// CHECK: test_vld1_dup_p8 +// CHECK: vld1.8 {d{{[0-9]+}}[]}, [r{{[0-9]+}}] +poly8x8_t test_vld1_dup_p8(poly8_t const * a) { + return vld1_dup_p8(a); +} + +// CHECK: test_vld1_dup_p16 +// CHECK: vld1.16 {d{{[0-9]+}}[]}, [r{{[0-9]+}}:16] +poly16x4_t test_vld1_dup_p16(poly16_t const * a) { + return vld1_dup_p16(a); +} + + +// CHECK: test_vld1q_lane_u8 +// CHECK: vld1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +uint8x16_t test_vld1q_lane_u8(uint8_t const * a, uint8x16_t b) { + return vld1q_lane_u8(a, b, 15); +} + +// CHECK: test_vld1q_lane_u16 +// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +uint16x8_t test_vld1q_lane_u16(uint16_t const * a, uint16x8_t b) { + return vld1q_lane_u16(a, b, 7); +} + +// CHECK: test_vld1q_lane_u32 +// CHECK: vld1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32] +uint32x4_t test_vld1q_lane_u32(uint32_t const * a, uint32x4_t b) { + return vld1q_lane_u32(a, b, 3); +} + +// CHECK: test_vld1q_lane_u64 +// CHECK: {{ldr|vldr|vmov}} +uint64x2_t test_vld1q_lane_u64(uint64_t const * a, uint64x2_t b) { + return vld1q_lane_u64(a, b, 1); +} + +// CHECK: test_vld1q_lane_s8 +// CHECK: vld1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +int8x16_t test_vld1q_lane_s8(int8_t const * a, int8x16_t b) { + return vld1q_lane_s8(a, b, 15); +} + +// CHECK: test_vld1q_lane_s16 +// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +int16x8_t test_vld1q_lane_s16(int16_t const * a, int16x8_t b) { + return vld1q_lane_s16(a, b, 7); +} + +// CHECK: test_vld1q_lane_s32 +// CHECK: vld1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32] +int32x4_t test_vld1q_lane_s32(int32_t const * a, int32x4_t b) { + return vld1q_lane_s32(a, b, 3); +} + +// CHECK: test_vld1q_lane_s64 +// CHECK: {{ldr|vldr|vmov}} +int64x2_t test_vld1q_lane_s64(int64_t const * a, int64x2_t b) { + return vld1q_lane_s64(a, b, 1); +} + +// CHECK: test_vld1q_lane_f16 +// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +float16x8_t test_vld1q_lane_f16(float16_t const * a, float16x8_t b) { + return vld1q_lane_f16(a, b, 7); +} + +// CHECK: test_vld1q_lane_f32 +// CHECK: vld1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32] +float32x4_t test_vld1q_lane_f32(float32_t const * a, float32x4_t b) { + return vld1q_lane_f32(a, b, 3); +} + +// CHECK: test_vld1q_lane_p8 +// CHECK: vld1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +poly8x16_t test_vld1q_lane_p8(poly8_t const * a, poly8x16_t b) { + return vld1q_lane_p8(a, b, 15); +} + +// CHECK: test_vld1q_lane_p16 +// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +poly16x8_t test_vld1q_lane_p16(poly16_t const * a, poly16x8_t b) { + return vld1q_lane_p16(a, b, 7); +} + +// CHECK: test_vld1_lane_u8 +// CHECK: vld1.8 
{d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +uint8x8_t test_vld1_lane_u8(uint8_t const * a, uint8x8_t b) { + return vld1_lane_u8(a, b, 7); +} + +// CHECK: test_vld1_lane_u16 +// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +uint16x4_t test_vld1_lane_u16(uint16_t const * a, uint16x4_t b) { + return vld1_lane_u16(a, b, 3); +} + +// CHECK: test_vld1_lane_u32 +// CHECK: vld1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32] +uint32x2_t test_vld1_lane_u32(uint32_t const * a, uint32x2_t b) { + return vld1_lane_u32(a, b, 1); +} + +// CHECK: test_vld1_lane_u64 +// CHECK: {{ldr|vldr|vmov}} +uint64x1_t test_vld1_lane_u64(uint64_t const * a, uint64x1_t b) { + return vld1_lane_u64(a, b, 0); +} + +// CHECK: test_vld1_lane_s8 +// CHECK: vld1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +int8x8_t test_vld1_lane_s8(int8_t const * a, int8x8_t b) { + return vld1_lane_s8(a, b, 7); +} + +// CHECK: test_vld1_lane_s16 +// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +int16x4_t test_vld1_lane_s16(int16_t const * a, int16x4_t b) { + return vld1_lane_s16(a, b, 3); +} + +// CHECK: test_vld1_lane_s32 +// CHECK: vld1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32] +int32x2_t test_vld1_lane_s32(int32_t const * a, int32x2_t b) { + return vld1_lane_s32(a, b, 1); +} + +// CHECK: test_vld1_lane_s64 +// CHECK: {{ldr|vldr|vmov}} +int64x1_t test_vld1_lane_s64(int64_t const * a, int64x1_t b) { + return vld1_lane_s64(a, b, 0); +} + +// CHECK: test_vld1_lane_f16 +// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +float16x4_t test_vld1_lane_f16(float16_t const * a, float16x4_t b) { + return vld1_lane_f16(a, b, 3); +} + +// CHECK: test_vld1_lane_f32 +// CHECK: vld1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32] +float32x2_t test_vld1_lane_f32(float32_t const * a, float32x2_t b) { + return vld1_lane_f32(a, b, 1); +} + +// CHECK: test_vld1_lane_p8 +// CHECK: vld1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +poly8x8_t test_vld1_lane_p8(poly8_t const * a, poly8x8_t b) { + return vld1_lane_p8(a, b, 7); +} + +// CHECK: test_vld1_lane_p16 +// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +poly16x4_t test_vld1_lane_p16(poly16_t const * a, poly16x4_t b) { + return vld1_lane_p16(a, b, 3); +} + + +// CHECK: test_vld2q_u8 +// CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint8x16x2_t test_vld2q_u8(uint8_t const * a) { + return vld2q_u8(a); +} + +// CHECK: test_vld2q_u16 +// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint16x8x2_t test_vld2q_u16(uint16_t const * a) { + return vld2q_u16(a); +} + +// CHECK: test_vld2q_u32 +// CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint32x4x2_t test_vld2q_u32(uint32_t const * a) { + return vld2q_u32(a); +} + +// CHECK: test_vld2q_s8 +// CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int8x16x2_t test_vld2q_s8(int8_t const * a) { + return vld2q_s8(a); +} + +// CHECK: test_vld2q_s16 +// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int16x8x2_t test_vld2q_s16(int16_t const * a) { + return vld2q_s16(a); +} + +// CHECK: test_vld2q_s32 +// CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int32x4x2_t test_vld2q_s32(int32_t const * a) { + return vld2q_s32(a); +} + +// CHECK: test_vld2q_f16 +// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +float16x8x2_t test_vld2q_f16(float16_t const * a) { + return vld2q_f16(a); +} 
+ +// CHECK: test_vld2q_f32 +// CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +float32x4x2_t test_vld2q_f32(float32_t const * a) { + return vld2q_f32(a); +} + +// CHECK: test_vld2q_p8 +// CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +poly8x16x2_t test_vld2q_p8(poly8_t const * a) { + return vld2q_p8(a); +} + +// CHECK: test_vld2q_p16 +// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +poly16x8x2_t test_vld2q_p16(poly16_t const * a) { + return vld2q_p16(a); +} + +// CHECK: test_vld2_u8 +// CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint8x8x2_t test_vld2_u8(uint8_t const * a) { + return vld2_u8(a); +} + +// CHECK: test_vld2_u16 +// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint16x4x2_t test_vld2_u16(uint16_t const * a) { + return vld2_u16(a); +} + +// CHECK: test_vld2_u32 +// CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint32x2x2_t test_vld2_u32(uint32_t const * a) { + return vld2_u32(a); +} + +// CHECK: test_vld2_u64 +// CHECK: vld1.64 +uint64x1x2_t test_vld2_u64(uint64_t const * a) { + return vld2_u64(a); +} + +// CHECK: test_vld2_s8 +// CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int8x8x2_t test_vld2_s8(int8_t const * a) { + return vld2_s8(a); +} + +// CHECK: test_vld2_s16 +// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int16x4x2_t test_vld2_s16(int16_t const * a) { + return vld2_s16(a); +} + +// CHECK: test_vld2_s32 +// CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int32x2x2_t test_vld2_s32(int32_t const * a) { + return vld2_s32(a); +} + +// CHECK: test_vld2_s64 +// CHECK: vld1.64 +int64x1x2_t test_vld2_s64(int64_t const * a) { + return vld2_s64(a); +} + +// CHECK: test_vld2_f16 +// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +float16x4x2_t test_vld2_f16(float16_t const * a) { + return vld2_f16(a); +} + +// CHECK: test_vld2_f32 +// CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +float32x2x2_t test_vld2_f32(float32_t const * a) { + return vld2_f32(a); +} + +// CHECK: test_vld2_p8 +// CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +poly8x8x2_t test_vld2_p8(poly8_t const * a) { + return vld2_p8(a); +} + +// CHECK: test_vld2_p16 +// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +poly16x4x2_t test_vld2_p16(poly16_t const * a) { + return vld2_p16(a); +} + + +// CHECK: test_vld2_dup_u8 +// CHECK: vld2.8 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +uint8x8x2_t test_vld2_dup_u8(uint8_t const * a) { + return vld2_dup_u8(a); +} + +// CHECK: test_vld2_dup_u16 +// CHECK: vld2.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +uint16x4x2_t test_vld2_dup_u16(uint16_t const * a) { + return vld2_dup_u16(a); +} + +// CHECK: test_vld2_dup_u32 +// CHECK: vld2.32 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +uint32x2x2_t test_vld2_dup_u32(uint32_t const * a) { + return vld2_dup_u32(a); +} + +// CHECK: test_vld2_dup_u64 +// CHECK: vld1.64 +uint64x1x2_t test_vld2_dup_u64(uint64_t const * a) { + return vld2_dup_u64(a); +} + +// CHECK: test_vld2_dup_s8 +// CHECK: vld2.8 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +int8x8x2_t test_vld2_dup_s8(int8_t const * a) { + return vld2_dup_s8(a); +} + +// CHECK: test_vld2_dup_s16 +// CHECK: vld2.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +int16x4x2_t test_vld2_dup_s16(int16_t const * a) { + return vld2_dup_s16(a); +} + +// CHECK: test_vld2_dup_s32 +// CHECK: vld2.32 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +int32x2x2_t 
test_vld2_dup_s32(int32_t const * a) { + return vld2_dup_s32(a); +} + +// CHECK: test_vld2_dup_s64 +// CHECK: vld1.64 +int64x1x2_t test_vld2_dup_s64(int64_t const * a) { + return vld2_dup_s64(a); +} + +// CHECK: test_vld2_dup_f16 +// CHECK: vld2.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +float16x4x2_t test_vld2_dup_f16(float16_t const * a) { + return vld2_dup_f16(a); +} + +// CHECK: test_vld2_dup_f32 +// CHECK: vld2.32 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +float32x2x2_t test_vld2_dup_f32(float32_t const * a) { + return vld2_dup_f32(a); +} + +// CHECK: test_vld2_dup_p8 +// CHECK: vld2.8 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +poly8x8x2_t test_vld2_dup_p8(poly8_t const * a) { + return vld2_dup_p8(a); +} + +// CHECK: test_vld2_dup_p16 +// CHECK: vld2.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +poly16x4x2_t test_vld2_dup_p16(poly16_t const * a) { + return vld2_dup_p16(a); +} + + +// CHECK: test_vld2q_lane_u16 +// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +uint16x8x2_t test_vld2q_lane_u16(uint16_t const * a, uint16x8x2_t b) { + return vld2q_lane_u16(a, b, 7); +} + +// CHECK: test_vld2q_lane_u32 +// CHECK: vld2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +uint32x4x2_t test_vld2q_lane_u32(uint32_t const * a, uint32x4x2_t b) { + return vld2q_lane_u32(a, b, 3); +} + +// CHECK: test_vld2q_lane_s16 +// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +int16x8x2_t test_vld2q_lane_s16(int16_t const * a, int16x8x2_t b) { + return vld2q_lane_s16(a, b, 7); +} + +// CHECK: test_vld2q_lane_s32 +// CHECK: vld2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +int32x4x2_t test_vld2q_lane_s32(int32_t const * a, int32x4x2_t b) { + return vld2q_lane_s32(a, b, 3); +} + +// CHECK: test_vld2q_lane_f16 +// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +float16x8x2_t test_vld2q_lane_f16(float16_t const * a, float16x8x2_t b) { + return vld2q_lane_f16(a, b, 7); +} + +// CHECK: test_vld2q_lane_f32 +// CHECK: vld2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +float32x4x2_t test_vld2q_lane_f32(float32_t const * a, float32x4x2_t b) { + return vld2q_lane_f32(a, b, 3); +} + +// CHECK: test_vld2q_lane_p16 +// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +poly16x8x2_t test_vld2q_lane_p16(poly16_t const * a, poly16x8x2_t b) { + return vld2q_lane_p16(a, b, 7); +} + +// CHECK: test_vld2_lane_u8 +// CHECK: vld2.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +uint8x8x2_t test_vld2_lane_u8(uint8_t const * a, uint8x8x2_t b) { + return vld2_lane_u8(a, b, 7); +} + +// CHECK: test_vld2_lane_u16 +// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +uint16x4x2_t test_vld2_lane_u16(uint16_t const * a, uint16x4x2_t b) { + return vld2_lane_u16(a, b, 3); +} + +// CHECK: test_vld2_lane_u32 +// CHECK: vld2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +uint32x2x2_t test_vld2_lane_u32(uint32_t const * a, uint32x2x2_t b) { + return vld2_lane_u32(a, b, 1); +} + +// CHECK: test_vld2_lane_s8 +// CHECK: vld2.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +int8x8x2_t test_vld2_lane_s8(int8_t const * a, int8x8x2_t b) { + return vld2_lane_s8(a, b, 7); +} + +// CHECK: test_vld2_lane_s16 +// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +int16x4x2_t test_vld2_lane_s16(int16_t const * a, 
int16x4x2_t b) { + return vld2_lane_s16(a, b, 3); +} + +// CHECK: test_vld2_lane_s32 +// CHECK: vld2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +int32x2x2_t test_vld2_lane_s32(int32_t const * a, int32x2x2_t b) { + return vld2_lane_s32(a, b, 1); +} + +// CHECK: test_vld2_lane_f16 +// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +float16x4x2_t test_vld2_lane_f16(float16_t const * a, float16x4x2_t b) { + return vld2_lane_f16(a, b, 3); +} + +// CHECK: test_vld2_lane_f32 +// CHECK: vld2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +float32x2x2_t test_vld2_lane_f32(float32_t const * a, float32x2x2_t b) { + return vld2_lane_f32(a, b, 1); +} + +// CHECK: test_vld2_lane_p8 +// CHECK: vld2.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +poly8x8x2_t test_vld2_lane_p8(poly8_t const * a, poly8x8x2_t b) { + return vld2_lane_p8(a, b, 7); +} + +// CHECK: test_vld2_lane_p16 +// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +poly16x4x2_t test_vld2_lane_p16(poly16_t const * a, poly16x4x2_t b) { + return vld2_lane_p16(a, b, 3); +} + + +// CHECK: test_vld3q_u8 +// CHECK: vld3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +uint8x16x3_t test_vld3q_u8(uint8_t const * a) { + return vld3q_u8(a); +} + +// CHECK: test_vld3q_u16 +// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +uint16x8x3_t test_vld3q_u16(uint16_t const * a) { + return vld3q_u16(a); +} + +// CHECK: test_vld3q_u32 +// CHECK: vld3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +uint32x4x3_t test_vld3q_u32(uint32_t const * a) { + return vld3q_u32(a); +} + +// CHECK: test_vld3q_s8 +// CHECK: vld3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +int8x16x3_t test_vld3q_s8(int8_t const * a) { + return vld3q_s8(a); +} + +// CHECK: test_vld3q_s16 +// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +int16x8x3_t test_vld3q_s16(int16_t const * a) { + return vld3q_s16(a); +} + +// CHECK: test_vld3q_s32 +// CHECK: vld3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +int32x4x3_t test_vld3q_s32(int32_t const * a) { + return vld3q_s32(a); +} + +// CHECK: test_vld3q_f16 +// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +float16x8x3_t test_vld3q_f16(float16_t const * a) { + return vld3q_f16(a); +} + +// CHECK: test_vld3q_f32 +// CHECK: vld3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +float32x4x3_t test_vld3q_f32(float32_t const * a) { + return vld3q_f32(a); +} + +// CHECK: test_vld3q_p8 +// CHECK: vld3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +poly8x16x3_t test_vld3q_p8(poly8_t const * a) { + return vld3q_p8(a); +} + +// CHECK: test_vld3q_p16 +// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +poly16x8x3_t test_vld3q_p16(poly16_t const * a) { + return vld3q_p16(a); +} + +// CHECK: test_vld3_u8 +// CHECK: vld3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint8x8x3_t test_vld3_u8(uint8_t const * a) { + return vld3_u8(a); +} + +// CHECK: test_vld3_u16 +// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint16x4x3_t test_vld3_u16(uint16_t const * a) { + return vld3_u16(a); +} + +// CHECK: test_vld3_u32 +// CHECK: vld3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint32x2x3_t test_vld3_u32(uint32_t const * a) { + return vld3_u32(a); +} + +// CHECK: test_vld3_u64 +// CHECK: vld1.64 +uint64x1x3_t test_vld3_u64(uint64_t const * a) { + return vld3_u64(a); +} + +// CHECK: test_vld3_s8 +// CHECK: vld3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int8x8x3_t 
test_vld3_s8(int8_t const * a) { + return vld3_s8(a); +} + +// CHECK: test_vld3_s16 +// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int16x4x3_t test_vld3_s16(int16_t const * a) { + return vld3_s16(a); +} + +// CHECK: test_vld3_s32 +// CHECK: vld3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int32x2x3_t test_vld3_s32(int32_t const * a) { + return vld3_s32(a); +} + +// CHECK: test_vld3_s64 +// CHECK: vld1.64 +int64x1x3_t test_vld3_s64(int64_t const * a) { + return vld3_s64(a); +} + +// CHECK: test_vld3_f16 +// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +float16x4x3_t test_vld3_f16(float16_t const * a) { + return vld3_f16(a); +} + +// CHECK: test_vld3_f32 +// CHECK: vld3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +float32x2x3_t test_vld3_f32(float32_t const * a) { + return vld3_f32(a); +} + +// CHECK: test_vld3_p8 +// CHECK: vld3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +poly8x8x3_t test_vld3_p8(poly8_t const * a) { + return vld3_p8(a); +} + +// CHECK: test_vld3_p16 +// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +poly16x4x3_t test_vld3_p16(poly16_t const * a) { + return vld3_p16(a); +} + + +// CHECK: test_vld3_dup_u8 +// CHECK: vld3.8 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +uint8x8x3_t test_vld3_dup_u8(uint8_t const * a) { + return vld3_dup_u8(a); +} + +// CHECK: test_vld3_dup_u16 +// CHECK: vld3.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +uint16x4x3_t test_vld3_dup_u16(uint16_t const * a) { + return vld3_dup_u16(a); +} + +// CHECK: test_vld3_dup_u32 +// CHECK: vld3.32 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +uint32x2x3_t test_vld3_dup_u32(uint32_t const * a) { + return vld3_dup_u32(a); +} + +// CHECK: test_vld3_dup_u64 +// CHECK: vld1.64 +uint64x1x3_t test_vld3_dup_u64(uint64_t const * a) { + return vld3_dup_u64(a); +} + +// CHECK: test_vld3_dup_s8 +// CHECK: vld3.8 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +int8x8x3_t test_vld3_dup_s8(int8_t const * a) { + return vld3_dup_s8(a); +} + +// CHECK: test_vld3_dup_s16 +// CHECK: vld3.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +int16x4x3_t test_vld3_dup_s16(int16_t const * a) { + return vld3_dup_s16(a); +} + +// CHECK: test_vld3_dup_s32 +// CHECK: vld3.32 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +int32x2x3_t test_vld3_dup_s32(int32_t const * a) { + return vld3_dup_s32(a); +} + +// CHECK: test_vld3_dup_s64 +// CHECK: vld1.64 +int64x1x3_t test_vld3_dup_s64(int64_t const * a) { + return vld3_dup_s64(a); +} + +// CHECK: test_vld3_dup_f16 +// CHECK: vld3.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +float16x4x3_t test_vld3_dup_f16(float16_t const * a) { + return vld3_dup_f16(a); +} + +// CHECK: test_vld3_dup_f32 +// CHECK: vld3.32 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +float32x2x3_t test_vld3_dup_f32(float32_t const * a) { + return vld3_dup_f32(a); +} + +// CHECK: test_vld3_dup_p8 +// CHECK: vld3.8 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +poly8x8x3_t test_vld3_dup_p8(poly8_t const * a) { + return vld3_dup_p8(a); +} + +// CHECK: test_vld3_dup_p16 +// CHECK: vld3.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +poly16x4x3_t test_vld3_dup_p16(poly16_t const * a) { + return vld3_dup_p16(a); +} + + +// CHECK: test_vld3q_lane_u16 +// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} 
+uint16x8x3_t test_vld3q_lane_u16(uint16_t const * a, uint16x8x3_t b) { + return vld3q_lane_u16(a, b, 7); +} + +// CHECK: test_vld3q_lane_u32 +// CHECK: vld3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +uint32x4x3_t test_vld3q_lane_u32(uint32_t const * a, uint32x4x3_t b) { + return vld3q_lane_u32(a, b, 3); +} + +// CHECK: test_vld3q_lane_s16 +// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +int16x8x3_t test_vld3q_lane_s16(int16_t const * a, int16x8x3_t b) { + return vld3q_lane_s16(a, b, 7); +} + +// CHECK: test_vld3q_lane_s32 +// CHECK: vld3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +int32x4x3_t test_vld3q_lane_s32(int32_t const * a, int32x4x3_t b) { + return vld3q_lane_s32(a, b, 3); +} + +// CHECK: test_vld3q_lane_f16 +// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +float16x8x3_t test_vld3q_lane_f16(float16_t const * a, float16x8x3_t b) { + return vld3q_lane_f16(a, b, 7); +} + +// CHECK: test_vld3q_lane_f32 +// CHECK: vld3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +float32x4x3_t test_vld3q_lane_f32(float32_t const * a, float32x4x3_t b) { + return vld3q_lane_f32(a, b, 3); +} + +// CHECK: test_vld3q_lane_p16 +// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +poly16x8x3_t test_vld3q_lane_p16(poly16_t const * a, poly16x8x3_t b) { + return vld3q_lane_p16(a, b, 7); +} + +// CHECK: test_vld3_lane_u8 +// CHECK: vld3.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +uint8x8x3_t test_vld3_lane_u8(uint8_t const * a, uint8x8x3_t b) { + return vld3_lane_u8(a, b, 7); +} + +// CHECK: test_vld3_lane_u16 +// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +uint16x4x3_t test_vld3_lane_u16(uint16_t const * a, uint16x4x3_t b) { + return vld3_lane_u16(a, b, 3); +} + +// CHECK: test_vld3_lane_u32 +// CHECK: vld3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +uint32x2x3_t test_vld3_lane_u32(uint32_t const * a, uint32x2x3_t b) { + return vld3_lane_u32(a, b, 1); +} + +// CHECK: test_vld3_lane_s8 +// CHECK: vld3.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +int8x8x3_t test_vld3_lane_s8(int8_t const * a, int8x8x3_t b) { + return vld3_lane_s8(a, b, 7); +} + +// CHECK: test_vld3_lane_s16 +// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +int16x4x3_t test_vld3_lane_s16(int16_t const * a, int16x4x3_t b) { + return vld3_lane_s16(a, b, 3); +} + +// CHECK: test_vld3_lane_s32 +// CHECK: vld3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +int32x2x3_t test_vld3_lane_s32(int32_t const * a, int32x2x3_t b) { + return vld3_lane_s32(a, b, 1); +} + +// CHECK: test_vld3_lane_f16 +// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +float16x4x3_t test_vld3_lane_f16(float16_t const * a, float16x4x3_t b) { + return vld3_lane_f16(a, b, 3); +} + +// CHECK: test_vld3_lane_f32 +// CHECK: vld3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +float32x2x3_t test_vld3_lane_f32(float32_t const * a, float32x2x3_t b) { + return vld3_lane_f32(a, b, 1); +} + +// CHECK: test_vld3_lane_p8 +// CHECK: vld3.8 
{d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +poly8x8x3_t test_vld3_lane_p8(poly8_t const * a, poly8x8x3_t b) { + return vld3_lane_p8(a, b, 7); +} + +// CHECK: test_vld3_lane_p16 +// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +poly16x4x3_t test_vld3_lane_p16(poly16_t const * a, poly16x4x3_t b) { + return vld3_lane_p16(a, b, 3); +} + + +// CHECK: test_vld4q_u8 +// CHECK: vld4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +uint8x16x4_t test_vld4q_u8(uint8_t const * a) { + return vld4q_u8(a); +} + +// CHECK: test_vld4q_u16 +// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +uint16x8x4_t test_vld4q_u16(uint16_t const * a) { + return vld4q_u16(a); +} + +// CHECK: test_vld4q_u32 +// CHECK: vld4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +uint32x4x4_t test_vld4q_u32(uint32_t const * a) { + return vld4q_u32(a); +} + +// CHECK: test_vld4q_s8 +// CHECK: vld4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +int8x16x4_t test_vld4q_s8(int8_t const * a) { + return vld4q_s8(a); +} + +// CHECK: test_vld4q_s16 +// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +int16x8x4_t test_vld4q_s16(int16_t const * a) { + return vld4q_s16(a); +} + +// CHECK: test_vld4q_s32 +// CHECK: vld4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +int32x4x4_t test_vld4q_s32(int32_t const * a) { + return vld4q_s32(a); +} + +// CHECK: test_vld4q_f16 +// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +float16x8x4_t test_vld4q_f16(float16_t const * a) { + return vld4q_f16(a); +} + +// CHECK: test_vld4q_f32 +// CHECK: vld4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +float32x4x4_t test_vld4q_f32(float32_t const * a) { + return vld4q_f32(a); +} + +// CHECK: test_vld4q_p8 +// CHECK: vld4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +poly8x16x4_t test_vld4q_p8(poly8_t const * a) { + return vld4q_p8(a); +} + +// CHECK: test_vld4q_p16 +// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +poly16x8x4_t test_vld4q_p16(poly16_t const * a) { + return vld4q_p16(a); +} + +// CHECK: test_vld4_u8 +// CHECK: vld4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint8x8x4_t test_vld4_u8(uint8_t const * a) { + return vld4_u8(a); +} + +// CHECK: test_vld4_u16 +// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint16x4x4_t test_vld4_u16(uint16_t const * a) { + return vld4_u16(a); +} + +// CHECK: test_vld4_u32 +// CHECK: vld4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +uint32x2x4_t test_vld4_u32(uint32_t const * a) { + return vld4_u32(a); +} + +// CHECK: test_vld4_u64 +// CHECK: vld1.64 +uint64x1x4_t test_vld4_u64(uint64_t const * a) { + return vld4_u64(a); +} + +// CHECK: test_vld4_s8 +// CHECK: vld4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int8x8x4_t test_vld4_s8(int8_t const * a) { + return vld4_s8(a); +} + +// CHECK: test_vld4_s16 +// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int16x4x4_t test_vld4_s16(int16_t const * a) { + return vld4_s16(a); +} + +// CHECK: test_vld4_s32 +// CHECK: vld4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +int32x2x4_t test_vld4_s32(int32_t const * a) { + return vld4_s32(a); +} + +// CHECK: test_vld4_s64 +// CHECK: vld1.64 +int64x1x4_t test_vld4_s64(int64_t const * a) { + return vld4_s64(a); +} + 
+// CHECK: test_vld4_f16 +// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +float16x4x4_t test_vld4_f16(float16_t const * a) { + return vld4_f16(a); +} + +// CHECK: test_vld4_f32 +// CHECK: vld4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +float32x2x4_t test_vld4_f32(float32_t const * a) { + return vld4_f32(a); +} + +// CHECK: test_vld4_p8 +// CHECK: vld4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +poly8x8x4_t test_vld4_p8(poly8_t const * a) { + return vld4_p8(a); +} + +// CHECK: test_vld4_p16 +// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +poly16x4x4_t test_vld4_p16(poly16_t const * a) { + return vld4_p16(a); +} + + +// CHECK: test_vld4_dup_u8 +// CHECK: vld4.8 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +uint8x8x4_t test_vld4_dup_u8(uint8_t const * a) { + return vld4_dup_u8(a); +} + +// CHECK: test_vld4_dup_u16 +// CHECK: vld4.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +uint16x4x4_t test_vld4_dup_u16(uint16_t const * a) { + return vld4_dup_u16(a); +} + +// CHECK: test_vld4_dup_u32 +// CHECK: vld4.32 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +uint32x2x4_t test_vld4_dup_u32(uint32_t const * a) { + return vld4_dup_u32(a); +} + +// CHECK: test_vld4_dup_u64 +// CHECK: vld1.64 +uint64x1x4_t test_vld4_dup_u64(uint64_t const * a) { + return vld4_dup_u64(a); +} + +// CHECK: test_vld4_dup_s8 +// CHECK: vld4.8 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +int8x8x4_t test_vld4_dup_s8(int8_t const * a) { + return vld4_dup_s8(a); +} + +// CHECK: test_vld4_dup_s16 +// CHECK: vld4.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +int16x4x4_t test_vld4_dup_s16(int16_t const * a) { + return vld4_dup_s16(a); +} + +// CHECK: test_vld4_dup_s32 +// CHECK: vld4.32 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +int32x2x4_t test_vld4_dup_s32(int32_t const * a) { + return vld4_dup_s32(a); +} + +// CHECK: test_vld4_dup_s64 +// CHECK: vld1.64 +int64x1x4_t test_vld4_dup_s64(int64_t const * a) { + return vld4_dup_s64(a); +} + +// CHECK: test_vld4_dup_f16 +// CHECK: vld4.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +float16x4x4_t test_vld4_dup_f16(float16_t const * a) { + return vld4_dup_f16(a); +} + +// CHECK: test_vld4_dup_f32 +// CHECK: vld4.32 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +float32x2x4_t test_vld4_dup_f32(float32_t const * a) { + return vld4_dup_f32(a); +} + +// CHECK: test_vld4_dup_p8 +// CHECK: vld4.8 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +poly8x8x4_t test_vld4_dup_p8(poly8_t const * a) { + return vld4_dup_p8(a); +} + +// CHECK: test_vld4_dup_p16 +// CHECK: vld4.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}] +poly16x4x4_t test_vld4_dup_p16(poly16_t const * a) { + return vld4_dup_p16(a); +} + + +// CHECK: test_vld4q_lane_u16 +// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +uint16x8x4_t test_vld4q_lane_u16(uint16_t const * a, uint16x8x4_t b) { + return vld4q_lane_u16(a, b, 7); +} + +// CHECK: test_vld4q_lane_u32 +// CHECK: vld4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +uint32x4x4_t test_vld4q_lane_u32(uint32_t const 
* a, uint32x4x4_t b) { + return vld4q_lane_u32(a, b, 3); +} + +// CHECK: test_vld4q_lane_s16 +// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +int16x8x4_t test_vld4q_lane_s16(int16_t const * a, int16x8x4_t b) { + return vld4q_lane_s16(a, b, 7); +} + +// CHECK: test_vld4q_lane_s32 +// CHECK: vld4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +int32x4x4_t test_vld4q_lane_s32(int32_t const * a, int32x4x4_t b) { + return vld4q_lane_s32(a, b, 3); +} + +// CHECK: test_vld4q_lane_f16 +// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +float16x8x4_t test_vld4q_lane_f16(float16_t const * a, float16x8x4_t b) { + return vld4q_lane_f16(a, b, 7); +} + +// CHECK: test_vld4q_lane_f32 +// CHECK: vld4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +float32x4x4_t test_vld4q_lane_f32(float32_t const * a, float32x4x4_t b) { + return vld4q_lane_f32(a, b, 3); +} + +// CHECK: test_vld4q_lane_p16 +// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +poly16x8x4_t test_vld4q_lane_p16(poly16_t const * a, poly16x8x4_t b) { + return vld4q_lane_p16(a, b, 7); +} + +// CHECK: test_vld4_lane_u8 +// CHECK: vld4.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +uint8x8x4_t test_vld4_lane_u8(uint8_t const * a, uint8x8x4_t b) { + return vld4_lane_u8(a, b, 7); +} + +// CHECK: test_vld4_lane_u16 +// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +uint16x4x4_t test_vld4_lane_u16(uint16_t const * a, uint16x4x4_t b) { + return vld4_lane_u16(a, b, 3); +} + +// CHECK: test_vld4_lane_u32 +// CHECK: vld4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +uint32x2x4_t test_vld4_lane_u32(uint32_t const * a, uint32x2x4_t b) { + return vld4_lane_u32(a, b, 1); +} + +// CHECK: test_vld4_lane_s8 +// CHECK: vld4.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +int8x8x4_t test_vld4_lane_s8(int8_t const * a, int8x8x4_t b) { + return vld4_lane_s8(a, b, 7); +} + +// CHECK: test_vld4_lane_s16 +// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +int16x4x4_t test_vld4_lane_s16(int16_t const * a, int16x4x4_t b) { + return vld4_lane_s16(a, b, 3); +} + +// CHECK: test_vld4_lane_s32 +// CHECK: vld4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +int32x2x4_t test_vld4_lane_s32(int32_t const * a, int32x2x4_t b) { + return vld4_lane_s32(a, b, 1); +} + +// CHECK: test_vld4_lane_f16 +// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +float16x4x4_t test_vld4_lane_f16(float16_t const * a, float16x4x4_t b) { + return vld4_lane_f16(a, b, 3); +} + +// CHECK: test_vld4_lane_f32 +// CHECK: vld4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +float32x2x4_t test_vld4_lane_f32(float32_t const * a, float32x2x4_t b) { + return vld4_lane_f32(a, b, 1); +} + +// 
CHECK: test_vld4_lane_p8 +// CHECK: vld4.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +poly8x8x4_t test_vld4_lane_p8(poly8_t const * a, poly8x8x4_t b) { + return vld4_lane_p8(a, b, 7); +} + +// CHECK: test_vld4_lane_p16 +// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +poly16x4x4_t test_vld4_lane_p16(poly16_t const * a, poly16x4x4_t b) { + return vld4_lane_p16(a, b, 3); +} + + +// CHECK: test_vmax_s8 +// CHECK: vmax.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) { + return vmax_s8(a, b); +} + +// CHECK: test_vmax_s16 +// CHECK: vmax.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) { + return vmax_s16(a, b); +} + +// CHECK: test_vmax_s32 +// CHECK: vmax.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) { + return vmax_s32(a, b); +} + +// CHECK: test_vmax_u8 +// CHECK: vmax.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) { + return vmax_u8(a, b); +} + +// CHECK: test_vmax_u16 +// CHECK: vmax.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) { + return vmax_u16(a, b); +} + +// CHECK: test_vmax_u32 +// CHECK: vmax.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) { + return vmax_u32(a, b); +} + +// CHECK: test_vmax_f32 +// CHECK: vmax.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) { + return vmax_f32(a, b); +} + +// CHECK: test_vmaxq_s8 +// CHECK: vmax.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) { + return vmaxq_s8(a, b); +} + +// CHECK: test_vmaxq_s16 +// CHECK: vmax.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) { + return vmaxq_s16(a, b); +} + +// CHECK: test_vmaxq_s32 +// CHECK: vmax.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) { + return vmaxq_s32(a, b); +} + +// CHECK: test_vmaxq_u8 +// CHECK: vmax.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) { + return vmaxq_u8(a, b); +} + +// CHECK: test_vmaxq_u16 +// CHECK: vmax.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) { + return vmaxq_u16(a, b); +} + +// CHECK: test_vmaxq_u32 +// CHECK: vmax.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) { + return vmaxq_u32(a, b); +} + +// CHECK: test_vmaxq_f32 +// CHECK: vmax.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) { + return vmaxq_f32(a, b); +} + + +// CHECK: test_vmin_s8 +// CHECK: vmin.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) { + return vmin_s8(a, b); +} + +// CHECK: test_vmin_s16 +// CHECK: vmin.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) { + return vmin_s16(a, b); +} + +// CHECK: test_vmin_s32 +// CHECK: vmin.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) { + return vmin_s32(a, b); +} + +// CHECK: test_vmin_u8 +// CHECK: vmin.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) { + return vmin_u8(a, b); +} + +// 
CHECK: test_vmin_u16 +// CHECK: vmin.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) { + return vmin_u16(a, b); +} + +// CHECK: test_vmin_u32 +// CHECK: vmin.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) { + return vmin_u32(a, b); +} + +// CHECK: test_vmin_f32 +// CHECK: vmin.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) { + return vmin_f32(a, b); +} + +// CHECK: test_vminq_s8 +// CHECK: vmin.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) { + return vminq_s8(a, b); +} + +// CHECK: test_vminq_s16 +// CHECK: vmin.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) { + return vminq_s16(a, b); +} + +// CHECK: test_vminq_s32 +// CHECK: vmin.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) { + return vminq_s32(a, b); +} + +// CHECK: test_vminq_u8 +// CHECK: vmin.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) { + return vminq_u8(a, b); +} + +// CHECK: test_vminq_u16 +// CHECK: vmin.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) { + return vminq_u16(a, b); +} + +// CHECK: test_vminq_u32 +// CHECK: vmin.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) { + return vminq_u32(a, b); +} + +// CHECK: test_vminq_f32 +// CHECK: vmin.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) { + return vminq_f32(a, b); +} + + +// CHECK: test_vmla_s8 +// CHECK: vmla.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c) { + return vmla_s8(a, b, c); +} + +// CHECK: test_vmla_s16 +// CHECK: vmla.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c) { + return vmla_s16(a, b, c); +} + +// CHECK: test_vmla_s32 +// CHECK: vmla.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c) { + return vmla_s32(a, b, c); +} + +// CHECK: test_vmla_f32 +// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +// CHECK: vadd.f32 +float32x2_t test_vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c) { + return vmla_f32(a, b, c); +} + +// CHECK: test_vmla_u8 +// CHECK: vmla.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { + return vmla_u8(a, b, c); +} + +// CHECK: test_vmla_u16 +// CHECK: vmla.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { + return vmla_u16(a, b, c); +} + +// CHECK: test_vmla_u32 +// CHECK: vmla.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { + return vmla_u32(a, b, c); +} + +// CHECK: test_vmlaq_s8 +// CHECK: vmla.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) { + return vmlaq_s8(a, b, c); +} + +// CHECK: test_vmlaq_s16 +// CHECK: vmla.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { + return vmlaq_s16(a, b, c); +} + +// CHECK: test_vmlaq_s32 +// CHECK: vmla.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { + return vmlaq_s32(a, b, c); +} + +// 
CHECK: test_vmlaq_f32 +// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// CHECK: vadd.f32 +float32x4_t test_vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { + return vmlaq_f32(a, b, c); +} + +// CHECK: test_vmlaq_u8 +// CHECK: vmla.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { + return vmlaq_u8(a, b, c); +} + +// CHECK: test_vmlaq_u16 +// CHECK: vmla.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) { + return vmlaq_u16(a, b, c); +} + +// CHECK: test_vmlaq_u32 +// CHECK: vmla.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { + return vmlaq_u32(a, b, c); +} + + +// CHECK: test_vmlal_s8 +// CHECK: vmlal.s8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { + return vmlal_s8(a, b, c); +} + +// CHECK: test_vmlal_s16 +// CHECK: vmlal.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + return vmlal_s16(a, b, c); +} + +// CHECK: test_vmlal_s32 +// CHECK: vmlal.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + return vmlal_s32(a, b, c); +} + +// CHECK: test_vmlal_u8 +// CHECK: vmlal.u8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { + return vmlal_u8(a, b, c); +} + +// CHECK: test_vmlal_u16 +// CHECK: vmlal.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { + return vmlal_u16(a, b, c); +} + +// CHECK: test_vmlal_u32 +// CHECK: vmlal.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { + return vmlal_u32(a, b, c); +} + + +// CHECK: test_vmlal_lane_s16 +// CHECK: vmlal.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + return vmlal_lane_s16(a, b, c, 3); +} + +// CHECK: test_vmlal_lane_s32 +// CHECK: vmlal.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + return vmlal_lane_s32(a, b, c, 1); +} + +// CHECK: test_vmlal_lane_u16 +// CHECK: vmlal.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint32x4_t test_vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { + return vmlal_lane_u16(a, b, c, 3); +} + +// CHECK: test_vmlal_lane_u32 +// CHECK: vmlal.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint64x2_t test_vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { + return vmlal_lane_u32(a, b, c, 1); +} + + +// CHECK: test_vmlal_n_s16 +// CHECK: vmlal.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { + return vmlal_n_s16(a, b, c); +} + +// CHECK: test_vmlal_n_s32 +// CHECK: vmlal.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { + return vmlal_n_s32(a, b, c); +} + +// CHECK: test_vmlal_n_u16 +// CHECK: vmlal.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { + return vmlal_n_u16(a, b, c); +} + +// CHECK: test_vmlal_n_u32 +// CHECK: vmlal.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { + return vmlal_n_u32(a, b, c); +} + + +// CHECK: 
test_vmla_lane_s16 +// CHECK: vmla.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { + return vmla_lane_s16(a, b, c, 3); +} + +// CHECK: test_vmla_lane_s32 +// CHECK: vmla.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { + return vmla_lane_s32(a, b, c, 1); +} + +// CHECK: test_vmla_lane_u16 +// CHECK: vmla.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { + return vmla_lane_u16(a, b, c, 3); +} + +// CHECK: test_vmla_lane_u32 +// CHECK: vmla.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { + return vmla_lane_u32(a, b, c, 1); +} + +// CHECK: test_vmla_lane_f32 +// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +// CHECK: vadd.f32 +float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) { + return vmla_lane_f32(a, b, c, 1); +} + +// CHECK: test_vmlaq_lane_s16 +// CHECK: vmla.i16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { + return vmlaq_lane_s16(a, b, c, 3); +} + +// CHECK: test_vmlaq_lane_s32 +// CHECK: vmla.i32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { + return vmlaq_lane_s32(a, b, c, 1); +} + +// CHECK: test_vmlaq_lane_u16 +// CHECK: vmla.i16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) { + return vmlaq_lane_u16(a, b, c, 3); +} + +// CHECK: test_vmlaq_lane_u32 +// CHECK: vmla.i32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) { + return vmlaq_lane_u32(a, b, c, 1); +} + +// CHECK: test_vmlaq_lane_f32 +// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +// CHECK: vadd.f32 +float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) { + return vmlaq_lane_f32(a, b, c, 1); +} + + +// CHECK: test_vmla_n_s16 +// CHECK: vmla.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) { + return vmla_n_s16(a, b, c); +} + +// CHECK: test_vmla_n_s32 +// CHECK: vmla.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) { + return vmla_n_s32(a, b, c); +} + +// CHECK: test_vmla_n_u16 +// CHECK: vmla.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) { + return vmla_n_u16(a, b, c); +} + +// CHECK: test_vmla_n_u32 +// CHECK: vmla.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) { + return vmla_n_u32(a, b, c); +} + +// CHECK: test_vmla_n_f32 +// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +// CHECK: vadd.f32 +float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) { + return vmla_n_f32(a, b, c); +} + +// CHECK: test_vmlaq_n_s16 +// CHECK: vmla.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { + return vmlaq_n_s16(a, b, c); +} + +// CHECK: test_vmlaq_n_s32 +// CHECK: vmla.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { + return vmlaq_n_s32(a, b, c); +} + +// CHECK: 
test_vmlaq_n_u16 +// CHECK: vmla.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) { + return vmlaq_n_u16(a, b, c); +} + +// CHECK: test_vmlaq_n_u32 +// CHECK: vmla.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { + return vmlaq_n_u32(a, b, c); +} + +// CHECK: test_vmlaq_n_f32 +// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[0] +// CHECK: vadd.f32 +float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { + return vmlaq_n_f32(a, b, c); +} + + +// CHECK: test_vmls_s8 +// CHECK: vmls.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c) { + return vmls_s8(a, b, c); +} + +// CHECK: test_vmls_s16 +// CHECK: vmls.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c) { + return vmls_s16(a, b, c); +} + +// CHECK: test_vmls_s32 +// CHECK: vmls.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c) { + return vmls_s32(a, b, c); +} + +// CHECK: test_vmls_f32 +// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +// CHECK: vsub.f32 +float32x2_t test_vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c) { + return vmls_f32(a, b, c); +} + +// CHECK: test_vmls_u8 +// CHECK: vmls.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { + return vmls_u8(a, b, c); +} + +// CHECK: test_vmls_u16 +// CHECK: vmls.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { + return vmls_u16(a, b, c); +} + +// CHECK: test_vmls_u32 +// CHECK: vmls.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { + return vmls_u32(a, b, c); +} + +// CHECK: test_vmlsq_s8 +// CHECK: vmls.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c) { + return vmlsq_s8(a, b, c); +} + +// CHECK: test_vmlsq_s16 +// CHECK: vmls.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { + return vmlsq_s16(a, b, c); +} + +// CHECK: test_vmlsq_s32 +// CHECK: vmls.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { + return vmlsq_s32(a, b, c); +} + +// CHECK: test_vmlsq_f32 +// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +// CHECK: vsub.f32 +float32x4_t test_vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { + return vmlsq_f32(a, b, c); +} + +// CHECK: test_vmlsq_u8 +// CHECK: vmls.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { + return vmlsq_u8(a, b, c); +} + +// CHECK: test_vmlsq_u16 +// CHECK: vmls.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) { + return vmlsq_u16(a, b, c); +} + +// CHECK: test_vmlsq_u32 +// CHECK: vmls.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { + return vmlsq_u32(a, b, c); +} + + +// CHECK: test_vmlsl_s8 +// CHECK: vmlsl.s8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) { + return vmlsl_s8(a, b, c); +} + +// CHECK: test_vmlsl_s16 +// CHECK: vmlsl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) 
{ + return vmlsl_s16(a, b, c); +} + +// CHECK: test_vmlsl_s32 +// CHECK: vmlsl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + return vmlsl_s32(a, b, c); +} + +// CHECK: test_vmlsl_u8 +// CHECK: vmlsl.u8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { + return vmlsl_u8(a, b, c); +} + +// CHECK: test_vmlsl_u16 +// CHECK: vmlsl.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { + return vmlsl_u16(a, b, c); +} + +// CHECK: test_vmlsl_u32 +// CHECK: vmlsl.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { + return vmlsl_u32(a, b, c); +} + + +// CHECK: test_vmlsl_lane_s16 +// CHECK: vmlsl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + return vmlsl_lane_s16(a, b, c, 3); +} + +// CHECK: test_vmlsl_lane_s32 +// CHECK: vmlsl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + return vmlsl_lane_s32(a, b, c, 1); +} + +// CHECK: test_vmlsl_lane_u16 +// CHECK: vmlsl.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint32x4_t test_vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { + return vmlsl_lane_u16(a, b, c, 3); +} + +// CHECK: test_vmlsl_lane_u32 +// CHECK: vmlsl.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint64x2_t test_vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { + return vmlsl_lane_u32(a, b, c, 1); +} + + +// CHECK: test_vmlsl_n_s16 +// CHECK: vmlsl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { + return vmlsl_n_s16(a, b, c); +} + +// CHECK: test_vmlsl_n_s32 +// CHECK: vmlsl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { + return vmlsl_n_s32(a, b, c); +} + +// CHECK: test_vmlsl_n_u16 +// CHECK: vmlsl.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { + return vmlsl_n_u16(a, b, c); +} + +// CHECK: test_vmlsl_n_u32 +// CHECK: vmlsl.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { + return vmlsl_n_u32(a, b, c); +} + + +// CHECK: test_vmls_lane_s16 +// CHECK: vmls.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { + return vmls_lane_s16(a, b, c, 3); +} + +// CHECK: test_vmls_lane_s32 +// CHECK: vmls.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { + return vmls_lane_s32(a, b, c, 1); +} + +// CHECK: test_vmls_lane_u16 +// CHECK: vmls.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { + return vmls_lane_u16(a, b, c, 3); +} + +// CHECK: test_vmls_lane_u32 +// CHECK: vmls.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { + return vmls_lane_u32(a, b, c, 1); +} + +// CHECK: test_vmls_lane_f32 +// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +// CHECK: vsub.f32 +float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) { + return vmls_lane_f32(a, b, c, 1); +} + +// CHECK: test_vmlsq_lane_s16 +// 
CHECK: vmls.i16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { + return vmlsq_lane_s16(a, b, c, 3); +} + +// CHECK: test_vmlsq_lane_s32 +// CHECK: vmls.i32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { + return vmlsq_lane_s32(a, b, c, 1); +} + +// CHECK: test_vmlsq_lane_u16 +// CHECK: vmls.i16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) { + return vmlsq_lane_u16(a, b, c, 3); +} + +// CHECK: test_vmlsq_lane_u32 +// CHECK: vmls.i32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) { + return vmlsq_lane_u32(a, b, c, 1); +} + +// CHECK: test_vmlsq_lane_f32 +// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +// CHECK: vsub.f32 +float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) { + return vmlsq_lane_f32(a, b, c, 1); +} + + +// CHECK: test_vmls_n_s16 +// CHECK: vmls.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) { + return vmls_n_s16(a, b, c); +} + +// CHECK: test_vmls_n_s32 +// CHECK: vmls.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) { + return vmls_n_s32(a, b, c); +} + +// CHECK: test_vmls_n_u16 +// CHECK: vmls.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) { + return vmls_n_u16(a, b, c); +} + +// CHECK: test_vmls_n_u32 +// CHECK: vmls.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) { + return vmls_n_u32(a, b, c); +} + +// CHECK: test_vmls_n_f32 +// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +// CHECK: vsub.f32 +float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) { + return vmls_n_f32(a, b, c); +} + +// CHECK: test_vmlsq_n_s16 +// CHECK: vmls.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { + return vmlsq_n_s16(a, b, c); +} + +// CHECK: test_vmlsq_n_s32 +// CHECK: vmls.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { + return vmlsq_n_s32(a, b, c); +} + +// CHECK: test_vmlsq_n_u16 +// CHECK: vmls.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) { + return vmlsq_n_u16(a, b, c); +} + +// CHECK: test_vmlsq_n_u32 +// CHECK: vmls.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { + return vmlsq_n_u32(a, b, c); +} + +// CHECK: test_vmlsq_n_f32 +// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[0] +// CHECK: vsub.f32 +float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { + return vmlsq_n_f32(a, b, c); +} + + +// CHECK: test_vmovl_s8 +// CHECK: vmovl.s8 q{{[0-9]+}}, d{{[0-9]+}} +int16x8_t test_vmovl_s8(int8x8_t a) { + return vmovl_s8(a); +} + +// CHECK: test_vmovl_s16 +// CHECK: vmovl.s16 q{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vmovl_s16(int16x4_t a) { + return vmovl_s16(a); +} + +// CHECK: test_vmovl_s32 +// CHECK: vmovl.s32 q{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vmovl_s32(int32x2_t a) { + return vmovl_s32(a); +} + +// CHECK: test_vmovl_u8 +// CHECK: vmovl.u8 q{{[0-9]+}}, d{{[0-9]+}} +uint16x8_t test_vmovl_u8(uint8x8_t a) { 
+ return vmovl_u8(a); +} + +// CHECK: test_vmovl_u16 +// CHECK: vmovl.u16 q{{[0-9]+}}, d{{[0-9]+}} +uint32x4_t test_vmovl_u16(uint16x4_t a) { + return vmovl_u16(a); +} + +// CHECK: test_vmovl_u32 +// CHECK: vmovl.u32 q{{[0-9]+}}, d{{[0-9]+}} +uint64x2_t test_vmovl_u32(uint32x2_t a) { + return vmovl_u32(a); +} + + +// CHECK: test_vmovn_s16 +// CHECK: vmovn.i16 d{{[0-9]+}}, q{{[0-9]+}} +int8x8_t test_vmovn_s16(int16x8_t a) { + return vmovn_s16(a); +} + +// CHECK: test_vmovn_s32 +// CHECK: vmovn.i32 d{{[0-9]+}}, q{{[0-9]+}} +int16x4_t test_vmovn_s32(int32x4_t a) { + return vmovn_s32(a); +} + +// CHECK: test_vmovn_s64 +// CHECK: vmovn.i64 d{{[0-9]+}}, q{{[0-9]+}} +int32x2_t test_vmovn_s64(int64x2_t a) { + return vmovn_s64(a); +} + +// CHECK: test_vmovn_u16 +// CHECK: vmovn.i16 d{{[0-9]+}}, q{{[0-9]+}} +uint8x8_t test_vmovn_u16(uint16x8_t a) { + return vmovn_u16(a); +} + +// CHECK: test_vmovn_u32 +// CHECK: vmovn.i32 d{{[0-9]+}}, q{{[0-9]+}} +uint16x4_t test_vmovn_u32(uint32x4_t a) { + return vmovn_u32(a); +} + +// CHECK: test_vmovn_u64 +// CHECK: vmovn.i64 d{{[0-9]+}}, q{{[0-9]+}} +uint32x2_t test_vmovn_u64(uint64x2_t a) { + return vmovn_u64(a); +} + + +// CHECK: test_vmov_n_u8 +// CHECK: vmov +uint8x8_t test_vmov_n_u8(uint8_t a) { + return vmov_n_u8(a); +} + +// CHECK: test_vmov_n_u16 +// CHECK: vmov +uint16x4_t test_vmov_n_u16(uint16_t a) { + return vmov_n_u16(a); +} + +// CHECK: test_vmov_n_u32 +// CHECK: vmov +uint32x2_t test_vmov_n_u32(uint32_t a) { + return vmov_n_u32(a); +} + +// CHECK: test_vmov_n_s8 +// CHECK: vmov +int8x8_t test_vmov_n_s8(int8_t a) { + return vmov_n_s8(a); +} + +// CHECK: test_vmov_n_s16 +// CHECK: vmov +int16x4_t test_vmov_n_s16(int16_t a) { + return vmov_n_s16(a); +} + +// CHECK: test_vmov_n_s32 +// CHECK: vmov +int32x2_t test_vmov_n_s32(int32_t a) { + return vmov_n_s32(a); +} + +// CHECK: test_vmov_n_p8 +// CHECK: vmov +poly8x8_t test_vmov_n_p8(poly8_t a) { + return vmov_n_p8(a); +} + +// CHECK: test_vmov_n_p16 +// CHECK: vmov +poly16x4_t test_vmov_n_p16(poly16_t a) { + return vmov_n_p16(a); +} + +// CHECK: test_vmov_n_f32 +// CHECK: vmov +float32x2_t test_vmov_n_f32(float32_t a) { + return vmov_n_f32(a); +} + +// CHECK: test_vmovq_n_u8 +// CHECK: vmov +uint8x16_t test_vmovq_n_u8(uint8_t a) { + return vmovq_n_u8(a); +} + +// CHECK: test_vmovq_n_u16 +// CHECK: vmov +uint16x8_t test_vmovq_n_u16(uint16_t a) { + return vmovq_n_u16(a); +} + +// CHECK: test_vmovq_n_u32 +// CHECK: vmov +uint32x4_t test_vmovq_n_u32(uint32_t a) { + return vmovq_n_u32(a); +} + +// CHECK: test_vmovq_n_s8 +// CHECK: vmov +int8x16_t test_vmovq_n_s8(int8_t a) { + return vmovq_n_s8(a); +} + +// CHECK: test_vmovq_n_s16 +// CHECK: vmov +int16x8_t test_vmovq_n_s16(int16_t a) { + return vmovq_n_s16(a); +} + +// CHECK: test_vmovq_n_s32 +// CHECK: vmov +int32x4_t test_vmovq_n_s32(int32_t a) { + return vmovq_n_s32(a); +} + +// CHECK: test_vmovq_n_p8 +// CHECK: vmov +poly8x16_t test_vmovq_n_p8(poly8_t a) { + return vmovq_n_p8(a); +} + +// CHECK: test_vmovq_n_p16 +// CHECK: vmov +poly16x8_t test_vmovq_n_p16(poly16_t a) { + return vmovq_n_p16(a); +} + +// CHECK: test_vmovq_n_f32 +// CHECK: vmov +float32x4_t test_vmovq_n_f32(float32_t a) { + return vmovq_n_f32(a); +} + +// CHECK: test_vmov_n_s64 +// CHECK: vmov +int64x1_t test_vmov_n_s64(int64_t a) { + return vmov_n_s64(a); +} + +// CHECK: test_vmov_n_u64 +// CHECK: vmov +uint64x1_t test_vmov_n_u64(uint64_t a) { + return vmov_n_u64(a); +} + +// CHECK: test_vmovq_n_s64 +// CHECK: vmov +int64x2_t test_vmovq_n_s64(int64_t a) { + return vmovq_n_s64(a); +} + 
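[Editorial note, not part of the patch.] The vmov_n/vmovq_n tests above only check that a bare vmov is emitted, because these intrinsics simply splat one scalar into every lane. A minimal hedged sketch of how such a splat composes with the element-wise multiplies tested earlier (helper name and usage are illustrative assumptions, not from the patch):

#include <arm_neon.h>

/* Illustrative sketch: scale four floats by a scalar using the splat and
   lane-wise multiply intrinsics exercised by the tests above.
   Assumes a NEON-enabled ARM target. */
static inline void scale4(float *dst, const float *src, float k) {
  float32x4_t v  = vld1q_f32(src);     /* load 4 contiguous floats          */
  float32x4_t kk = vmovq_n_f32(k);     /* splat k into all 4 lanes          */
  vst1q_f32(dst, vmulq_f32(v, kk));    /* multiply lane-wise and store back */
}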
+// CHECK: test_vmovq_n_u64 +// CHECK: vmov +uint64x2_t test_vmovq_n_u64(uint64_t a) { + return vmovq_n_u64(a); +} + + +// CHECK: test_vmul_s8 +// CHECK: vmul.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vmul_s8(int8x8_t a, int8x8_t b) { + return vmul_s8(a, b); +} + +// CHECK: test_vmul_s16 +// CHECK: vmul.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vmul_s16(int16x4_t a, int16x4_t b) { + return vmul_s16(a, b); +} + +// CHECK: test_vmul_s32 +// CHECK: vmul.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vmul_s32(int32x2_t a, int32x2_t b) { + return vmul_s32(a, b); +} + +// CHECK: test_vmul_f32 +// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vmul_f32(float32x2_t a, float32x2_t b) { + return vmul_f32(a, b); +} + +// CHECK: test_vmul_u8 +// CHECK: vmul.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vmul_u8(uint8x8_t a, uint8x8_t b) { + return vmul_u8(a, b); +} + +// CHECK: test_vmul_u16 +// CHECK: vmul.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vmul_u16(uint16x4_t a, uint16x4_t b) { + return vmul_u16(a, b); +} + +// CHECK: test_vmul_u32 +// CHECK: vmul.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vmul_u32(uint32x2_t a, uint32x2_t b) { + return vmul_u32(a, b); +} + +// CHECK: test_vmulq_s8 +// CHECK: vmul.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vmulq_s8(int8x16_t a, int8x16_t b) { + return vmulq_s8(a, b); +} + +// CHECK: test_vmulq_s16 +// CHECK: vmul.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vmulq_s16(int16x8_t a, int16x8_t b) { + return vmulq_s16(a, b); +} + +// CHECK: test_vmulq_s32 +// CHECK: vmul.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vmulq_s32(int32x4_t a, int32x4_t b) { + return vmulq_s32(a, b); +} + +// CHECK: test_vmulq_f32 +// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b) { + return vmulq_f32(a, b); +} + +// CHECK: test_vmulq_u8 +// CHECK: vmul.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vmulq_u8(uint8x16_t a, uint8x16_t b) { + return vmulq_u8(a, b); +} + +// CHECK: test_vmulq_u16 +// CHECK: vmul.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vmulq_u16(uint16x8_t a, uint16x8_t b) { + return vmulq_u16(a, b); +} + +// CHECK: test_vmulq_u32 +// CHECK: vmul.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) { + return vmulq_u32(a, b); +} + + +// CHECK: test_vmull_s8 +// CHECK: vmull.s8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) { + return vmull_s8(a, b); +} + +// CHECK: test_vmull_s16 +// CHECK: vmull.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) { + return vmull_s16(a, b); +} + +// CHECK: test_vmull_s32 +// CHECK: vmull.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) { + return vmull_s32(a, b); +} + +// CHECK: test_vmull_u8 +// CHECK: vmull.u8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) { + return vmull_u8(a, b); +} + +// CHECK: test_vmull_u16 +// CHECK: vmull.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) { + return vmull_u16(a, b); +} + +// CHECK: test_vmull_u32 +// CHECK: vmull.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) { + return vmull_u32(a, b); +} + +// CHECK: test_vmull_p8 +// CHECK: vmull.p8 q{{[0-9]+}}, 
d{{[0-9]+}}, d{{[0-9]+}} +poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) { + return vmull_p8(a, b); +} + + +// CHECK: test_vmull_lane_s16 +// CHECK: vmull.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t b) { + return vmull_lane_s16(a, b, 3); +} + +// CHECK: test_vmull_lane_s32 +// CHECK: vmull.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t b) { + return vmull_lane_s32(a, b, 1); +} + +// CHECK: test_vmull_lane_u16 +// CHECK: vmull.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t b) { + return vmull_lane_u16(a, b, 3); +} + +// CHECK: test_vmull_lane_u32 +// CHECK: vmull.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t b) { + return vmull_lane_u32(a, b, 1); +} + + +// CHECK: test_vmull_n_s16 +// CHECK: vmull.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) { + return vmull_n_s16(a, b); +} + +// CHECK: test_vmull_n_s32 +// CHECK: vmull.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) { + return vmull_n_s32(a, b); +} + +// CHECK: test_vmull_n_u16 +// CHECK: vmull.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) { + return vmull_n_u16(a, b); +} + +// CHECK: test_vmull_n_u32 +// CHECK: vmull.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) { + return vmull_n_u32(a, b); +} + + +// CHECK: test_vmul_p8 +// CHECK: vmul.p8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +poly8x8_t test_vmul_p8(poly8x8_t a, poly8x8_t b) { + return vmul_p8(a, b); +} + +// CHECK: test_vmulq_p8 +// CHECK: vmul.p8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +poly8x16_t test_vmulq_p8(poly8x16_t a, poly8x16_t b) { + return vmulq_p8(a, b); +} + + +// CHECK: test_vmul_lane_s16 +// CHECK: vmul.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t b) { + return vmul_lane_s16(a, b, 3); +} + +// CHECK: test_vmul_lane_s32 +// CHECK: vmul.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t b) { + return vmul_lane_s32(a, b, 1); +} + +// CHECK: test_vmul_lane_f32 +// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t b) { + return vmul_lane_f32(a, b, 1); +} + +// CHECK: test_vmul_lane_u16 +// CHECK: vmul.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t b) { + return vmul_lane_u16(a, b, 3); +} + +// CHECK: test_vmul_lane_u32 +// CHECK: vmul.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t b) { + return vmul_lane_u32(a, b, 1); +} + +// CHECK: test_vmulq_lane_s16 +// CHECK: vmul.i16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t b) { + return vmulq_lane_s16(a, b, 3); +} + +// CHECK: test_vmulq_lane_s32 +// CHECK: vmul.i32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t b) { + return vmulq_lane_s32(a, b, 1); +} + +// CHECK: test_vmulq_lane_f32 +// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t b) { + return vmulq_lane_f32(a, b, 1); +} + +// CHECK: 
test_vmulq_lane_u16 +// CHECK: vmul.i16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t b) { + return vmulq_lane_u16(a, b, 3); +} + +// CHECK: test_vmulq_lane_u32 +// CHECK: vmul.i32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t b) { + return vmulq_lane_u32(a, b, 1); +} + + +// CHECK: test_vmul_n_s16 +// CHECK: vmul.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) { + return vmul_n_s16(a, b); +} + +// CHECK: test_vmul_n_s32 +// CHECK: vmul.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) { + return vmul_n_s32(a, b); +} + +// CHECK: test_vmul_n_f32 +// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) { + return vmul_n_f32(a, b); +} + +// CHECK: test_vmul_n_u16 +// CHECK: vmul.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) { + return vmul_n_u16(a, b); +} + +// CHECK: test_vmul_n_u32 +// CHECK: vmul.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) { + return vmul_n_u32(a, b); +} + +// CHECK: test_vmulq_n_s16 +// CHECK: vmul.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) { + return vmulq_n_s16(a, b); +} + +// CHECK: test_vmulq_n_s32 +// CHECK: vmul.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) { + return vmulq_n_s32(a, b); +} + +// CHECK: test_vmulq_n_f32 +// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[0] +float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) { + return vmulq_n_f32(a, b); +} + +// CHECK: test_vmulq_n_u16 +// CHECK: vmul.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) { + return vmulq_n_u16(a, b); +} + +// CHECK: test_vmulq_n_u32 +// CHECK: vmul.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) { + return vmulq_n_u32(a, b); +} + + +// CHECK: test_vmvn_s8 +// CHECK: vmvn d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vmvn_s8(int8x8_t a) { + return vmvn_s8(a); +} + +// CHECK: test_vmvn_s16 +// CHECK: vmvn d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vmvn_s16(int16x4_t a) { + return vmvn_s16(a); +} + +// CHECK: test_vmvn_s32 +// CHECK: vmvn d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vmvn_s32(int32x2_t a) { + return vmvn_s32(a); +} + +// CHECK: test_vmvn_u8 +// CHECK: vmvn d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vmvn_u8(uint8x8_t a) { + return vmvn_u8(a); +} + +// CHECK: test_vmvn_u16 +// CHECK: vmvn d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vmvn_u16(uint16x4_t a) { + return vmvn_u16(a); +} + +// CHECK: test_vmvn_u32 +// CHECK: vmvn d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vmvn_u32(uint32x2_t a) { + return vmvn_u32(a); +} + +// CHECK: test_vmvn_p8 +// CHECK: vmvn d{{[0-9]+}}, d{{[0-9]+}} +poly8x8_t test_vmvn_p8(poly8x8_t a) { + return vmvn_p8(a); +} + +// CHECK: test_vmvnq_s8 +// CHECK: vmvn q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vmvnq_s8(int8x16_t a) { + return vmvnq_s8(a); +} + +// CHECK: test_vmvnq_s16 +// CHECK: vmvn q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vmvnq_s16(int16x8_t a) { + return vmvnq_s16(a); +} + +// CHECK: test_vmvnq_s32 +// CHECK: vmvn q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vmvnq_s32(int32x4_t a) { + return vmvnq_s32(a); +} + +// CHECK: test_vmvnq_u8 +// CHECK: vmvn q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t 
test_vmvnq_u8(uint8x16_t a) { + return vmvnq_u8(a); +} + +// CHECK: test_vmvnq_u16 +// CHECK: vmvn q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vmvnq_u16(uint16x8_t a) { + return vmvnq_u16(a); +} + +// CHECK: test_vmvnq_u32 +// CHECK: vmvn q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vmvnq_u32(uint32x4_t a) { + return vmvnq_u32(a); +} + +// CHECK: test_vmvnq_p8 +// CHECK: vmvn q{{[0-9]+}}, q{{[0-9]+}} +poly8x16_t test_vmvnq_p8(poly8x16_t a) { + return vmvnq_p8(a); +} + + +// CHECK: test_vneg_s8 +// CHECK: vneg.s8 d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vneg_s8(int8x8_t a) { + return vneg_s8(a); +} + +// CHECK: test_vneg_s16 +// CHECK: vneg.s16 d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vneg_s16(int16x4_t a) { + return vneg_s16(a); +} + +// CHECK: test_vneg_s32 +// CHECK: vneg.s32 d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vneg_s32(int32x2_t a) { + return vneg_s32(a); +} + +// CHECK: test_vneg_f32 +// CHECK: vneg.f32 d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vneg_f32(float32x2_t a) { + return vneg_f32(a); +} + +// CHECK: test_vnegq_s8 +// CHECK: vneg.s8 q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vnegq_s8(int8x16_t a) { + return vnegq_s8(a); +} + +// CHECK: test_vnegq_s16 +// CHECK: vneg.s16 q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vnegq_s16(int16x8_t a) { + return vnegq_s16(a); +} + +// CHECK: test_vnegq_s32 +// CHECK: vneg.s32 q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vnegq_s32(int32x4_t a) { + return vnegq_s32(a); +} + +// CHECK: test_vnegq_f32 +// CHECK: vneg.f32 q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vnegq_f32(float32x4_t a) { + return vnegq_f32(a); +} + + +// CHECK: test_vorn_s8 +// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) { + return vorn_s8(a, b); +} + +// CHECK: test_vorn_s16 +// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) { + return vorn_s16(a, b); +} + +// CHECK: test_vorn_s32 +// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) { + return vorn_s32(a, b); +} + +// CHECK: test_vorn_s64 +// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) { + return vorn_s64(a, b); +} + +// CHECK: test_vorn_u8 +// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) { + return vorn_u8(a, b); +} + +// CHECK: test_vorn_u16 +// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) { + return vorn_u16(a, b); +} + +// CHECK: test_vorn_u32 +// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) { + return vorn_u32(a, b); +} + +// CHECK: test_vorn_u64 +// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) { + return vorn_u64(a, b); +} + +// CHECK: test_vornq_s8 +// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) { + return vornq_s8(a, b); +} + +// CHECK: test_vornq_s16 +// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) { + return vornq_s16(a, b); +} + +// CHECK: test_vornq_s32 +// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) { + return vornq_s32(a, b); +} + +// CHECK: test_vornq_s64 +// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) { + return vornq_s64(a, b); +} + +// CHECK: 
test_vornq_u8 +// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) { + return vornq_u8(a, b); +} + +// CHECK: test_vornq_u16 +// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) { + return vornq_u16(a, b); +} + +// CHECK: test_vornq_u32 +// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) { + return vornq_u32(a, b); +} + +// CHECK: test_vornq_u64 +// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) { + return vornq_u64(a, b); +} + + +// CHECK: test_vorr_s8 +// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vorr_s8(int8x8_t a, int8x8_t b) { + return vorr_s8(a, b); +} + +// CHECK: test_vorr_s16 +// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vorr_s16(int16x4_t a, int16x4_t b) { + return vorr_s16(a, b); +} + +// CHECK: test_vorr_s32 +// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vorr_s32(int32x2_t a, int32x2_t b) { + return vorr_s32(a, b); +} + +// CHECK: test_vorr_s64 +// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_vorr_s64(int64x1_t a, int64x1_t b) { + return vorr_s64(a, b); +} + +// CHECK: test_vorr_u8 +// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vorr_u8(uint8x8_t a, uint8x8_t b) { + return vorr_u8(a, b); +} + +// CHECK: test_vorr_u16 +// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vorr_u16(uint16x4_t a, uint16x4_t b) { + return vorr_u16(a, b); +} + +// CHECK: test_vorr_u32 +// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vorr_u32(uint32x2_t a, uint32x2_t b) { + return vorr_u32(a, b); +} + +// CHECK: test_vorr_u64 +// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_vorr_u64(uint64x1_t a, uint64x1_t b) { + return vorr_u64(a, b); +} + +// CHECK: test_vorrq_s8 +// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) { + return vorrq_s8(a, b); +} + +// CHECK: test_vorrq_s16 +// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) { + return vorrq_s16(a, b); +} + +// CHECK: test_vorrq_s32 +// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) { + return vorrq_s32(a, b); +} + +// CHECK: test_vorrq_s64 +// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) { + return vorrq_s64(a, b); +} + +// CHECK: test_vorrq_u8 +// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) { + return vorrq_u8(a, b); +} + +// CHECK: test_vorrq_u16 +// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) { + return vorrq_u16(a, b); +} + +// CHECK: test_vorrq_u32 +// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) { + return vorrq_u32(a, b); +} + +// CHECK: test_vorrq_u64 +// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) { + return vorrq_u64(a, b); +} + + +// CHECK: test_vpadal_s8 +// CHECK: vpadal.s8 d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) { + return vpadal_s8(a, b); +} + +// CHECK: test_vpadal_s16 +// CHECK: vpadal.s16 d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vpadal_s16(int32x2_t a, 
int16x4_t b) { + return vpadal_s16(a, b); +} + +// CHECK: test_vpadal_s32 +// CHECK: vpadal.s32 d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) { + return vpadal_s32(a, b); +} + +// CHECK: test_vpadal_u8 +// CHECK: vpadal.u8 d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) { + return vpadal_u8(a, b); +} + +// CHECK: test_vpadal_u16 +// CHECK: vpadal.u16 d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) { + return vpadal_u16(a, b); +} + +// CHECK: test_vpadal_u32 +// CHECK: vpadal.u32 d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) { + return vpadal_u32(a, b); +} + +// CHECK: test_vpadalq_s8 +// CHECK: vpadal.s8 q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) { + return vpadalq_s8(a, b); +} + +// CHECK: test_vpadalq_s16 +// CHECK: vpadal.s16 q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) { + return vpadalq_s16(a, b); +} + +// CHECK: test_vpadalq_s32 +// CHECK: vpadal.s32 q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) { + return vpadalq_s32(a, b); +} + +// CHECK: test_vpadalq_u8 +// CHECK: vpadal.u8 q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) { + return vpadalq_u8(a, b); +} + +// CHECK: test_vpadalq_u16 +// CHECK: vpadal.u16 q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) { + return vpadalq_u16(a, b); +} + +// CHECK: test_vpadalq_u32 +// CHECK: vpadal.u32 q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) { + return vpadalq_u32(a, b); +} + + +// CHECK: test_vpadd_s8 +// CHECK: vpadd.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) { + return vpadd_s8(a, b); +} + +// CHECK: test_vpadd_s16 +// CHECK: vpadd.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) { + return vpadd_s16(a, b); +} + +// CHECK: test_vpadd_s32 +// CHECK: vpadd.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) { + return vpadd_s32(a, b); +} + +// CHECK: test_vpadd_u8 +// CHECK: vpadd.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) { + return vpadd_u8(a, b); +} + +// CHECK: test_vpadd_u16 +// CHECK: vpadd.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) { + return vpadd_u16(a, b); +} + +// CHECK: test_vpadd_u32 +// CHECK: vpadd.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) { + return vpadd_u32(a, b); +} + +// CHECK: test_vpadd_f32 +// CHECK: vpadd.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) { + return vpadd_f32(a, b); +} + + +// CHECK: test_vpaddl_s8 +// CHECK: vpaddl.s8 d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vpaddl_s8(int8x8_t a) { + return vpaddl_s8(a); +} + +// CHECK: test_vpaddl_s16 +// CHECK: vpaddl.s16 d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vpaddl_s16(int16x4_t a) { + return vpaddl_s16(a); +} + +// CHECK: test_vpaddl_s32 +// CHECK: vpaddl.s32 d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_vpaddl_s32(int32x2_t a) { + return vpaddl_s32(a); +} + +// CHECK: test_vpaddl_u8 +// CHECK: vpaddl.u8 d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vpaddl_u8(uint8x8_t a) { + return vpaddl_u8(a); +} + +// CHECK: test_vpaddl_u16 +// CHECK: vpaddl.u16 d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t 
test_vpaddl_u16(uint16x4_t a) { + return vpaddl_u16(a); +} + +// CHECK: test_vpaddl_u32 +// CHECK: vpaddl.u32 d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_vpaddl_u32(uint32x2_t a) { + return vpaddl_u32(a); +} + +// CHECK: test_vpaddlq_s8 +// CHECK: vpaddl.s8 q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vpaddlq_s8(int8x16_t a) { + return vpaddlq_s8(a); +} + +// CHECK: test_vpaddlq_s16 +// CHECK: vpaddl.s16 q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vpaddlq_s16(int16x8_t a) { + return vpaddlq_s16(a); +} + +// CHECK: test_vpaddlq_s32 +// CHECK: vpaddl.s32 q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_vpaddlq_s32(int32x4_t a) { + return vpaddlq_s32(a); +} + +// CHECK: test_vpaddlq_u8 +// CHECK: vpaddl.u8 q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vpaddlq_u8(uint8x16_t a) { + return vpaddlq_u8(a); +} + +// CHECK: test_vpaddlq_u16 +// CHECK: vpaddl.u16 q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vpaddlq_u16(uint16x8_t a) { + return vpaddlq_u16(a); +} + +// CHECK: test_vpaddlq_u32 +// CHECK: vpaddl.u32 q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_vpaddlq_u32(uint32x4_t a) { + return vpaddlq_u32(a); +} + + +// CHECK: test_vpmax_s8 +// CHECK: vpmax.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) { + return vpmax_s8(a, b); +} + +// CHECK: test_vpmax_s16 +// CHECK: vpmax.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) { + return vpmax_s16(a, b); +} + +// CHECK: test_vpmax_s32 +// CHECK: vpmax.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) { + return vpmax_s32(a, b); +} + +// CHECK: test_vpmax_u8 +// CHECK: vpmax.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) { + return vpmax_u8(a, b); +} + +// CHECK: test_vpmax_u16 +// CHECK: vpmax.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) { + return vpmax_u16(a, b); +} + +// CHECK: test_vpmax_u32 +// CHECK: vpmax.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) { + return vpmax_u32(a, b); +} + +// CHECK: test_vpmax_f32 +// CHECK: vpmax.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) { + return vpmax_f32(a, b); +} + + +// CHECK: test_vpmin_s8 +// CHECK: vpmin.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) { + return vpmin_s8(a, b); +} + +// CHECK: test_vpmin_s16 +// CHECK: vpmin.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) { + return vpmin_s16(a, b); +} + +// CHECK: test_vpmin_s32 +// CHECK: vpmin.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) { + return vpmin_s32(a, b); +} + +// CHECK: test_vpmin_u8 +// CHECK: vpmin.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) { + return vpmin_u8(a, b); +} + +// CHECK: test_vpmin_u16 +// CHECK: vpmin.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) { + return vpmin_u16(a, b); +} + +// CHECK: test_vpmin_u32 +// CHECK: vpmin.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) { + return vpmin_u32(a, b); +} + +// CHECK: test_vpmin_f32 +// CHECK: vpmin.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) { + return vpmin_f32(a, b); +} + + +// CHECK: test_vqabs_s8 +// CHECK: vqabs.s8 d{{[0-9]+}}, d{{[0-9]+}} 
+int8x8_t test_vqabs_s8(int8x8_t a) { + return vqabs_s8(a); +} + +// CHECK: test_vqabs_s16 +// CHECK: vqabs.s16 d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vqabs_s16(int16x4_t a) { + return vqabs_s16(a); +} + +// CHECK: test_vqabs_s32 +// CHECK: vqabs.s32 d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vqabs_s32(int32x2_t a) { + return vqabs_s32(a); +} + +// CHECK: test_vqabsq_s8 +// CHECK: vqabs.s8 q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vqabsq_s8(int8x16_t a) { + return vqabsq_s8(a); +} + +// CHECK: test_vqabsq_s16 +// CHECK: vqabs.s16 q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vqabsq_s16(int16x8_t a) { + return vqabsq_s16(a); +} + +// CHECK: test_vqabsq_s32 +// CHECK: vqabs.s32 q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vqabsq_s32(int32x4_t a) { + return vqabsq_s32(a); +} + + +// CHECK: test_vqadd_s8 +// CHECK: vqadd.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { + return vqadd_s8(a, b); +} + +// CHECK: test_vqadd_s16 +// CHECK: vqadd.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { + return vqadd_s16(a, b); +} + +// CHECK: test_vqadd_s32 +// CHECK: vqadd.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { + return vqadd_s32(a, b); +} + +// CHECK: test_vqadd_s64 +// CHECK: vqadd.s64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { + return vqadd_s64(a, b); +} + +// CHECK: test_vqadd_u8 +// CHECK: vqadd.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { + return vqadd_u8(a, b); +} + +// CHECK: test_vqadd_u16 +// CHECK: vqadd.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { + return vqadd_u16(a, b); +} + +// CHECK: test_vqadd_u32 +// CHECK: vqadd.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { + return vqadd_u32(a, b); +} + +// CHECK: test_vqadd_u64 +// CHECK: vqadd.u64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { + return vqadd_u64(a, b); +} + +// CHECK: test_vqaddq_s8 +// CHECK: vqadd.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { + return vqaddq_s8(a, b); +} + +// CHECK: test_vqaddq_s16 +// CHECK: vqadd.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { + return vqaddq_s16(a, b); +} + +// CHECK: test_vqaddq_s32 +// CHECK: vqadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { + return vqaddq_s32(a, b); +} + +// CHECK: test_vqaddq_s64 +// CHECK: vqadd.s64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { + return vqaddq_s64(a, b); +} + +// CHECK: test_vqaddq_u8 +// CHECK: vqadd.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { + return vqaddq_u8(a, b); +} + +// CHECK: test_vqaddq_u16 +// CHECK: vqadd.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { + return vqaddq_u16(a, b); +} + +// CHECK: test_vqaddq_u32 +// CHECK: vqadd.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { + return vqaddq_u32(a, b); +} + +// CHECK: test_vqaddq_u64 +// CHECK: vqadd.u64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { + return vqaddq_u64(a, b); +} + + +// CHECK: test_vqdmlal_s16 
+// CHECK: vqdmlal.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + return vqdmlal_s16(a, b, c); +} + +// CHECK: test_vqdmlal_s32 +// CHECK: vqdmlal.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + return vqdmlal_s32(a, b, c); +} + + +// CHECK: test_vqdmlal_lane_s16 +// CHECK: vqdmlal.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + return vqdmlal_lane_s16(a, b, c, 3); +} + +// CHECK: test_vqdmlal_lane_s32 +// CHECK: vqdmlal.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + return vqdmlal_lane_s32(a, b, c, 1); +} + + +// CHECK: test_vqdmlal_n_s16 +// CHECK: vqdmlal.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { + return vqdmlal_n_s16(a, b, c); +} + +// CHECK: test_vqdmlal_n_s32 +// CHECK: vqdmlal.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { + return vqdmlal_n_s32(a, b, c); +} + + +// CHECK: test_vqdmlsl_s16 +// CHECK: vqdmlsl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + return vqdmlsl_s16(a, b, c); +} + +// CHECK: test_vqdmlsl_s32 +// CHECK: vqdmlsl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + return vqdmlsl_s32(a, b, c); +} + + +// CHECK: test_vqdmlsl_lane_s16 +// CHECK: vqdmlsl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { + return vqdmlsl_lane_s16(a, b, c, 3); +} + +// CHECK: test_vqdmlsl_lane_s32 +// CHECK: vqdmlsl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { + return vqdmlsl_lane_s32(a, b, c, 1); +} + + +// CHECK: test_vqdmlsl_n_s16 +// CHECK: vqdmlsl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { + return vqdmlsl_n_s16(a, b, c); +} + +// CHECK: test_vqdmlsl_n_s32 +// CHECK: vqdmlsl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { + return vqdmlsl_n_s32(a, b, c); +} + + +// CHECK: test_vqdmulh_s16 +// CHECK: vqdmulh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) { + return vqdmulh_s16(a, b); +} + +// CHECK: test_vqdmulh_s32 +// CHECK: vqdmulh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) { + return vqdmulh_s32(a, b); +} + +// CHECK: test_vqdmulhq_s16 +// CHECK: vqdmulh.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) { + return vqdmulhq_s16(a, b); +} + +// CHECK: test_vqdmulhq_s32 +// CHECK: vqdmulh.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) { + return vqdmulhq_s32(a, b); +} + + +// CHECK: test_vqdmulh_lane_s16 +// CHECK: vqdmulh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t b) { + return vqdmulh_lane_s16(a, b, 3); +} + +// CHECK: test_vqdmulh_lane_s32 +// CHECK: vqdmulh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t b) { + return 
vqdmulh_lane_s32(a, b, 1); +} + +// CHECK: test_vqdmulhq_lane_s16 +// CHECK: vqdmulh.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t b) { + return vqdmulhq_lane_s16(a, b, 3); +} + +// CHECK: test_vqdmulhq_lane_s32 +// CHECK: vqdmulh.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t b) { + return vqdmulhq_lane_s32(a, b, 1); +} + + +// CHECK: test_vqdmulh_n_s16 +// CHECK: vqdmulh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) { + return vqdmulh_n_s16(a, b); +} + +// CHECK: test_vqdmulh_n_s32 +// CHECK: vqdmulh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) { + return vqdmulh_n_s32(a, b); +} + +// CHECK: test_vqdmulhq_n_s16 +// CHECK: vqdmulh.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) { + return vqdmulhq_n_s16(a, b); +} + +// CHECK: test_vqdmulhq_n_s32 +// CHECK: vqdmulh.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) { + return vqdmulhq_n_s32(a, b); +} + + +// CHECK: test_vqdmull_s16 +// CHECK: vqdmull.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) { + return vqdmull_s16(a, b); +} + +// CHECK: test_vqdmull_s32 +// CHECK: vqdmull.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) { + return vqdmull_s32(a, b); +} + + +// CHECK: test_vqdmull_lane_s16 +// CHECK: vqdmull.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t b) { + return vqdmull_lane_s16(a, b, 3); +} + +// CHECK: test_vqdmull_lane_s32 +// CHECK: vqdmull.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t b) { + return vqdmull_lane_s32(a, b, 1); +} + + +// CHECK: test_vqdmull_n_s16 +// CHECK: vqdmull.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) { + return vqdmull_n_s16(a, b); +} + +// CHECK: test_vqdmull_n_s32 +// CHECK: vqdmull.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) { + return vqdmull_n_s32(a, b); +} + + +// CHECK: test_vqmovn_s16 +// CHECK: vqmovn.s16 d{{[0-9]+}}, q{{[0-9]+}} +int8x8_t test_vqmovn_s16(int16x8_t a) { + return vqmovn_s16(a); +} + +// CHECK: test_vqmovn_s32 +// CHECK: vqmovn.s32 d{{[0-9]+}}, q{{[0-9]+}} +int16x4_t test_vqmovn_s32(int32x4_t a) { + return vqmovn_s32(a); +} + +// CHECK: test_vqmovn_s64 +// CHECK: vqmovn.s64 d{{[0-9]+}}, q{{[0-9]+}} +int32x2_t test_vqmovn_s64(int64x2_t a) { + return vqmovn_s64(a); +} + +// CHECK: test_vqmovn_u16 +// CHECK: vqmovn.u16 d{{[0-9]+}}, q{{[0-9]+}} +uint8x8_t test_vqmovn_u16(uint16x8_t a) { + return vqmovn_u16(a); +} + +// CHECK: test_vqmovn_u32 +// CHECK: vqmovn.u32 d{{[0-9]+}}, q{{[0-9]+}} +uint16x4_t test_vqmovn_u32(uint32x4_t a) { + return vqmovn_u32(a); +} + +// CHECK: test_vqmovn_u64 +// CHECK: vqmovn.u64 d{{[0-9]+}}, q{{[0-9]+}} +uint32x2_t test_vqmovn_u64(uint64x2_t a) { + return vqmovn_u64(a); +} + + +// CHECK: test_vqmovun_s16 +// CHECK: vqmovun.s16 d{{[0-9]+}}, q{{[0-9]+}} +uint8x8_t test_vqmovun_s16(int16x8_t a) { + return vqmovun_s16(a); +} + +// CHECK: test_vqmovun_s32 +// CHECK: vqmovun.s32 d{{[0-9]+}}, q{{[0-9]+}} +uint16x4_t test_vqmovun_s32(int32x4_t a) { + return vqmovun_s32(a); +} + +// CHECK: test_vqmovun_s64 +// CHECK: 
vqmovun.s64 d{{[0-9]+}}, q{{[0-9]+}} +uint32x2_t test_vqmovun_s64(int64x2_t a) { + return vqmovun_s64(a); +} + + +// CHECK: test_vqneg_s8 +// CHECK: vqneg.s8 d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vqneg_s8(int8x8_t a) { + return vqneg_s8(a); +} + +// CHECK: test_vqneg_s16 +// CHECK: vqneg.s16 d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vqneg_s16(int16x4_t a) { + return vqneg_s16(a); +} + +// CHECK: test_vqneg_s32 +// CHECK: vqneg.s32 d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vqneg_s32(int32x2_t a) { + return vqneg_s32(a); +} + +// CHECK: test_vqnegq_s8 +// CHECK: vqneg.s8 q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vqnegq_s8(int8x16_t a) { + return vqnegq_s8(a); +} + +// CHECK: test_vqnegq_s16 +// CHECK: vqneg.s16 q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vqnegq_s16(int16x8_t a) { + return vqnegq_s16(a); +} + +// CHECK: test_vqnegq_s32 +// CHECK: vqneg.s32 q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vqnegq_s32(int32x4_t a) { + return vqnegq_s32(a); +} + + +// CHECK: test_vqrdmulh_s16 +// CHECK: vqrdmulh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) { + return vqrdmulh_s16(a, b); +} + +// CHECK: test_vqrdmulh_s32 +// CHECK: vqrdmulh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) { + return vqrdmulh_s32(a, b); +} + +// CHECK: test_vqrdmulhq_s16 +// CHECK: vqrdmulh.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) { + return vqrdmulhq_s16(a, b); +} + +// CHECK: test_vqrdmulhq_s32 +// CHECK: vqrdmulh.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) { + return vqrdmulhq_s32(a, b); +} + + +// CHECK: test_vqrdmulh_lane_s16 +// CHECK: vqrdmulh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t b) { + return vqrdmulh_lane_s16(a, b, 3); +} + +// CHECK: test_vqrdmulh_lane_s32 +// CHECK: vqrdmulh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t b) { + return vqrdmulh_lane_s32(a, b, 1); +} + +// CHECK: test_vqrdmulhq_lane_s16 +// CHECK: vqrdmulh.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t b) { + return vqrdmulhq_lane_s16(a, b, 3); +} + +// CHECK: test_vqrdmulhq_lane_s32 +// CHECK: vqrdmulh.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}] +int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t b) { + return vqrdmulhq_lane_s32(a, b, 1); +} + + +// CHECK: test_vqrdmulh_n_s16 +// CHECK: vqrdmulh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) { + return vqrdmulh_n_s16(a, b); +} + +// CHECK: test_vqrdmulh_n_s32 +// CHECK: vqrdmulh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) { + return vqrdmulh_n_s32(a, b); +} + +// CHECK: test_vqrdmulhq_n_s16 +// CHECK: vqrdmulh.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) { + return vqrdmulhq_n_s16(a, b); +} + +// CHECK: test_vqrdmulhq_n_s32 +// CHECK: vqrdmulh.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) { + return vqrdmulhq_n_s32(a, b); +} + + +// CHECK: test_vqrshl_s8 +// CHECK: vqrshl.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) { + return vqrshl_s8(a, b); +} + +// CHECK: test_vqrshl_s16 +// CHECK: vqrshl.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} 
+int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) { + return vqrshl_s16(a, b); +} + +// CHECK: test_vqrshl_s32 +// CHECK: vqrshl.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) { + return vqrshl_s32(a, b); +} + +// CHECK: test_vqrshl_s64 +// CHECK: vqrshl.s64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) { + return vqrshl_s64(a, b); +} + +// CHECK: test_vqrshl_u8 +// CHECK: vqrshl.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) { + return vqrshl_u8(a, b); +} + +// CHECK: test_vqrshl_u16 +// CHECK: vqrshl.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) { + return vqrshl_u16(a, b); +} + +// CHECK: test_vqrshl_u32 +// CHECK: vqrshl.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) { + return vqrshl_u32(a, b); +} + +// CHECK: test_vqrshl_u64 +// CHECK: vqrshl.u64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) { + return vqrshl_u64(a, b); +} + +// CHECK: test_vqrshlq_s8 +// CHECK: vqrshl.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) { + return vqrshlq_s8(a, b); +} + +// CHECK: test_vqrshlq_s16 +// CHECK: vqrshl.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) { + return vqrshlq_s16(a, b); +} + +// CHECK: test_vqrshlq_s32 +// CHECK: vqrshl.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) { + return vqrshlq_s32(a, b); +} + +// CHECK: test_vqrshlq_s64 +// CHECK: vqrshl.s64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) { + return vqrshlq_s64(a, b); +} + +// CHECK: test_vqrshlq_u8 +// CHECK: vqrshl.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) { + return vqrshlq_u8(a, b); +} + +// CHECK: test_vqrshlq_u16 +// CHECK: vqrshl.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) { + return vqrshlq_u16(a, b); +} + +// CHECK: test_vqrshlq_u32 +// CHECK: vqrshl.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) { + return vqrshlq_u32(a, b); +} + +// CHECK: test_vqrshlq_u64 +// CHECK: vqrshl.u64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) { + return vqrshlq_u64(a, b); +} + + +// CHECK: test_vqrshrn_n_s16 +// CHECK: vqrshrn.s16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int8x8_t test_vqrshrn_n_s16(int16x8_t a) { + return vqrshrn_n_s16(a, 1); +} + +// CHECK: test_vqrshrn_n_s32 +// CHECK: vqrshrn.s32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int16x4_t test_vqrshrn_n_s32(int32x4_t a) { + return vqrshrn_n_s32(a, 1); +} + +// CHECK: test_vqrshrn_n_s64 +// CHECK: vqrshrn.s64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int32x2_t test_vqrshrn_n_s64(int64x2_t a) { + return vqrshrn_n_s64(a, 1); +} + +// CHECK: test_vqrshrn_n_u16 +// CHECK: vqrshrn.u16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) { + return vqrshrn_n_u16(a, 1); +} + +// CHECK: test_vqrshrn_n_u32 +// CHECK: vqrshrn.u32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) { + return vqrshrn_n_u32(a, 1); +} + +// CHECK: test_vqrshrn_n_u64 +// CHECK: vqrshrn.u64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) { + return 
vqrshrn_n_u64(a, 1); +} + + +// CHECK: test_vqrshrun_n_s16 +// CHECK: vqrshrun.s16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vqrshrun_n_s16(int16x8_t a) { + return vqrshrun_n_s16(a, 1); +} + +// CHECK: test_vqrshrun_n_s32 +// CHECK: vqrshrun.s32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint16x4_t test_vqrshrun_n_s32(int32x4_t a) { + return vqrshrun_n_s32(a, 1); +} + +// CHECK: test_vqrshrun_n_s64 +// CHECK: vqrshrun.s64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vqrshrun_n_s64(int64x2_t a) { + return vqrshrun_n_s64(a, 1); +} + + +// CHECK: test_vqshl_s8 +// CHECK: vqshl.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) { + return vqshl_s8(a, b); +} + +// CHECK: test_vqshl_s16 +// CHECK: vqshl.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) { + return vqshl_s16(a, b); +} + +// CHECK: test_vqshl_s32 +// CHECK: vqshl.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) { + return vqshl_s32(a, b); +} + +// CHECK: test_vqshl_s64 +// CHECK: vqshl.s64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) { + return vqshl_s64(a, b); +} + +// CHECK: test_vqshl_u8 +// CHECK: vqshl.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) { + return vqshl_u8(a, b); +} + +// CHECK: test_vqshl_u16 +// CHECK: vqshl.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) { + return vqshl_u16(a, b); +} + +// CHECK: test_vqshl_u32 +// CHECK: vqshl.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) { + return vqshl_u32(a, b); +} + +// CHECK: test_vqshl_u64 +// CHECK: vqshl.u64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) { + return vqshl_u64(a, b); +} + +// CHECK: test_vqshlq_s8 +// CHECK: vqshl.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) { + return vqshlq_s8(a, b); +} + +// CHECK: test_vqshlq_s16 +// CHECK: vqshl.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) { + return vqshlq_s16(a, b); +} + +// CHECK: test_vqshlq_s32 +// CHECK: vqshl.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) { + return vqshlq_s32(a, b); +} + +// CHECK: test_vqshlq_s64 +// CHECK: vqshl.s64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) { + return vqshlq_s64(a, b); +} + +// CHECK: test_vqshlq_u8 +// CHECK: vqshl.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) { + return vqshlq_u8(a, b); +} + +// CHECK: test_vqshlq_u16 +// CHECK: vqshl.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) { + return vqshlq_u16(a, b); +} + +// CHECK: test_vqshlq_u32 +// CHECK: vqshl.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) { + return vqshlq_u32(a, b); +} + +// CHECK: test_vqshlq_u64 +// CHECK: vqshl.u64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) { + return vqshlq_u64(a, b); +} + + +// CHECK: test_vqshlu_n_s8 +// CHECK: vqshlu.s8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vqshlu_n_s8(int8x8_t a) { + return vqshlu_n_s8(a, 1); +} + +// CHECK: test_vqshlu_n_s16 +// CHECK: vqshlu.s16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} 
+uint16x4_t test_vqshlu_n_s16(int16x4_t a) { + return vqshlu_n_s16(a, 1); +} + +// CHECK: test_vqshlu_n_s32 +// CHECK: vqshlu.s32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vqshlu_n_s32(int32x2_t a) { + return vqshlu_n_s32(a, 1); +} + +// CHECK: test_vqshlu_n_s64 +// CHECK: vqshlu.s64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint64x1_t test_vqshlu_n_s64(int64x1_t a) { + return vqshlu_n_s64(a, 1); +} + +// CHECK: test_vqshluq_n_s8 +// CHECK: vqshlu.s8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x16_t test_vqshluq_n_s8(int8x16_t a) { + return vqshluq_n_s8(a, 1); +} + +// CHECK: test_vqshluq_n_s16 +// CHECK: vqshlu.s16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint16x8_t test_vqshluq_n_s16(int16x8_t a) { + return vqshluq_n_s16(a, 1); +} + +// CHECK: test_vqshluq_n_s32 +// CHECK: vqshlu.s32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x4_t test_vqshluq_n_s32(int32x4_t a) { + return vqshluq_n_s32(a, 1); +} + +// CHECK: test_vqshluq_n_s64 +// CHECK: vqshlu.s64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint64x2_t test_vqshluq_n_s64(int64x2_t a) { + return vqshluq_n_s64(a, 1); +} + + +// CHECK: test_vqshl_n_s8 +// CHECK: vqshl.s8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int8x8_t test_vqshl_n_s8(int8x8_t a) { + return vqshl_n_s8(a, 1); +} + +// CHECK: test_vqshl_n_s16 +// CHECK: vqshl.s16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int16x4_t test_vqshl_n_s16(int16x4_t a) { + return vqshl_n_s16(a, 1); +} + +// CHECK: test_vqshl_n_s32 +// CHECK: vqshl.s32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int32x2_t test_vqshl_n_s32(int32x2_t a) { + return vqshl_n_s32(a, 1); +} + +// CHECK: test_vqshl_n_s64 +// CHECK: vqshl.s64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int64x1_t test_vqshl_n_s64(int64x1_t a) { + return vqshl_n_s64(a, 1); +} + +// CHECK: test_vqshl_n_u8 +// CHECK: vqshl.u8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vqshl_n_u8(uint8x8_t a) { + return vqshl_n_u8(a, 1); +} + +// CHECK: test_vqshl_n_u16 +// CHECK: vqshl.u16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint16x4_t test_vqshl_n_u16(uint16x4_t a) { + return vqshl_n_u16(a, 1); +} + +// CHECK: test_vqshl_n_u32 +// CHECK: vqshl.u32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vqshl_n_u32(uint32x2_t a) { + return vqshl_n_u32(a, 1); +} + +// CHECK: test_vqshl_n_u64 +// CHECK: vqshl.u64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint64x1_t test_vqshl_n_u64(uint64x1_t a) { + return vqshl_n_u64(a, 1); +} + +// CHECK: test_vqshlq_n_s8 +// CHECK: vqshl.s8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int8x16_t test_vqshlq_n_s8(int8x16_t a) { + return vqshlq_n_s8(a, 1); +} + +// CHECK: test_vqshlq_n_s16 +// CHECK: vqshl.s16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int16x8_t test_vqshlq_n_s16(int16x8_t a) { + return vqshlq_n_s16(a, 1); +} + +// CHECK: test_vqshlq_n_s32 +// CHECK: vqshl.s32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int32x4_t test_vqshlq_n_s32(int32x4_t a) { + return vqshlq_n_s32(a, 1); +} + +// CHECK: test_vqshlq_n_s64 +// CHECK: vqshl.s64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int64x2_t test_vqshlq_n_s64(int64x2_t a) { + return vqshlq_n_s64(a, 1); +} + +// CHECK: test_vqshlq_n_u8 +// CHECK: vqshl.u8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x16_t test_vqshlq_n_u8(uint8x16_t a) { + return vqshlq_n_u8(a, 1); +} + +// CHECK: test_vqshlq_n_u16 +// CHECK: vqshl.u16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint16x8_t test_vqshlq_n_u16(uint16x8_t a) { + return vqshlq_n_u16(a, 1); +} + +// CHECK: test_vqshlq_n_u32 +// CHECK: vqshl.u32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x4_t test_vqshlq_n_u32(uint32x4_t a) { + return vqshlq_n_u32(a, 1); +} 
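[Editorial note, not part of the patch.] The vqshl/vqshlu tests above and below only verify the emitted mnemonic and operand shapes; the saturating semantics themselves are easiest to see in a small sketch (illustrative only, function name is an assumption):

#include <arm_neon.h>

/* Saturating left shift: 100 << 1 would be 200, which does not fit in a
   signed 8-bit lane, so vqshl_n_s8 clamps each lane to 127 instead of
   wrapping the way a plain vshl would. */
static inline int8_t saturating_shift_demo(void) {
  int8x8_t v = vdup_n_s8(100);     /* every lane holds 100        */
  int8x8_t r = vqshl_n_s8(v, 1);   /* each lane saturates to 127  */
  return vget_lane_s8(r, 0);       /* returns 127                 */
}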
+ +// CHECK: test_vqshlq_n_u64 +// CHECK: vqshl.u64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint64x2_t test_vqshlq_n_u64(uint64x2_t a) { + return vqshlq_n_u64(a, 1); +} + + +// CHECK: test_vqshrn_n_s16 +// CHECK: vqshrn.s16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int8x8_t test_vqshrn_n_s16(int16x8_t a) { + return vqshrn_n_s16(a, 1); +} + +// CHECK: test_vqshrn_n_s32 +// CHECK: vqshrn.s32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int16x4_t test_vqshrn_n_s32(int32x4_t a) { + return vqshrn_n_s32(a, 1); +} + +// CHECK: test_vqshrn_n_s64 +// CHECK: vqshrn.s64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int32x2_t test_vqshrn_n_s64(int64x2_t a) { + return vqshrn_n_s64(a, 1); +} + +// CHECK: test_vqshrn_n_u16 +// CHECK: vqshrn.u16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { + return vqshrn_n_u16(a, 1); +} + +// CHECK: test_vqshrn_n_u32 +// CHECK: vqshrn.u32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { + return vqshrn_n_u32(a, 1); +} + +// CHECK: test_vqshrn_n_u64 +// CHECK: vqshrn.u64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { + return vqshrn_n_u64(a, 1); +} + + +// CHECK: test_vqshrun_n_s16 +// CHECK: vqshrun.s16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vqshrun_n_s16(int16x8_t a) { + return vqshrun_n_s16(a, 1); +} + +// CHECK: test_vqshrun_n_s32 +// CHECK: vqshrun.s32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint16x4_t test_vqshrun_n_s32(int32x4_t a) { + return vqshrun_n_s32(a, 1); +} + +// CHECK: test_vqshrun_n_s64 +// CHECK: vqshrun.s64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vqshrun_n_s64(int64x2_t a) { + return vqshrun_n_s64(a, 1); +} + + +// CHECK: test_vqsub_s8 +// CHECK: vqsub.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { + return vqsub_s8(a, b); +} + +// CHECK: test_vqsub_s16 +// CHECK: vqsub.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) { + return vqsub_s16(a, b); +} + +// CHECK: test_vqsub_s32 +// CHECK: vqsub.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { + return vqsub_s32(a, b); +} + +// CHECK: test_vqsub_s64 +// CHECK: vqsub.s64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { + return vqsub_s64(a, b); +} + +// CHECK: test_vqsub_u8 +// CHECK: vqsub.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { + return vqsub_u8(a, b); +} + +// CHECK: test_vqsub_u16 +// CHECK: vqsub.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { + return vqsub_u16(a, b); +} + +// CHECK: test_vqsub_u32 +// CHECK: vqsub.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { + return vqsub_u32(a, b); +} + +// CHECK: test_vqsub_u64 +// CHECK: vqsub.u64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { + return vqsub_u64(a, b); +} + +// CHECK: test_vqsubq_s8 +// CHECK: vqsub.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { + return vqsubq_s8(a, b); +} + +// CHECK: test_vqsubq_s16 +// CHECK: vqsub.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { + return vqsubq_s16(a, b); +} + +// CHECK: test_vqsubq_s32 +// CHECK: vqsub.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { + return 
vqsubq_s32(a, b); +} + +// CHECK: test_vqsubq_s64 +// CHECK: vqsub.s64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { + return vqsubq_s64(a, b); +} + +// CHECK: test_vqsubq_u8 +// CHECK: vqsub.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { + return vqsubq_u8(a, b); +} + +// CHECK: test_vqsubq_u16 +// CHECK: vqsub.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { + return vqsubq_u16(a, b); +} + +// CHECK: test_vqsubq_u32 +// CHECK: vqsub.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { + return vqsubq_u32(a, b); +} + +// CHECK: test_vqsubq_u64 +// CHECK: vqsub.u64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { + return vqsubq_u64(a, b); +} + + +// CHECK: test_vraddhn_s16 +// CHECK: vraddhn.i16 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) { + return vraddhn_s16(a, b); +} + +// CHECK: test_vraddhn_s32 +// CHECK: vraddhn.i32 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) { + return vraddhn_s32(a, b); +} + +// CHECK: test_vraddhn_s64 +// CHECK: vraddhn.i64 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) { + return vraddhn_s64(a, b); +} + +// CHECK: test_vraddhn_u16 +// CHECK: vraddhn.i16 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) { + return vraddhn_u16(a, b); +} + +// CHECK: test_vraddhn_u32 +// CHECK: vraddhn.i32 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) { + return vraddhn_u32(a, b); +} + +// CHECK: test_vraddhn_u64 +// CHECK: vraddhn.i64 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) { + return vraddhn_u64(a, b); +} + + +// CHECK: test_vrecpe_f32 +// CHECK: vrecpe.f32 d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vrecpe_f32(float32x2_t a) { + return vrecpe_f32(a); +} + +// CHECK: test_vrecpe_u32 +// CHECK: vrecpe.u32 d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vrecpe_u32(uint32x2_t a) { + return vrecpe_u32(a); +} + +// CHECK: test_vrecpeq_f32 +// CHECK: vrecpe.f32 q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vrecpeq_f32(float32x4_t a) { + return vrecpeq_f32(a); +} + +// CHECK: test_vrecpeq_u32 +// CHECK: vrecpe.u32 q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vrecpeq_u32(uint32x4_t a) { + return vrecpeq_u32(a); +} + + +// CHECK: test_vrecps_f32 +// CHECK: vrecps.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vrecps_f32(float32x2_t a, float32x2_t b) { + return vrecps_f32(a, b); +} + +// CHECK: test_vrecpsq_f32 +// CHECK: vrecps.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vrecpsq_f32(float32x4_t a, float32x4_t b) { + return vrecpsq_f32(a, b); +} + + +// CHECK: test_vreinterpret_s8_s16 +int8x8_t test_vreinterpret_s8_s16(int16x4_t a) { + return vreinterpret_s8_s16(a); +} + +// CHECK: test_vreinterpret_s8_s32 +int8x8_t test_vreinterpret_s8_s32(int32x2_t a) { + return vreinterpret_s8_s32(a); +} + +// CHECK: test_vreinterpret_s8_s64 +int8x8_t test_vreinterpret_s8_s64(int64x1_t a) { + return vreinterpret_s8_s64(a); +} + +// CHECK: test_vreinterpret_s8_u8 +int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) { + return vreinterpret_s8_u8(a); +} + +// CHECK: test_vreinterpret_s8_u16 +int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) { + return vreinterpret_s8_u16(a); 
+} + +// CHECK: test_vreinterpret_s8_u32 +int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) { + return vreinterpret_s8_u32(a); +} + +// CHECK: test_vreinterpret_s8_u64 +int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) { + return vreinterpret_s8_u64(a); +} + +// CHECK: test_vreinterpret_s8_f16 +int8x8_t test_vreinterpret_s8_f16(float16x4_t a) { + return vreinterpret_s8_f16(a); +} + +// CHECK: test_vreinterpret_s8_f32 +int8x8_t test_vreinterpret_s8_f32(float32x2_t a) { + return vreinterpret_s8_f32(a); +} + +// CHECK: test_vreinterpret_s8_p8 +int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) { + return vreinterpret_s8_p8(a); +} + +// CHECK: test_vreinterpret_s8_p16 +int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) { + return vreinterpret_s8_p16(a); +} + +// CHECK: test_vreinterpret_s16_s8 +int16x4_t test_vreinterpret_s16_s8(int8x8_t a) { + return vreinterpret_s16_s8(a); +} + +// CHECK: test_vreinterpret_s16_s32 +int16x4_t test_vreinterpret_s16_s32(int32x2_t a) { + return vreinterpret_s16_s32(a); +} + +// CHECK: test_vreinterpret_s16_s64 +int16x4_t test_vreinterpret_s16_s64(int64x1_t a) { + return vreinterpret_s16_s64(a); +} + +// CHECK: test_vreinterpret_s16_u8 +int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) { + return vreinterpret_s16_u8(a); +} + +// CHECK: test_vreinterpret_s16_u16 +int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) { + return vreinterpret_s16_u16(a); +} + +// CHECK: test_vreinterpret_s16_u32 +int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) { + return vreinterpret_s16_u32(a); +} + +// CHECK: test_vreinterpret_s16_u64 +int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) { + return vreinterpret_s16_u64(a); +} + +// CHECK: test_vreinterpret_s16_f16 +int16x4_t test_vreinterpret_s16_f16(float16x4_t a) { + return vreinterpret_s16_f16(a); +} + +// CHECK: test_vreinterpret_s16_f32 +int16x4_t test_vreinterpret_s16_f32(float32x2_t a) { + return vreinterpret_s16_f32(a); +} + +// CHECK: test_vreinterpret_s16_p8 +int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) { + return vreinterpret_s16_p8(a); +} + +// CHECK: test_vreinterpret_s16_p16 +int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) { + return vreinterpret_s16_p16(a); +} + +// CHECK: test_vreinterpret_s32_s8 +int32x2_t test_vreinterpret_s32_s8(int8x8_t a) { + return vreinterpret_s32_s8(a); +} + +// CHECK: test_vreinterpret_s32_s16 +int32x2_t test_vreinterpret_s32_s16(int16x4_t a) { + return vreinterpret_s32_s16(a); +} + +// CHECK: test_vreinterpret_s32_s64 +int32x2_t test_vreinterpret_s32_s64(int64x1_t a) { + return vreinterpret_s32_s64(a); +} + +// CHECK: test_vreinterpret_s32_u8 +int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) { + return vreinterpret_s32_u8(a); +} + +// CHECK: test_vreinterpret_s32_u16 +int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) { + return vreinterpret_s32_u16(a); +} + +// CHECK: test_vreinterpret_s32_u32 +int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) { + return vreinterpret_s32_u32(a); +} + +// CHECK: test_vreinterpret_s32_u64 +int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) { + return vreinterpret_s32_u64(a); +} + +// CHECK: test_vreinterpret_s32_f16 +int32x2_t test_vreinterpret_s32_f16(float16x4_t a) { + return vreinterpret_s32_f16(a); +} + +// CHECK: test_vreinterpret_s32_f32 +int32x2_t test_vreinterpret_s32_f32(float32x2_t a) { + return vreinterpret_s32_f32(a); +} + +// CHECK: test_vreinterpret_s32_p8 +int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) { + return vreinterpret_s32_p8(a); +} + +// CHECK: test_vreinterpret_s32_p16 +int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) { + return 
vreinterpret_s32_p16(a); +} + +// CHECK: test_vreinterpret_s64_s8 +int64x1_t test_vreinterpret_s64_s8(int8x8_t a) { + return vreinterpret_s64_s8(a); +} + +// CHECK: test_vreinterpret_s64_s16 +int64x1_t test_vreinterpret_s64_s16(int16x4_t a) { + return vreinterpret_s64_s16(a); +} + +// CHECK: test_vreinterpret_s64_s32 +int64x1_t test_vreinterpret_s64_s32(int32x2_t a) { + return vreinterpret_s64_s32(a); +} + +// CHECK: test_vreinterpret_s64_u8 +int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) { + return vreinterpret_s64_u8(a); +} + +// CHECK: test_vreinterpret_s64_u16 +int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) { + return vreinterpret_s64_u16(a); +} + +// CHECK: test_vreinterpret_s64_u32 +int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) { + return vreinterpret_s64_u32(a); +} + +// CHECK: test_vreinterpret_s64_u64 +int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) { + return vreinterpret_s64_u64(a); +} + +// CHECK: test_vreinterpret_s64_f16 +int64x1_t test_vreinterpret_s64_f16(float16x4_t a) { + return vreinterpret_s64_f16(a); +} + +// CHECK: test_vreinterpret_s64_f32 +int64x1_t test_vreinterpret_s64_f32(float32x2_t a) { + return vreinterpret_s64_f32(a); +} + +// CHECK: test_vreinterpret_s64_p8 +int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) { + return vreinterpret_s64_p8(a); +} + +// CHECK: test_vreinterpret_s64_p16 +int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) { + return vreinterpret_s64_p16(a); +} + +// CHECK: test_vreinterpret_u8_s8 +uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) { + return vreinterpret_u8_s8(a); +} + +// CHECK: test_vreinterpret_u8_s16 +uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) { + return vreinterpret_u8_s16(a); +} + +// CHECK: test_vreinterpret_u8_s32 +uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) { + return vreinterpret_u8_s32(a); +} + +// CHECK: test_vreinterpret_u8_s64 +uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) { + return vreinterpret_u8_s64(a); +} + +// CHECK: test_vreinterpret_u8_u16 +uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) { + return vreinterpret_u8_u16(a); +} + +// CHECK: test_vreinterpret_u8_u32 +uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) { + return vreinterpret_u8_u32(a); +} + +// CHECK: test_vreinterpret_u8_u64 +uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) { + return vreinterpret_u8_u64(a); +} + +// CHECK: test_vreinterpret_u8_f16 +uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) { + return vreinterpret_u8_f16(a); +} + +// CHECK: test_vreinterpret_u8_f32 +uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) { + return vreinterpret_u8_f32(a); +} + +// CHECK: test_vreinterpret_u8_p8 +uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) { + return vreinterpret_u8_p8(a); +} + +// CHECK: test_vreinterpret_u8_p16 +uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) { + return vreinterpret_u8_p16(a); +} + +// CHECK: test_vreinterpret_u16_s8 +uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) { + return vreinterpret_u16_s8(a); +} + +// CHECK: test_vreinterpret_u16_s16 +uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) { + return vreinterpret_u16_s16(a); +} + +// CHECK: test_vreinterpret_u16_s32 +uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) { + return vreinterpret_u16_s32(a); +} + +// CHECK: test_vreinterpret_u16_s64 +uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) { + return vreinterpret_u16_s64(a); +} + +// CHECK: test_vreinterpret_u16_u8 +uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) { + return vreinterpret_u16_u8(a); +} + +// CHECK: test_vreinterpret_u16_u32 +uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) { + return 
vreinterpret_u16_u32(a); +} + +// CHECK: test_vreinterpret_u16_u64 +uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) { + return vreinterpret_u16_u64(a); +} + +// CHECK: test_vreinterpret_u16_f16 +uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) { + return vreinterpret_u16_f16(a); +} + +// CHECK: test_vreinterpret_u16_f32 +uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) { + return vreinterpret_u16_f32(a); +} + +// CHECK: test_vreinterpret_u16_p8 +uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) { + return vreinterpret_u16_p8(a); +} + +// CHECK: test_vreinterpret_u16_p16 +uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) { + return vreinterpret_u16_p16(a); +} + +// CHECK: test_vreinterpret_u32_s8 +uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) { + return vreinterpret_u32_s8(a); +} + +// CHECK: test_vreinterpret_u32_s16 +uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) { + return vreinterpret_u32_s16(a); +} + +// CHECK: test_vreinterpret_u32_s32 +uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) { + return vreinterpret_u32_s32(a); +} + +// CHECK: test_vreinterpret_u32_s64 +uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) { + return vreinterpret_u32_s64(a); +} + +// CHECK: test_vreinterpret_u32_u8 +uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) { + return vreinterpret_u32_u8(a); +} + +// CHECK: test_vreinterpret_u32_u16 +uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) { + return vreinterpret_u32_u16(a); +} + +// CHECK: test_vreinterpret_u32_u64 +uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) { + return vreinterpret_u32_u64(a); +} + +// CHECK: test_vreinterpret_u32_f16 +uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) { + return vreinterpret_u32_f16(a); +} + +// CHECK: test_vreinterpret_u32_f32 +uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) { + return vreinterpret_u32_f32(a); +} + +// CHECK: test_vreinterpret_u32_p8 +uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) { + return vreinterpret_u32_p8(a); +} + +// CHECK: test_vreinterpret_u32_p16 +uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) { + return vreinterpret_u32_p16(a); +} + +// CHECK: test_vreinterpret_u64_s8 +uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) { + return vreinterpret_u64_s8(a); +} + +// CHECK: test_vreinterpret_u64_s16 +uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) { + return vreinterpret_u64_s16(a); +} + +// CHECK: test_vreinterpret_u64_s32 +uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) { + return vreinterpret_u64_s32(a); +} + +// CHECK: test_vreinterpret_u64_s64 +uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) { + return vreinterpret_u64_s64(a); +} + +// CHECK: test_vreinterpret_u64_u8 +uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) { + return vreinterpret_u64_u8(a); +} + +// CHECK: test_vreinterpret_u64_u16 +uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) { + return vreinterpret_u64_u16(a); +} + +// CHECK: test_vreinterpret_u64_u32 +uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) { + return vreinterpret_u64_u32(a); +} + +// CHECK: test_vreinterpret_u64_f16 +uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) { + return vreinterpret_u64_f16(a); +} + +// CHECK: test_vreinterpret_u64_f32 +uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) { + return vreinterpret_u64_f32(a); +} + +// CHECK: test_vreinterpret_u64_p8 +uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) { + return vreinterpret_u64_p8(a); +} + +// CHECK: test_vreinterpret_u64_p16 +uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) { + return vreinterpret_u64_p16(a); +} + +// CHECK: test_vreinterpret_f16_s8 
+float16x4_t test_vreinterpret_f16_s8(int8x8_t a) { + return vreinterpret_f16_s8(a); +} + +// CHECK: test_vreinterpret_f16_s16 +float16x4_t test_vreinterpret_f16_s16(int16x4_t a) { + return vreinterpret_f16_s16(a); +} + +// CHECK: test_vreinterpret_f16_s32 +float16x4_t test_vreinterpret_f16_s32(int32x2_t a) { + return vreinterpret_f16_s32(a); +} + +// CHECK: test_vreinterpret_f16_s64 +float16x4_t test_vreinterpret_f16_s64(int64x1_t a) { + return vreinterpret_f16_s64(a); +} + +// CHECK: test_vreinterpret_f16_u8 +float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) { + return vreinterpret_f16_u8(a); +} + +// CHECK: test_vreinterpret_f16_u16 +float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) { + return vreinterpret_f16_u16(a); +} + +// CHECK: test_vreinterpret_f16_u32 +float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) { + return vreinterpret_f16_u32(a); +} + +// CHECK: test_vreinterpret_f16_u64 +float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) { + return vreinterpret_f16_u64(a); +} + +// CHECK: test_vreinterpret_f16_f32 +float16x4_t test_vreinterpret_f16_f32(float32x2_t a) { + return vreinterpret_f16_f32(a); +} + +// CHECK: test_vreinterpret_f16_p8 +float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) { + return vreinterpret_f16_p8(a); +} + +// CHECK: test_vreinterpret_f16_p16 +float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) { + return vreinterpret_f16_p16(a); +} + +// CHECK: test_vreinterpret_f32_s8 +float32x2_t test_vreinterpret_f32_s8(int8x8_t a) { + return vreinterpret_f32_s8(a); +} + +// CHECK: test_vreinterpret_f32_s16 +float32x2_t test_vreinterpret_f32_s16(int16x4_t a) { + return vreinterpret_f32_s16(a); +} + +// CHECK: test_vreinterpret_f32_s32 +float32x2_t test_vreinterpret_f32_s32(int32x2_t a) { + return vreinterpret_f32_s32(a); +} + +// CHECK: test_vreinterpret_f32_s64 +float32x2_t test_vreinterpret_f32_s64(int64x1_t a) { + return vreinterpret_f32_s64(a); +} + +// CHECK: test_vreinterpret_f32_u8 +float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) { + return vreinterpret_f32_u8(a); +} + +// CHECK: test_vreinterpret_f32_u16 +float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) { + return vreinterpret_f32_u16(a); +} + +// CHECK: test_vreinterpret_f32_u32 +float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) { + return vreinterpret_f32_u32(a); +} + +// CHECK: test_vreinterpret_f32_u64 +float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) { + return vreinterpret_f32_u64(a); +} + +// CHECK: test_vreinterpret_f32_f16 +float32x2_t test_vreinterpret_f32_f16(float16x4_t a) { + return vreinterpret_f32_f16(a); +} + +// CHECK: test_vreinterpret_f32_p8 +float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) { + return vreinterpret_f32_p8(a); +} + +// CHECK: test_vreinterpret_f32_p16 +float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) { + return vreinterpret_f32_p16(a); +} + +// CHECK: test_vreinterpret_p8_s8 +poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) { + return vreinterpret_p8_s8(a); +} + +// CHECK: test_vreinterpret_p8_s16 +poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) { + return vreinterpret_p8_s16(a); +} + +// CHECK: test_vreinterpret_p8_s32 +poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) { + return vreinterpret_p8_s32(a); +} + +// CHECK: test_vreinterpret_p8_s64 +poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) { + return vreinterpret_p8_s64(a); +} + +// CHECK: test_vreinterpret_p8_u8 +poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) { + return vreinterpret_p8_u8(a); +} + +// CHECK: test_vreinterpret_p8_u16 +poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) { + return vreinterpret_p8_u16(a); 
+} + +// CHECK: test_vreinterpret_p8_u32 +poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) { + return vreinterpret_p8_u32(a); +} + +// CHECK: test_vreinterpret_p8_u64 +poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) { + return vreinterpret_p8_u64(a); +} + +// CHECK: test_vreinterpret_p8_f16 +poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) { + return vreinterpret_p8_f16(a); +} + +// CHECK: test_vreinterpret_p8_f32 +poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) { + return vreinterpret_p8_f32(a); +} + +// CHECK: test_vreinterpret_p8_p16 +poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) { + return vreinterpret_p8_p16(a); +} + +// CHECK: test_vreinterpret_p16_s8 +poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) { + return vreinterpret_p16_s8(a); +} + +// CHECK: test_vreinterpret_p16_s16 +poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) { + return vreinterpret_p16_s16(a); +} + +// CHECK: test_vreinterpret_p16_s32 +poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) { + return vreinterpret_p16_s32(a); +} + +// CHECK: test_vreinterpret_p16_s64 +poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) { + return vreinterpret_p16_s64(a); +} + +// CHECK: test_vreinterpret_p16_u8 +poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) { + return vreinterpret_p16_u8(a); +} + +// CHECK: test_vreinterpret_p16_u16 +poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) { + return vreinterpret_p16_u16(a); +} + +// CHECK: test_vreinterpret_p16_u32 +poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) { + return vreinterpret_p16_u32(a); +} + +// CHECK: test_vreinterpret_p16_u64 +poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) { + return vreinterpret_p16_u64(a); +} + +// CHECK: test_vreinterpret_p16_f16 +poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) { + return vreinterpret_p16_f16(a); +} + +// CHECK: test_vreinterpret_p16_f32 +poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) { + return vreinterpret_p16_f32(a); +} + +// CHECK: test_vreinterpret_p16_p8 +poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) { + return vreinterpret_p16_p8(a); +} + +// CHECK: test_vreinterpretq_s8_s16 +int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) { + return vreinterpretq_s8_s16(a); +} + +// CHECK: test_vreinterpretq_s8_s32 +int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) { + return vreinterpretq_s8_s32(a); +} + +// CHECK: test_vreinterpretq_s8_s64 +int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) { + return vreinterpretq_s8_s64(a); +} + +// CHECK: test_vreinterpretq_s8_u8 +int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) { + return vreinterpretq_s8_u8(a); +} + +// CHECK: test_vreinterpretq_s8_u16 +int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) { + return vreinterpretq_s8_u16(a); +} + +// CHECK: test_vreinterpretq_s8_u32 +int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) { + return vreinterpretq_s8_u32(a); +} + +// CHECK: test_vreinterpretq_s8_u64 +int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) { + return vreinterpretq_s8_u64(a); +} + +// CHECK: test_vreinterpretq_s8_f16 +int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) { + return vreinterpretq_s8_f16(a); +} + +// CHECK: test_vreinterpretq_s8_f32 +int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) { + return vreinterpretq_s8_f32(a); +} + +// CHECK: test_vreinterpretq_s8_p8 +int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) { + return vreinterpretq_s8_p8(a); +} + +// CHECK: test_vreinterpretq_s8_p16 +int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) { + return vreinterpretq_s8_p16(a); +} + +// CHECK: test_vreinterpretq_s16_s8 +int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) { 
+ return vreinterpretq_s16_s8(a); +} + +// CHECK: test_vreinterpretq_s16_s32 +int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) { + return vreinterpretq_s16_s32(a); +} + +// CHECK: test_vreinterpretq_s16_s64 +int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) { + return vreinterpretq_s16_s64(a); +} + +// CHECK: test_vreinterpretq_s16_u8 +int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) { + return vreinterpretq_s16_u8(a); +} + +// CHECK: test_vreinterpretq_s16_u16 +int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) { + return vreinterpretq_s16_u16(a); +} + +// CHECK: test_vreinterpretq_s16_u32 +int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) { + return vreinterpretq_s16_u32(a); +} + +// CHECK: test_vreinterpretq_s16_u64 +int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) { + return vreinterpretq_s16_u64(a); +} + +// CHECK: test_vreinterpretq_s16_f16 +int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) { + return vreinterpretq_s16_f16(a); +} + +// CHECK: test_vreinterpretq_s16_f32 +int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) { + return vreinterpretq_s16_f32(a); +} + +// CHECK: test_vreinterpretq_s16_p8 +int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) { + return vreinterpretq_s16_p8(a); +} + +// CHECK: test_vreinterpretq_s16_p16 +int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) { + return vreinterpretq_s16_p16(a); +} + +// CHECK: test_vreinterpretq_s32_s8 +int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) { + return vreinterpretq_s32_s8(a); +} + +// CHECK: test_vreinterpretq_s32_s16 +int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) { + return vreinterpretq_s32_s16(a); +} + +// CHECK: test_vreinterpretq_s32_s64 +int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) { + return vreinterpretq_s32_s64(a); +} + +// CHECK: test_vreinterpretq_s32_u8 +int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) { + return vreinterpretq_s32_u8(a); +} + +// CHECK: test_vreinterpretq_s32_u16 +int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) { + return vreinterpretq_s32_u16(a); +} + +// CHECK: test_vreinterpretq_s32_u32 +int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) { + return vreinterpretq_s32_u32(a); +} + +// CHECK: test_vreinterpretq_s32_u64 +int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) { + return vreinterpretq_s32_u64(a); +} + +// CHECK: test_vreinterpretq_s32_f16 +int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) { + return vreinterpretq_s32_f16(a); +} + +// CHECK: test_vreinterpretq_s32_f32 +int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) { + return vreinterpretq_s32_f32(a); +} + +// CHECK: test_vreinterpretq_s32_p8 +int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) { + return vreinterpretq_s32_p8(a); +} + +// CHECK: test_vreinterpretq_s32_p16 +int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) { + return vreinterpretq_s32_p16(a); +} + +// CHECK: test_vreinterpretq_s64_s8 +int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) { + return vreinterpretq_s64_s8(a); +} + +// CHECK: test_vreinterpretq_s64_s16 +int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) { + return vreinterpretq_s64_s16(a); +} + +// CHECK: test_vreinterpretq_s64_s32 +int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) { + return vreinterpretq_s64_s32(a); +} + +// CHECK: test_vreinterpretq_s64_u8 +int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) { + return vreinterpretq_s64_u8(a); +} + +// CHECK: test_vreinterpretq_s64_u16 +int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) { + return vreinterpretq_s64_u16(a); +} + +// CHECK: test_vreinterpretq_s64_u32 +int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) { + return 
vreinterpretq_s64_u32(a); +} + +// CHECK: test_vreinterpretq_s64_u64 +int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) { + return vreinterpretq_s64_u64(a); +} + +// CHECK: test_vreinterpretq_s64_f16 +int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) { + return vreinterpretq_s64_f16(a); +} + +// CHECK: test_vreinterpretq_s64_f32 +int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) { + return vreinterpretq_s64_f32(a); +} + +// CHECK: test_vreinterpretq_s64_p8 +int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) { + return vreinterpretq_s64_p8(a); +} + +// CHECK: test_vreinterpretq_s64_p16 +int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) { + return vreinterpretq_s64_p16(a); +} + +// CHECK: test_vreinterpretq_u8_s8 +uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) { + return vreinterpretq_u8_s8(a); +} + +// CHECK: test_vreinterpretq_u8_s16 +uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) { + return vreinterpretq_u8_s16(a); +} + +// CHECK: test_vreinterpretq_u8_s32 +uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) { + return vreinterpretq_u8_s32(a); +} + +// CHECK: test_vreinterpretq_u8_s64 +uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) { + return vreinterpretq_u8_s64(a); +} + +// CHECK: test_vreinterpretq_u8_u16 +uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) { + return vreinterpretq_u8_u16(a); +} + +// CHECK: test_vreinterpretq_u8_u32 +uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) { + return vreinterpretq_u8_u32(a); +} + +// CHECK: test_vreinterpretq_u8_u64 +uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) { + return vreinterpretq_u8_u64(a); +} + +// CHECK: test_vreinterpretq_u8_f16 +uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) { + return vreinterpretq_u8_f16(a); +} + +// CHECK: test_vreinterpretq_u8_f32 +uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) { + return vreinterpretq_u8_f32(a); +} + +// CHECK: test_vreinterpretq_u8_p8 +uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) { + return vreinterpretq_u8_p8(a); +} + +// CHECK: test_vreinterpretq_u8_p16 +uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) { + return vreinterpretq_u8_p16(a); +} + +// CHECK: test_vreinterpretq_u16_s8 +uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) { + return vreinterpretq_u16_s8(a); +} + +// CHECK: test_vreinterpretq_u16_s16 +uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) { + return vreinterpretq_u16_s16(a); +} + +// CHECK: test_vreinterpretq_u16_s32 +uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) { + return vreinterpretq_u16_s32(a); +} + +// CHECK: test_vreinterpretq_u16_s64 +uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) { + return vreinterpretq_u16_s64(a); +} + +// CHECK: test_vreinterpretq_u16_u8 +uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) { + return vreinterpretq_u16_u8(a); +} + +// CHECK: test_vreinterpretq_u16_u32 +uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) { + return vreinterpretq_u16_u32(a); +} + +// CHECK: test_vreinterpretq_u16_u64 +uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) { + return vreinterpretq_u16_u64(a); +} + +// CHECK: test_vreinterpretq_u16_f16 +uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) { + return vreinterpretq_u16_f16(a); +} + +// CHECK: test_vreinterpretq_u16_f32 +uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) { + return vreinterpretq_u16_f32(a); +} + +// CHECK: test_vreinterpretq_u16_p8 +uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) { + return vreinterpretq_u16_p8(a); +} + +// CHECK: test_vreinterpretq_u16_p16 +uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) { + return 
vreinterpretq_u16_p16(a); +} + +// CHECK: test_vreinterpretq_u32_s8 +uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) { + return vreinterpretq_u32_s8(a); +} + +// CHECK: test_vreinterpretq_u32_s16 +uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) { + return vreinterpretq_u32_s16(a); +} + +// CHECK: test_vreinterpretq_u32_s32 +uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) { + return vreinterpretq_u32_s32(a); +} + +// CHECK: test_vreinterpretq_u32_s64 +uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) { + return vreinterpretq_u32_s64(a); +} + +// CHECK: test_vreinterpretq_u32_u8 +uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) { + return vreinterpretq_u32_u8(a); +} + +// CHECK: test_vreinterpretq_u32_u16 +uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) { + return vreinterpretq_u32_u16(a); +} + +// CHECK: test_vreinterpretq_u32_u64 +uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) { + return vreinterpretq_u32_u64(a); +} + +// CHECK: test_vreinterpretq_u32_f16 +uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) { + return vreinterpretq_u32_f16(a); +} + +// CHECK: test_vreinterpretq_u32_f32 +uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) { + return vreinterpretq_u32_f32(a); +} + +// CHECK: test_vreinterpretq_u32_p8 +uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) { + return vreinterpretq_u32_p8(a); +} + +// CHECK: test_vreinterpretq_u32_p16 +uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) { + return vreinterpretq_u32_p16(a); +} + +// CHECK: test_vreinterpretq_u64_s8 +uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) { + return vreinterpretq_u64_s8(a); +} + +// CHECK: test_vreinterpretq_u64_s16 +uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) { + return vreinterpretq_u64_s16(a); +} + +// CHECK: test_vreinterpretq_u64_s32 +uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) { + return vreinterpretq_u64_s32(a); +} + +// CHECK: test_vreinterpretq_u64_s64 +uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) { + return vreinterpretq_u64_s64(a); +} + +// CHECK: test_vreinterpretq_u64_u8 +uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) { + return vreinterpretq_u64_u8(a); +} + +// CHECK: test_vreinterpretq_u64_u16 +uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) { + return vreinterpretq_u64_u16(a); +} + +// CHECK: test_vreinterpretq_u64_u32 +uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) { + return vreinterpretq_u64_u32(a); +} + +// CHECK: test_vreinterpretq_u64_f16 +uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) { + return vreinterpretq_u64_f16(a); +} + +// CHECK: test_vreinterpretq_u64_f32 +uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) { + return vreinterpretq_u64_f32(a); +} + +// CHECK: test_vreinterpretq_u64_p8 +uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) { + return vreinterpretq_u64_p8(a); +} + +// CHECK: test_vreinterpretq_u64_p16 +uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) { + return vreinterpretq_u64_p16(a); +} + +// CHECK: test_vreinterpretq_f16_s8 +float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) { + return vreinterpretq_f16_s8(a); +} + +// CHECK: test_vreinterpretq_f16_s16 +float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) { + return vreinterpretq_f16_s16(a); +} + +// CHECK: test_vreinterpretq_f16_s32 +float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) { + return vreinterpretq_f16_s32(a); +} + +// CHECK: test_vreinterpretq_f16_s64 +float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) { + return vreinterpretq_f16_s64(a); +} + +// CHECK: test_vreinterpretq_f16_u8 +float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) 
{ + return vreinterpretq_f16_u8(a); +} + +// CHECK: test_vreinterpretq_f16_u16 +float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) { + return vreinterpretq_f16_u16(a); +} + +// CHECK: test_vreinterpretq_f16_u32 +float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) { + return vreinterpretq_f16_u32(a); +} + +// CHECK: test_vreinterpretq_f16_u64 +float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) { + return vreinterpretq_f16_u64(a); +} + +// CHECK: test_vreinterpretq_f16_f32 +float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) { + return vreinterpretq_f16_f32(a); +} + +// CHECK: test_vreinterpretq_f16_p8 +float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) { + return vreinterpretq_f16_p8(a); +} + +// CHECK: test_vreinterpretq_f16_p16 +float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) { + return vreinterpretq_f16_p16(a); +} + +// CHECK: test_vreinterpretq_f32_s8 +float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) { + return vreinterpretq_f32_s8(a); +} + +// CHECK: test_vreinterpretq_f32_s16 +float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) { + return vreinterpretq_f32_s16(a); +} + +// CHECK: test_vreinterpretq_f32_s32 +float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) { + return vreinterpretq_f32_s32(a); +} + +// CHECK: test_vreinterpretq_f32_s64 +float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) { + return vreinterpretq_f32_s64(a); +} + +// CHECK: test_vreinterpretq_f32_u8 +float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) { + return vreinterpretq_f32_u8(a); +} + +// CHECK: test_vreinterpretq_f32_u16 +float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) { + return vreinterpretq_f32_u16(a); +} + +// CHECK: test_vreinterpretq_f32_u32 +float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) { + return vreinterpretq_f32_u32(a); +} + +// CHECK: test_vreinterpretq_f32_u64 +float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) { + return vreinterpretq_f32_u64(a); +} + +// CHECK: test_vreinterpretq_f32_f16 +float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) { + return vreinterpretq_f32_f16(a); +} + +// CHECK: test_vreinterpretq_f32_p8 +float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) { + return vreinterpretq_f32_p8(a); +} + +// CHECK: test_vreinterpretq_f32_p16 +float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) { + return vreinterpretq_f32_p16(a); +} + +// CHECK: test_vreinterpretq_p8_s8 +poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) { + return vreinterpretq_p8_s8(a); +} + +// CHECK: test_vreinterpretq_p8_s16 +poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) { + return vreinterpretq_p8_s16(a); +} + +// CHECK: test_vreinterpretq_p8_s32 +poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) { + return vreinterpretq_p8_s32(a); +} + +// CHECK: test_vreinterpretq_p8_s64 +poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) { + return vreinterpretq_p8_s64(a); +} + +// CHECK: test_vreinterpretq_p8_u8 +poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) { + return vreinterpretq_p8_u8(a); +} + +// CHECK: test_vreinterpretq_p8_u16 +poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) { + return vreinterpretq_p8_u16(a); +} + +// CHECK: test_vreinterpretq_p8_u32 +poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) { + return vreinterpretq_p8_u32(a); +} + +// CHECK: test_vreinterpretq_p8_u64 +poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) { + return vreinterpretq_p8_u64(a); +} + +// CHECK: test_vreinterpretq_p8_f16 +poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) { + return vreinterpretq_p8_f16(a); +} + +// CHECK: test_vreinterpretq_p8_f32 +poly8x16_t test_vreinterpretq_p8_f32(float32x4_t 
a) { + return vreinterpretq_p8_f32(a); +} + +// CHECK: test_vreinterpretq_p8_p16 +poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) { + return vreinterpretq_p8_p16(a); +} + +// CHECK: test_vreinterpretq_p16_s8 +poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) { + return vreinterpretq_p16_s8(a); +} + +// CHECK: test_vreinterpretq_p16_s16 +poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) { + return vreinterpretq_p16_s16(a); +} + +// CHECK: test_vreinterpretq_p16_s32 +poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) { + return vreinterpretq_p16_s32(a); +} + +// CHECK: test_vreinterpretq_p16_s64 +poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) { + return vreinterpretq_p16_s64(a); +} + +// CHECK: test_vreinterpretq_p16_u8 +poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) { + return vreinterpretq_p16_u8(a); +} + +// CHECK: test_vreinterpretq_p16_u16 +poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) { + return vreinterpretq_p16_u16(a); +} + +// CHECK: test_vreinterpretq_p16_u32 +poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) { + return vreinterpretq_p16_u32(a); +} + +// CHECK: test_vreinterpretq_p16_u64 +poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) { + return vreinterpretq_p16_u64(a); +} + +// CHECK: test_vreinterpretq_p16_f16 +poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) { + return vreinterpretq_p16_f16(a); +} + +// CHECK: test_vreinterpretq_p16_f32 +poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) { + return vreinterpretq_p16_f32(a); +} + +// CHECK: test_vreinterpretq_p16_p8 +poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) { + return vreinterpretq_p16_p8(a); +} + + +// CHECK: test_vrev16_s8 +// CHECK: vrev16.8 d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vrev16_s8(int8x8_t a) { + return vrev16_s8(a); +} + +// CHECK: test_vrev16_u8 +// CHECK: vrev16.8 d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vrev16_u8(uint8x8_t a) { + return vrev16_u8(a); +} + +// CHECK: test_vrev16_p8 +// CHECK: vrev16.8 d{{[0-9]+}}, d{{[0-9]+}} +poly8x8_t test_vrev16_p8(poly8x8_t a) { + return vrev16_p8(a); +} + +// CHECK: test_vrev16q_s8 +// CHECK: vrev16.8 q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vrev16q_s8(int8x16_t a) { + return vrev16q_s8(a); +} + +// CHECK: test_vrev16q_u8 +// CHECK: vrev16.8 q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vrev16q_u8(uint8x16_t a) { + return vrev16q_u8(a); +} + +// CHECK: test_vrev16q_p8 +// CHECK: vrev16.8 q{{[0-9]+}}, q{{[0-9]+}} +poly8x16_t test_vrev16q_p8(poly8x16_t a) { + return vrev16q_p8(a); +} + + +// CHECK: test_vrev32_s8 +// CHECK: vrev32.8 d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vrev32_s8(int8x8_t a) { + return vrev32_s8(a); +} + +// CHECK: test_vrev32_s16 +// CHECK: vrev32.16 d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vrev32_s16(int16x4_t a) { + return vrev32_s16(a); +} + +// CHECK: test_vrev32_u8 +// CHECK: vrev32.8 d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vrev32_u8(uint8x8_t a) { + return vrev32_u8(a); +} + +// CHECK: test_vrev32_u16 +// CHECK: vrev32.16 d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vrev32_u16(uint16x4_t a) { + return vrev32_u16(a); +} + +// CHECK: test_vrev32_p8 +// CHECK: vrev32.8 d{{[0-9]+}}, d{{[0-9]+}} +poly8x8_t test_vrev32_p8(poly8x8_t a) { + return vrev32_p8(a); +} + +// CHECK: test_vrev32_p16 +// CHECK: vrev32.16 d{{[0-9]+}}, d{{[0-9]+}} +poly16x4_t test_vrev32_p16(poly16x4_t a) { + return vrev32_p16(a); +} + +// CHECK: test_vrev32q_s8 +// CHECK: vrev32.8 q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vrev32q_s8(int8x16_t a) { + return vrev32q_s8(a); +} + +// CHECK: test_vrev32q_s16 +// CHECK: vrev32.16 q{{[0-9]+}}, q{{[0-9]+}} 
+int16x8_t test_vrev32q_s16(int16x8_t a) { + return vrev32q_s16(a); +} + +// CHECK: test_vrev32q_u8 +// CHECK: vrev32.8 q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vrev32q_u8(uint8x16_t a) { + return vrev32q_u8(a); +} + +// CHECK: test_vrev32q_u16 +// CHECK: vrev32.16 q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vrev32q_u16(uint16x8_t a) { + return vrev32q_u16(a); +} + +// CHECK: test_vrev32q_p8 +// CHECK: vrev32.8 q{{[0-9]+}}, q{{[0-9]+}} +poly8x16_t test_vrev32q_p8(poly8x16_t a) { + return vrev32q_p8(a); +} + +// CHECK: test_vrev32q_p16 +// CHECK: vrev32.16 q{{[0-9]+}}, q{{[0-9]+}} +poly16x8_t test_vrev32q_p16(poly16x8_t a) { + return vrev32q_p16(a); +} + + +// CHECK: test_vrev64_s8 +// CHECK: vrev64.8 d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vrev64_s8(int8x8_t a) { + return vrev64_s8(a); +} + +// CHECK: test_vrev64_s16 +// CHECK: vrev64.16 d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vrev64_s16(int16x4_t a) { + return vrev64_s16(a); +} + +// CHECK: test_vrev64_s32 +// CHECK: vrev64.32 d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vrev64_s32(int32x2_t a) { + return vrev64_s32(a); +} + +// CHECK: test_vrev64_u8 +// CHECK: vrev64.8 d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vrev64_u8(uint8x8_t a) { + return vrev64_u8(a); +} + +// CHECK: test_vrev64_u16 +// CHECK: vrev64.16 d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vrev64_u16(uint16x4_t a) { + return vrev64_u16(a); +} + +// CHECK: test_vrev64_u32 +// CHECK: vrev64.32 d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vrev64_u32(uint32x2_t a) { + return vrev64_u32(a); +} + +// CHECK: test_vrev64_p8 +// CHECK: vrev64.8 d{{[0-9]+}}, d{{[0-9]+}} +poly8x8_t test_vrev64_p8(poly8x8_t a) { + return vrev64_p8(a); +} + +// CHECK: test_vrev64_p16 +// CHECK: vrev64.16 d{{[0-9]+}}, d{{[0-9]+}} +poly16x4_t test_vrev64_p16(poly16x4_t a) { + return vrev64_p16(a); +} + +// CHECK: test_vrev64_f32 +// CHECK: vrev64.32 d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vrev64_f32(float32x2_t a) { + return vrev64_f32(a); +} + +// CHECK: test_vrev64q_s8 +// CHECK: vrev64.8 q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vrev64q_s8(int8x16_t a) { + return vrev64q_s8(a); +} + +// CHECK: test_vrev64q_s16 +// CHECK: vrev64.16 q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vrev64q_s16(int16x8_t a) { + return vrev64q_s16(a); +} + +// CHECK: test_vrev64q_s32 +// CHECK: vrev64.32 q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vrev64q_s32(int32x4_t a) { + return vrev64q_s32(a); +} + +// CHECK: test_vrev64q_u8 +// CHECK: vrev64.8 q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vrev64q_u8(uint8x16_t a) { + return vrev64q_u8(a); +} + +// CHECK: test_vrev64q_u16 +// CHECK: vrev64.16 q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vrev64q_u16(uint16x8_t a) { + return vrev64q_u16(a); +} + +// CHECK: test_vrev64q_u32 +// CHECK: vrev64.32 q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vrev64q_u32(uint32x4_t a) { + return vrev64q_u32(a); +} + +// CHECK: test_vrev64q_p8 +// CHECK: vrev64.8 q{{[0-9]+}}, q{{[0-9]+}} +poly8x16_t test_vrev64q_p8(poly8x16_t a) { + return vrev64q_p8(a); +} + +// CHECK: test_vrev64q_p16 +// CHECK: vrev64.16 q{{[0-9]+}}, q{{[0-9]+}} +poly16x8_t test_vrev64q_p16(poly16x8_t a) { + return vrev64q_p16(a); +} + +// CHECK: test_vrev64q_f32 +// CHECK: vrev64.32 q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vrev64q_f32(float32x4_t a) { + return vrev64q_f32(a); +} + + +// CHECK: test_vrhadd_s8 +// CHECK: vrhadd.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vrhadd_s8(int8x8_t a, int8x8_t b) { + return vrhadd_s8(a, b); +} + +// CHECK: test_vrhadd_s16 +// CHECK: vrhadd.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t 
test_vrhadd_s16(int16x4_t a, int16x4_t b) { + return vrhadd_s16(a, b); +} + +// CHECK: test_vrhadd_s32 +// CHECK: vrhadd.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vrhadd_s32(int32x2_t a, int32x2_t b) { + return vrhadd_s32(a, b); +} + +// CHECK: test_vrhadd_u8 +// CHECK: vrhadd.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vrhadd_u8(uint8x8_t a, uint8x8_t b) { + return vrhadd_u8(a, b); +} + +// CHECK: test_vrhadd_u16 +// CHECK: vrhadd.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vrhadd_u16(uint16x4_t a, uint16x4_t b) { + return vrhadd_u16(a, b); +} + +// CHECK: test_vrhadd_u32 +// CHECK: vrhadd.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vrhadd_u32(uint32x2_t a, uint32x2_t b) { + return vrhadd_u32(a, b); +} + +// CHECK: test_vrhaddq_s8 +// CHECK: vrhadd.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vrhaddq_s8(int8x16_t a, int8x16_t b) { + return vrhaddq_s8(a, b); +} + +// CHECK: test_vrhaddq_s16 +// CHECK: vrhadd.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vrhaddq_s16(int16x8_t a, int16x8_t b) { + return vrhaddq_s16(a, b); +} + +// CHECK: test_vrhaddq_s32 +// CHECK: vrhadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vrhaddq_s32(int32x4_t a, int32x4_t b) { + return vrhaddq_s32(a, b); +} + +// CHECK: test_vrhaddq_u8 +// CHECK: vrhadd.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vrhaddq_u8(uint8x16_t a, uint8x16_t b) { + return vrhaddq_u8(a, b); +} + +// CHECK: test_vrhaddq_u16 +// CHECK: vrhadd.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vrhaddq_u16(uint16x8_t a, uint16x8_t b) { + return vrhaddq_u16(a, b); +} + +// CHECK: test_vrhaddq_u32 +// CHECK: vrhadd.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vrhaddq_u32(uint32x4_t a, uint32x4_t b) { + return vrhaddq_u32(a, b); +} + + +// CHECK: test_vrshl_s8 +// CHECK: vrshl.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) { + return vrshl_s8(a, b); +} + +// CHECK: test_vrshl_s16 +// CHECK: vrshl.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) { + return vrshl_s16(a, b); +} + +// CHECK: test_vrshl_s32 +// CHECK: vrshl.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) { + return vrshl_s32(a, b); +} + +// CHECK: test_vrshl_s64 +// CHECK: vrshl.s64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) { + return vrshl_s64(a, b); +} + +// CHECK: test_vrshl_u8 +// CHECK: vrshl.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) { + return vrshl_u8(a, b); +} + +// CHECK: test_vrshl_u16 +// CHECK: vrshl.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) { + return vrshl_u16(a, b); +} + +// CHECK: test_vrshl_u32 +// CHECK: vrshl.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) { + return vrshl_u32(a, b); +} + +// CHECK: test_vrshl_u64 +// CHECK: vrshl.u64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) { + return vrshl_u64(a, b); +} + +// CHECK: test_vrshlq_s8 +// CHECK: vrshl.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) { + return vrshlq_s8(a, b); +} + +// CHECK: test_vrshlq_s16 +// CHECK: vrshl.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) { + return vrshlq_s16(a, b); +} + +// CHECK: 
test_vrshlq_s32 +// CHECK: vrshl.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) { + return vrshlq_s32(a, b); +} + +// CHECK: test_vrshlq_s64 +// CHECK: vrshl.s64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) { + return vrshlq_s64(a, b); +} + +// CHECK: test_vrshlq_u8 +// CHECK: vrshl.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) { + return vrshlq_u8(a, b); +} + +// CHECK: test_vrshlq_u16 +// CHECK: vrshl.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) { + return vrshlq_u16(a, b); +} + +// CHECK: test_vrshlq_u32 +// CHECK: vrshl.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) { + return vrshlq_u32(a, b); +} + +// CHECK: test_vrshlq_u64 +// CHECK: vrshl.u64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) { + return vrshlq_u64(a, b); +} + + +// CHECK: test_vrshrn_n_s16 +// CHECK: vrshrn.i16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int8x8_t test_vrshrn_n_s16(int16x8_t a) { + return vrshrn_n_s16(a, 1); +} + +// CHECK: test_vrshrn_n_s32 +// CHECK: vrshrn.i32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int16x4_t test_vrshrn_n_s32(int32x4_t a) { + return vrshrn_n_s32(a, 1); +} + +// CHECK: test_vrshrn_n_s64 +// CHECK: vrshrn.i64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int32x2_t test_vrshrn_n_s64(int64x2_t a) { + return vrshrn_n_s64(a, 1); +} + +// CHECK: test_vrshrn_n_u16 +// CHECK: vrshrn.i16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vrshrn_n_u16(uint16x8_t a) { + return vrshrn_n_u16(a, 1); +} + +// CHECK: test_vrshrn_n_u32 +// CHECK: vrshrn.i32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint16x4_t test_vrshrn_n_u32(uint32x4_t a) { + return vrshrn_n_u32(a, 1); +} + +// CHECK: test_vrshrn_n_u64 +// CHECK: vrshrn.i64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vrshrn_n_u64(uint64x2_t a) { + return vrshrn_n_u64(a, 1); +} + + +// CHECK: test_vrshr_n_s8 +// CHECK: vrshr.s8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int8x8_t test_vrshr_n_s8(int8x8_t a) { + return vrshr_n_s8(a, 1); +} + +// CHECK: test_vrshr_n_s16 +// CHECK: vrshr.s16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int16x4_t test_vrshr_n_s16(int16x4_t a) { + return vrshr_n_s16(a, 1); +} + +// CHECK: test_vrshr_n_s32 +// CHECK: vrshr.s32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int32x2_t test_vrshr_n_s32(int32x2_t a) { + return vrshr_n_s32(a, 1); +} + +// CHECK: test_vrshr_n_s64 +// CHECK: vrshr.s64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int64x1_t test_vrshr_n_s64(int64x1_t a) { + return vrshr_n_s64(a, 1); +} + +// CHECK: test_vrshr_n_u8 +// CHECK: vrshr.u8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vrshr_n_u8(uint8x8_t a) { + return vrshr_n_u8(a, 1); +} + +// CHECK: test_vrshr_n_u16 +// CHECK: vrshr.u16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint16x4_t test_vrshr_n_u16(uint16x4_t a) { + return vrshr_n_u16(a, 1); +} + +// CHECK: test_vrshr_n_u32 +// CHECK: vrshr.u32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vrshr_n_u32(uint32x2_t a) { + return vrshr_n_u32(a, 1); +} + +// CHECK: test_vrshr_n_u64 +// CHECK: vrshr.u64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint64x1_t test_vrshr_n_u64(uint64x1_t a) { + return vrshr_n_u64(a, 1); +} + +// CHECK: test_vrshrq_n_s8 +// CHECK: vrshr.s8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int8x16_t test_vrshrq_n_s8(int8x16_t a) { + return vrshrq_n_s8(a, 1); +} + +// CHECK: test_vrshrq_n_s16 +// CHECK: vrshr.s16 
q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int16x8_t test_vrshrq_n_s16(int16x8_t a) { + return vrshrq_n_s16(a, 1); +} + +// CHECK: test_vrshrq_n_s32 +// CHECK: vrshr.s32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int32x4_t test_vrshrq_n_s32(int32x4_t a) { + return vrshrq_n_s32(a, 1); +} + +// CHECK: test_vrshrq_n_s64 +// CHECK: vrshr.s64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int64x2_t test_vrshrq_n_s64(int64x2_t a) { + return vrshrq_n_s64(a, 1); +} + +// CHECK: test_vrshrq_n_u8 +// CHECK: vrshr.u8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x16_t test_vrshrq_n_u8(uint8x16_t a) { + return vrshrq_n_u8(a, 1); +} + +// CHECK: test_vrshrq_n_u16 +// CHECK: vrshr.u16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint16x8_t test_vrshrq_n_u16(uint16x8_t a) { + return vrshrq_n_u16(a, 1); +} + +// CHECK: test_vrshrq_n_u32 +// CHECK: vrshr.u32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x4_t test_vrshrq_n_u32(uint32x4_t a) { + return vrshrq_n_u32(a, 1); +} + +// CHECK: test_vrshrq_n_u64 +// CHECK: vrshr.u64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint64x2_t test_vrshrq_n_u64(uint64x2_t a) { + return vrshrq_n_u64(a, 1); +} + + +// CHECK: test_vrsqrte_f32 +// CHECK: vrsqrte.f32 d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vrsqrte_f32(float32x2_t a) { + return vrsqrte_f32(a); +} + +// CHECK: test_vrsqrte_u32 +// CHECK: vrsqrte.u32 d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vrsqrte_u32(uint32x2_t a) { + return vrsqrte_u32(a); +} + +// CHECK: test_vrsqrteq_f32 +// CHECK: vrsqrte.f32 q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vrsqrteq_f32(float32x4_t a) { + return vrsqrteq_f32(a); +} + +// CHECK: test_vrsqrteq_u32 +// CHECK: vrsqrte.u32 q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vrsqrteq_u32(uint32x4_t a) { + return vrsqrteq_u32(a); +} + + +// CHECK: test_vrsqrts_f32 +// CHECK: vrsqrts.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vrsqrts_f32(float32x2_t a, float32x2_t b) { + return vrsqrts_f32(a, b); +} + +// CHECK: test_vrsqrtsq_f32 +// CHECK: vrsqrts.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vrsqrtsq_f32(float32x4_t a, float32x4_t b) { + return vrsqrtsq_f32(a, b); +} + + +// CHECK: test_vrsra_n_s8 +// CHECK: vrsra.s8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { + return vrsra_n_s8(a, b, 1); +} + +// CHECK: test_vrsra_n_s16 +// CHECK: vrsra.s16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) { + return vrsra_n_s16(a, b, 1); +} + +// CHECK: test_vrsra_n_s32 +// CHECK: vrsra.s32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) { + return vrsra_n_s32(a, b, 1); +} + +// CHECK: test_vrsra_n_s64 +// CHECK: vrsra.s64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) { + return vrsra_n_s64(a, b, 1); +} + +// CHECK: test_vrsra_n_u8 +// CHECK: vrsra.u8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) { + return vrsra_n_u8(a, b, 1); +} + +// CHECK: test_vrsra_n_u16 +// CHECK: vrsra.u16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) { + return vrsra_n_u16(a, b, 1); +} + +// CHECK: test_vrsra_n_u32 +// CHECK: vrsra.u32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) { + return vrsra_n_u32(a, b, 1); +} + +// CHECK: test_vrsra_n_u64 +// CHECK: vrsra.u64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) { + return vrsra_n_u64(a, b, 1); +} + +// CHECK: 
test_vrsraq_n_s8 +// CHECK: vrsra.s8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { + return vrsraq_n_s8(a, b, 1); +} + +// CHECK: test_vrsraq_n_s16 +// CHECK: vrsra.s16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) { + return vrsraq_n_s16(a, b, 1); +} + +// CHECK: test_vrsraq_n_s32 +// CHECK: vrsra.s32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) { + return vrsraq_n_s32(a, b, 1); +} + +// CHECK: test_vrsraq_n_s64 +// CHECK: vrsra.s64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) { + return vrsraq_n_s64(a, b, 1); +} + +// CHECK: test_vrsraq_n_u8 +// CHECK: vrsra.u8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) { + return vrsraq_n_u8(a, b, 1); +} + +// CHECK: test_vrsraq_n_u16 +// CHECK: vrsra.u16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) { + return vrsraq_n_u16(a, b, 1); +} + +// CHECK: test_vrsraq_n_u32 +// CHECK: vrsra.u32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) { + return vrsraq_n_u32(a, b, 1); +} + +// CHECK: test_vrsraq_n_u64 +// CHECK: vrsra.u64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) { + return vrsraq_n_u64(a, b, 1); +} + + +// CHECK: test_vrsubhn_s16 +// CHECK: vrsubhn.i16 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) { + return vrsubhn_s16(a, b); +} + +// CHECK: test_vrsubhn_s32 +// CHECK: vrsubhn.i32 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) { + return vrsubhn_s32(a, b); +} + +// CHECK: test_vrsubhn_s64 +// CHECK: vrsubhn.i64 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) { + return vrsubhn_s64(a, b); +} + +// CHECK: test_vrsubhn_u16 +// CHECK: vrsubhn.i16 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) { + return vrsubhn_u16(a, b); +} + +// CHECK: test_vrsubhn_u32 +// CHECK: vrsubhn.i32 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) { + return vrsubhn_u32(a, b); +} + +// CHECK: test_vrsubhn_u64 +// CHECK: vrsubhn.i64 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) { + return vrsubhn_u64(a, b); +} + + +// CHECK: test_vset_lane_u8 +// CHECK: vmov +uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) { + return vset_lane_u8(a, b, 7); +} + +// CHECK: test_vset_lane_u16 +// CHECK: vmov +uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) { + return vset_lane_u16(a, b, 3); +} + +// CHECK: test_vset_lane_u32 +// CHECK: vmov +uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) { + return vset_lane_u32(a, b, 1); +} + +// CHECK: test_vset_lane_s8 +// CHECK: vmov +int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) { + return vset_lane_s8(a, b, 7); +} + +// CHECK: test_vset_lane_s16 +// CHECK: vmov +int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) { + return vset_lane_s16(a, b, 3); +} + +// CHECK: test_vset_lane_s32 +// CHECK: vmov +int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) { + return vset_lane_s32(a, b, 1); +} + +// CHECK: test_vset_lane_p8 +// CHECK: vmov +poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) { + return vset_lane_p8(a, b, 7); +} + +// CHECK: test_vset_lane_p16 +// CHECK: vmov 
+poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) { + return vset_lane_p16(a, b, 3); +} + +// CHECK: test_vset_lane_f32 +// CHECK: vmov +float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) { + return vset_lane_f32(a, b, 1); +} + +// CHECK: test_vsetq_lane_u8 +// CHECK: vmov +uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) { + return vsetq_lane_u8(a, b, 15); +} + +// CHECK: test_vsetq_lane_u16 +// CHECK: vmov +uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) { + return vsetq_lane_u16(a, b, 7); +} + +// CHECK: test_vsetq_lane_u32 +// CHECK: vmov +uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) { + return vsetq_lane_u32(a, b, 3); +} + +// CHECK: test_vsetq_lane_s8 +// CHECK: vmov +int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) { + return vsetq_lane_s8(a, b, 15); +} + +// CHECK: test_vsetq_lane_s16 +// CHECK: vmov +int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) { + return vsetq_lane_s16(a, b, 7); +} + +// CHECK: test_vsetq_lane_s32 +// CHECK: vmov +int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) { + return vsetq_lane_s32(a, b, 3); +} + +// CHECK: test_vsetq_lane_p8 +// CHECK: vmov +poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) { + return vsetq_lane_p8(a, b, 15); +} + +// CHECK: test_vsetq_lane_p16 +// CHECK: vmov +poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) { + return vsetq_lane_p16(a, b, 7); +} + +// CHECK: test_vsetq_lane_f32 +// CHECK: vmov +float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) { + return vsetq_lane_f32(a, b, 3); +} + +// CHECK: test_vset_lane_s64 +// CHECK: vmov +int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) { + return vset_lane_s64(a, b, 0); +} + +// CHECK: test_vset_lane_u64 +// CHECK: vmov +uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) { + return vset_lane_u64(a, b, 0); +} + +// CHECK: test_vsetq_lane_s64 +// CHECK: vmov +int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) { + return vsetq_lane_s64(a, b, 1); +} + +// CHECK: test_vsetq_lane_u64 +// CHECK: vmov +uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) { + return vsetq_lane_u64(a, b, 1); +} + + +// CHECK: test_vshl_s8 +// CHECK: vshl.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) { + return vshl_s8(a, b); +} + +// CHECK: test_vshl_s16 +// CHECK: vshl.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) { + return vshl_s16(a, b); +} + +// CHECK: test_vshl_s32 +// CHECK: vshl.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) { + return vshl_s32(a, b); +} + +// CHECK: test_vshl_s64 +// CHECK: vshl.s64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) { + return vshl_s64(a, b); +} + +// CHECK: test_vshl_u8 +// CHECK: vshl.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) { + return vshl_u8(a, b); +} + +// CHECK: test_vshl_u16 +// CHECK: vshl.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) { + return vshl_u16(a, b); +} + +// CHECK: test_vshl_u32 +// CHECK: vshl.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) { + return vshl_u32(a, b); +} + +// CHECK: test_vshl_u64 +// CHECK: vshl.u64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) { + return vshl_u64(a, b); +} + +// CHECK: test_vshlq_s8 +// CHECK: vshl.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vshlq_s8(int8x16_t 
a, int8x16_t b) { + return vshlq_s8(a, b); +} + +// CHECK: test_vshlq_s16 +// CHECK: vshl.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) { + return vshlq_s16(a, b); +} + +// CHECK: test_vshlq_s32 +// CHECK: vshl.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) { + return vshlq_s32(a, b); +} + +// CHECK: test_vshlq_s64 +// CHECK: vshl.s64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) { + return vshlq_s64(a, b); +} + +// CHECK: test_vshlq_u8 +// CHECK: vshl.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) { + return vshlq_u8(a, b); +} + +// CHECK: test_vshlq_u16 +// CHECK: vshl.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) { + return vshlq_u16(a, b); +} + +// CHECK: test_vshlq_u32 +// CHECK: vshl.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) { + return vshlq_u32(a, b); +} + +// CHECK: test_vshlq_u64 +// CHECK: vshl.u64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) { + return vshlq_u64(a, b); +} + + +// CHECK: test_vshll_n_s8 +// CHECK: vshll.s8 q{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int16x8_t test_vshll_n_s8(int8x8_t a) { + return vshll_n_s8(a, 1); +} + +// CHECK: test_vshll_n_s16 +// CHECK: vshll.s16 q{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int32x4_t test_vshll_n_s16(int16x4_t a) { + return vshll_n_s16(a, 1); +} + +// CHECK: test_vshll_n_s32 +// CHECK: vshll.s32 q{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int64x2_t test_vshll_n_s32(int32x2_t a) { + return vshll_n_s32(a, 1); +} + +// CHECK: test_vshll_n_u8 +// CHECK: vshll.u8 q{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint16x8_t test_vshll_n_u8(uint8x8_t a) { + return vshll_n_u8(a, 1); +} + +// CHECK: test_vshll_n_u16 +// CHECK: vshll.u16 q{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint32x4_t test_vshll_n_u16(uint16x4_t a) { + return vshll_n_u16(a, 1); +} + +// CHECK: test_vshll_n_u32 +// CHECK: vshll.u32 q{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint64x2_t test_vshll_n_u32(uint32x2_t a) { + return vshll_n_u32(a, 1); +} + + +// CHECK: test_vshl_n_s8 +// CHECK: vshl.i8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int8x8_t test_vshl_n_s8(int8x8_t a) { + return vshl_n_s8(a, 1); +} + +// CHECK: test_vshl_n_s16 +// CHECK: vshl.i16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int16x4_t test_vshl_n_s16(int16x4_t a) { + return vshl_n_s16(a, 1); +} + +// CHECK: test_vshl_n_s32 +// CHECK: vshl.i32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int32x2_t test_vshl_n_s32(int32x2_t a) { + return vshl_n_s32(a, 1); +} + +// CHECK: test_vshl_n_s64 +// CHECK: vshl.i64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int64x1_t test_vshl_n_s64(int64x1_t a) { + return vshl_n_s64(a, 1); +} + +// CHECK: test_vshl_n_u8 +// CHECK: vshl.i8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vshl_n_u8(uint8x8_t a) { + return vshl_n_u8(a, 1); +} + +// CHECK: test_vshl_n_u16 +// CHECK: vshl.i16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint16x4_t test_vshl_n_u16(uint16x4_t a) { + return vshl_n_u16(a, 1); +} + +// CHECK: test_vshl_n_u32 +// CHECK: vshl.i32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vshl_n_u32(uint32x2_t a) { + return vshl_n_u32(a, 1); +} + +// CHECK: test_vshl_n_u64 +// CHECK: vshl.i64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint64x1_t test_vshl_n_u64(uint64x1_t a) { + return vshl_n_u64(a, 1); +} + +// CHECK: test_vshlq_n_s8 +// CHECK: vshl.i8 q{{[0-9]+}}, q{{[0-9]+}}, 
#{{[0-9]+}} +int8x16_t test_vshlq_n_s8(int8x16_t a) { + return vshlq_n_s8(a, 1); +} + +// CHECK: test_vshlq_n_s16 +// CHECK: vshl.i16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int16x8_t test_vshlq_n_s16(int16x8_t a) { + return vshlq_n_s16(a, 1); +} + +// CHECK: test_vshlq_n_s32 +// CHECK: vshl.i32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int32x4_t test_vshlq_n_s32(int32x4_t a) { + return vshlq_n_s32(a, 1); +} + +// CHECK: test_vshlq_n_s64 +// CHECK: vshl.i64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int64x2_t test_vshlq_n_s64(int64x2_t a) { + return vshlq_n_s64(a, 1); +} + +// CHECK: test_vshlq_n_u8 +// CHECK: vshl.i8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x16_t test_vshlq_n_u8(uint8x16_t a) { + return vshlq_n_u8(a, 1); +} + +// CHECK: test_vshlq_n_u16 +// CHECK: vshl.i16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint16x8_t test_vshlq_n_u16(uint16x8_t a) { + return vshlq_n_u16(a, 1); +} + +// CHECK: test_vshlq_n_u32 +// CHECK: vshl.i32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x4_t test_vshlq_n_u32(uint32x4_t a) { + return vshlq_n_u32(a, 1); +} + +// CHECK: test_vshlq_n_u64 +// CHECK: vshl.i64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint64x2_t test_vshlq_n_u64(uint64x2_t a) { + return vshlq_n_u64(a, 1); +} + + +// CHECK: test_vshrn_n_s16 +// CHECK: vshrn.i16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int8x8_t test_vshrn_n_s16(int16x8_t a) { + return vshrn_n_s16(a, 1); +} + +// CHECK: test_vshrn_n_s32 +// CHECK: vshrn.i32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int16x4_t test_vshrn_n_s32(int32x4_t a) { + return vshrn_n_s32(a, 1); +} + +// CHECK: test_vshrn_n_s64 +// CHECK: vshrn.i64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int32x2_t test_vshrn_n_s64(int64x2_t a) { + return vshrn_n_s64(a, 1); +} + +// CHECK: test_vshrn_n_u16 +// CHECK: vshrn.i16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vshrn_n_u16(uint16x8_t a) { + return vshrn_n_u16(a, 1); +} + +// CHECK: test_vshrn_n_u32 +// CHECK: vshrn.i32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint16x4_t test_vshrn_n_u32(uint32x4_t a) { + return vshrn_n_u32(a, 1); +} + +// CHECK: test_vshrn_n_u64 +// CHECK: vshrn.i64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vshrn_n_u64(uint64x2_t a) { + return vshrn_n_u64(a, 1); +} + + +// CHECK: test_vshr_n_s8 +// CHECK: vshr.s8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int8x8_t test_vshr_n_s8(int8x8_t a) { + return vshr_n_s8(a, 1); +} + +// CHECK: test_vshr_n_s16 +// CHECK: vshr.s16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int16x4_t test_vshr_n_s16(int16x4_t a) { + return vshr_n_s16(a, 1); +} + +// CHECK: test_vshr_n_s32 +// CHECK: vshr.s32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int32x2_t test_vshr_n_s32(int32x2_t a) { + return vshr_n_s32(a, 1); +} + +// CHECK: test_vshr_n_s64 +// CHECK: vshr.s64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int64x1_t test_vshr_n_s64(int64x1_t a) { + return vshr_n_s64(a, 1); +} + +// CHECK: test_vshr_n_u8 +// CHECK: vshr.u8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vshr_n_u8(uint8x8_t a) { + return vshr_n_u8(a, 1); +} + +// CHECK: test_vshr_n_u16 +// CHECK: vshr.u16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint16x4_t test_vshr_n_u16(uint16x4_t a) { + return vshr_n_u16(a, 1); +} + +// CHECK: test_vshr_n_u32 +// CHECK: vshr.u32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vshr_n_u32(uint32x2_t a) { + return vshr_n_u32(a, 1); +} + +// CHECK: test_vshr_n_u64 +// CHECK: vshr.u64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint64x1_t test_vshr_n_u64(uint64x1_t a) { + return vshr_n_u64(a, 1); +} + +// CHECK: test_vshrq_n_s8 +// CHECK: vshr.s8 q{{[0-9]+}}, q{{[0-9]+}}, 
#{{[0-9]+}} +int8x16_t test_vshrq_n_s8(int8x16_t a) { + return vshrq_n_s8(a, 1); +} + +// CHECK: test_vshrq_n_s16 +// CHECK: vshr.s16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int16x8_t test_vshrq_n_s16(int16x8_t a) { + return vshrq_n_s16(a, 1); +} + +// CHECK: test_vshrq_n_s32 +// CHECK: vshr.s32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int32x4_t test_vshrq_n_s32(int32x4_t a) { + return vshrq_n_s32(a, 1); +} + +// CHECK: test_vshrq_n_s64 +// CHECK: vshr.s64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int64x2_t test_vshrq_n_s64(int64x2_t a) { + return vshrq_n_s64(a, 1); +} + +// CHECK: test_vshrq_n_u8 +// CHECK: vshr.u8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x16_t test_vshrq_n_u8(uint8x16_t a) { + return vshrq_n_u8(a, 1); +} + +// CHECK: test_vshrq_n_u16 +// CHECK: vshr.u16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint16x8_t test_vshrq_n_u16(uint16x8_t a) { + return vshrq_n_u16(a, 1); +} + +// CHECK: test_vshrq_n_u32 +// CHECK: vshr.u32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x4_t test_vshrq_n_u32(uint32x4_t a) { + return vshrq_n_u32(a, 1); +} + +// CHECK: test_vshrq_n_u64 +// CHECK: vshr.u64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint64x2_t test_vshrq_n_u64(uint64x2_t a) { + return vshrq_n_u64(a, 1); +} + + +// CHECK: test_vsli_n_s8 +// CHECK: vsli.8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) { + return vsli_n_s8(a, b, 1); +} + +// CHECK: test_vsli_n_s16 +// CHECK: vsli.16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) { + return vsli_n_s16(a, b, 1); +} + +// CHECK: test_vsli_n_s32 +// CHECK: vsli.32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) { + return vsli_n_s32(a, b, 1); +} + +// CHECK: test_vsli_n_s64 +// CHECK: vsli.64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) { + return vsli_n_s64(a, b, 1); +} + +// CHECK: test_vsli_n_u8 +// CHECK: vsli.8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) { + return vsli_n_u8(a, b, 1); +} + +// CHECK: test_vsli_n_u16 +// CHECK: vsli.16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) { + return vsli_n_u16(a, b, 1); +} + +// CHECK: test_vsli_n_u32 +// CHECK: vsli.32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) { + return vsli_n_u32(a, b, 1); +} + +// CHECK: test_vsli_n_u64 +// CHECK: vsli.64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) { + return vsli_n_u64(a, b, 1); +} + +// CHECK: test_vsli_n_p8 +// CHECK: vsli.8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) { + return vsli_n_p8(a, b, 1); +} + +// CHECK: test_vsli_n_p16 +// CHECK: vsli.16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) { + return vsli_n_p16(a, b, 1); +} + +// CHECK: test_vsliq_n_s8 +// CHECK: vsli.8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) { + return vsliq_n_s8(a, b, 1); +} + +// CHECK: test_vsliq_n_s16 +// CHECK: vsli.16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) { + return vsliq_n_s16(a, b, 1); +} + +// CHECK: test_vsliq_n_s32 +// CHECK: vsli.32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) { + return vsliq_n_s32(a, b, 1); +} + +// CHECK: test_vsliq_n_s64 +// CHECK: vsli.64 q{{[0-9]+}}, 
q{{[0-9]+}}, #{{[0-9]+}} +int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) { + return vsliq_n_s64(a, b, 1); +} + +// CHECK: test_vsliq_n_u8 +// CHECK: vsli.8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) { + return vsliq_n_u8(a, b, 1); +} + +// CHECK: test_vsliq_n_u16 +// CHECK: vsli.16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) { + return vsliq_n_u16(a, b, 1); +} + +// CHECK: test_vsliq_n_u32 +// CHECK: vsli.32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) { + return vsliq_n_u32(a, b, 1); +} + +// CHECK: test_vsliq_n_u64 +// CHECK: vsli.64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) { + return vsliq_n_u64(a, b, 1); +} + +// CHECK: test_vsliq_n_p8 +// CHECK: vsli.8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) { + return vsliq_n_p8(a, b, 1); +} + +// CHECK: test_vsliq_n_p16 +// CHECK: vsli.16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) { + return vsliq_n_p16(a, b, 1); +} + + +// CHECK: test_vsra_n_s8 +// CHECK: vsra.s8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { + return vsra_n_s8(a, b, 1); +} + +// CHECK: test_vsra_n_s16 +// CHECK: vsra.s16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) { + return vsra_n_s16(a, b, 1); +} + +// CHECK: test_vsra_n_s32 +// CHECK: vsra.s32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) { + return vsra_n_s32(a, b, 1); +} + +// CHECK: test_vsra_n_s64 +// CHECK: vsra.s64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) { + return vsra_n_s64(a, b, 1); +} + +// CHECK: test_vsra_n_u8 +// CHECK: vsra.u8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) { + return vsra_n_u8(a, b, 1); +} + +// CHECK: test_vsra_n_u16 +// CHECK: vsra.u16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) { + return vsra_n_u16(a, b, 1); +} + +// CHECK: test_vsra_n_u32 +// CHECK: vsra.u32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) { + return vsra_n_u32(a, b, 1); +} + +// CHECK: test_vsra_n_u64 +// CHECK: vsra.u64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) { + return vsra_n_u64(a, b, 1); +} + +// CHECK: test_vsraq_n_s8 +// CHECK: vsra.s8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) { + return vsraq_n_s8(a, b, 1); +} + +// CHECK: test_vsraq_n_s16 +// CHECK: vsra.s16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) { + return vsraq_n_s16(a, b, 1); +} + +// CHECK: test_vsraq_n_s32 +// CHECK: vsra.s32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) { + return vsraq_n_s32(a, b, 1); +} + +// CHECK: test_vsraq_n_s64 +// CHECK: vsra.s64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { + return vsraq_n_s64(a, b, 1); +} + +// CHECK: test_vsraq_n_u8 +// CHECK: vsra.u8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) { + return vsraq_n_u8(a, b, 1); +} + +// CHECK: test_vsraq_n_u16 +// CHECK: vsra.u16 q{{[0-9]+}}, q{{[0-9]+}}, 
#{{[0-9]+}} +uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) { + return vsraq_n_u16(a, b, 1); +} + +// CHECK: test_vsraq_n_u32 +// CHECK: vsra.u32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) { + return vsraq_n_u32(a, b, 1); +} + +// CHECK: test_vsraq_n_u64 +// CHECK: vsra.u64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) { + return vsraq_n_u64(a, b, 1); +} + + +// CHECK: test_vsri_n_s8 +// CHECK: vsri.8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) { + return vsri_n_s8(a, b, 1); +} + +// CHECK: test_vsri_n_s16 +// CHECK: vsri.16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) { + return vsri_n_s16(a, b, 1); +} + +// CHECK: test_vsri_n_s32 +// CHECK: vsri.32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) { + return vsri_n_s32(a, b, 1); +} + +// CHECK: test_vsri_n_s64 +// CHECK: vsri.64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) { + return vsri_n_s64(a, b, 1); +} + +// CHECK: test_vsri_n_u8 +// CHECK: vsri.8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) { + return vsri_n_u8(a, b, 1); +} + +// CHECK: test_vsri_n_u16 +// CHECK: vsri.16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) { + return vsri_n_u16(a, b, 1); +} + +// CHECK: test_vsri_n_u32 +// CHECK: vsri.32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) { + return vsri_n_u32(a, b, 1); +} + +// CHECK: test_vsri_n_u64 +// CHECK: vsri.64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) { + return vsri_n_u64(a, b, 1); +} + +// CHECK: test_vsri_n_p8 +// CHECK: vsri.8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) { + return vsri_n_p8(a, b, 1); +} + +// CHECK: test_vsri_n_p16 +// CHECK: vsri.16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}} +poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) { + return vsri_n_p16(a, b, 1); +} + +// CHECK: test_vsriq_n_s8 +// CHECK: vsri.8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) { + return vsriq_n_s8(a, b, 1); +} + +// CHECK: test_vsriq_n_s16 +// CHECK: vsri.16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) { + return vsriq_n_s16(a, b, 1); +} + +// CHECK: test_vsriq_n_s32 +// CHECK: vsri.32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) { + return vsriq_n_s32(a, b, 1); +} + +// CHECK: test_vsriq_n_s64 +// CHECK: vsri.64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) { + return vsriq_n_s64(a, b, 1); +} + +// CHECK: test_vsriq_n_u8 +// CHECK: vsri.8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) { + return vsriq_n_u8(a, b, 1); +} + +// CHECK: test_vsriq_n_u16 +// CHECK: vsri.16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) { + return vsriq_n_u16(a, b, 1); +} + +// CHECK: test_vsriq_n_u32 +// CHECK: vsri.32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) { + return vsriq_n_u32(a, b, 1); +} + +// CHECK: test_vsriq_n_u64 +// CHECK: vsri.64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +uint64x2_t 
test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) { + return vsriq_n_u64(a, b, 1); +} + +// CHECK: test_vsriq_n_p8 +// CHECK: vsri.8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) { + return vsriq_n_p8(a, b, 1); +} + +// CHECK: test_vsriq_n_p16 +// CHECK: vsri.16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}} +poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) { + return vsriq_n_p16(a, b, 1); +} + + +// CHECK: test_vst1q_u8 +// CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1q_u8(uint8_t * a, uint8x16_t b) { + vst1q_u8(a, b); +} + +// CHECK: test_vst1q_u16 +// CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1q_u16(uint16_t * a, uint16x8_t b) { + vst1q_u16(a, b); +} + +// CHECK: test_vst1q_u32 +// CHECK: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1q_u32(uint32_t * a, uint32x4_t b) { + vst1q_u32(a, b); +} + +// CHECK: test_vst1q_u64 +// CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1q_u64(uint64_t * a, uint64x2_t b) { + vst1q_u64(a, b); +} + +// CHECK: test_vst1q_s8 +// CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1q_s8(int8_t * a, int8x16_t b) { + vst1q_s8(a, b); +} + +// CHECK: test_vst1q_s16 +// CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1q_s16(int16_t * a, int16x8_t b) { + vst1q_s16(a, b); +} + +// CHECK: test_vst1q_s32 +// CHECK: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1q_s32(int32_t * a, int32x4_t b) { + vst1q_s32(a, b); +} + +// CHECK: test_vst1q_s64 +// CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1q_s64(int64_t * a, int64x2_t b) { + vst1q_s64(a, b); +} + +// CHECK: test_vst1q_f16 +// CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1q_f16(float16_t * a, float16x8_t b) { + vst1q_f16(a, b); +} + +// CHECK: test_vst1q_f32 +// CHECK: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1q_f32(float32_t * a, float32x4_t b) { + vst1q_f32(a, b); +} + +// CHECK: test_vst1q_p8 +// CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1q_p8(poly8_t * a, poly8x16_t b) { + vst1q_p8(a, b); +} + +// CHECK: test_vst1q_p16 +// CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1q_p16(poly16_t * a, poly16x8_t b) { + vst1q_p16(a, b); +} + +// CHECK: test_vst1_u8 +// CHECK: vst1.8 {d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1_u8(uint8_t * a, uint8x8_t b) { + vst1_u8(a, b); +} + +// CHECK: test_vst1_u16 +// CHECK: vst1.16 {d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1_u16(uint16_t * a, uint16x4_t b) { + vst1_u16(a, b); +} + +// CHECK: test_vst1_u32 +// CHECK: vst1.32 {d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1_u32(uint32_t * a, uint32x2_t b) { + vst1_u32(a, b); +} + +// CHECK: test_vst1_u64 +// CHECK: vst1.64 {d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1_u64(uint64_t * a, uint64x1_t b) { + vst1_u64(a, b); +} + +// CHECK: test_vst1_s8 +// CHECK: vst1.8 {d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1_s8(int8_t * a, int8x8_t b) { + vst1_s8(a, b); +} + +// CHECK: test_vst1_s16 +// CHECK: vst1.16 {d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1_s16(int16_t * a, int16x4_t b) { + vst1_s16(a, b); +} + +// CHECK: test_vst1_s32 +// CHECK: vst1.32 {d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1_s32(int32_t * a, int32x2_t b) { + vst1_s32(a, b); +} + +// CHECK: test_vst1_s64 +// CHECK: vst1.64 {d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1_s64(int64_t * a, int64x1_t b) { + vst1_s64(a, b); +} + +// CHECK: test_vst1_f16 +// CHECK: 
vst1.16 {d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1_f16(float16_t * a, float16x4_t b) { + vst1_f16(a, b); +} + +// CHECK: test_vst1_f32 +// CHECK: vst1.32 {d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1_f32(float32_t * a, float32x2_t b) { + vst1_f32(a, b); +} + +// CHECK: test_vst1_p8 +// CHECK: vst1.8 {d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1_p8(poly8_t * a, poly8x8_t b) { + vst1_p8(a, b); +} + +// CHECK: test_vst1_p16 +// CHECK: vst1.16 {d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst1_p16(poly16_t * a, poly16x4_t b) { + vst1_p16(a, b); +} + + +// CHECK: test_vst1q_lane_u8 +// CHECK: vst1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst1q_lane_u8(uint8_t * a, uint8x16_t b) { + vst1q_lane_u8(a, b, 15); +} + +// CHECK: test_vst1q_lane_u16 +// CHECK: vst1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +void test_vst1q_lane_u16(uint16_t * a, uint16x8_t b) { + vst1q_lane_u16(a, b, 7); +} + +// CHECK: test_vst1q_lane_u32 +// CHECK: vst1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32] +void test_vst1q_lane_u32(uint32_t * a, uint32x4_t b) { + vst1q_lane_u32(a, b, 3); +} + +// CHECK: test_vst1q_lane_u64 +// CHECK: {{str|vstr|vmov}} +void test_vst1q_lane_u64(uint64_t * a, uint64x2_t b) { + vst1q_lane_u64(a, b, 1); +} + +// CHECK: test_vst1q_lane_s8 +// CHECK: vst1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst1q_lane_s8(int8_t * a, int8x16_t b) { + vst1q_lane_s8(a, b, 15); +} + +// CHECK: test_vst1q_lane_s16 +// CHECK: vst1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +void test_vst1q_lane_s16(int16_t * a, int16x8_t b) { + vst1q_lane_s16(a, b, 7); +} + +// CHECK: test_vst1q_lane_s32 +// CHECK: vst1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32] +void test_vst1q_lane_s32(int32_t * a, int32x4_t b) { + vst1q_lane_s32(a, b, 3); +} + +// CHECK: test_vst1q_lane_s64 +// CHECK: {{str|vstr|vmov}} +void test_vst1q_lane_s64(int64_t * a, int64x2_t b) { + vst1q_lane_s64(a, b, 1); +} + +// CHECK: test_vst1q_lane_f16 +// CHECK: vst1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +void test_vst1q_lane_f16(float16_t * a, float16x8_t b) { + vst1q_lane_f16(a, b, 7); +} + +// CHECK: test_vst1q_lane_f32 +// CHECK: vst1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32] +void test_vst1q_lane_f32(float32_t * a, float32x4_t b) { + vst1q_lane_f32(a, b, 3); +} + +// CHECK: test_vst1q_lane_p8 +// CHECK: vst1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst1q_lane_p8(poly8_t * a, poly8x16_t b) { + vst1q_lane_p8(a, b, 15); +} + +// CHECK: test_vst1q_lane_p16 +// CHECK: vst1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +void test_vst1q_lane_p16(poly16_t * a, poly16x8_t b) { + vst1q_lane_p16(a, b, 7); +} + +// CHECK: test_vst1_lane_u8 +// CHECK: vst1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst1_lane_u8(uint8_t * a, uint8x8_t b) { + vst1_lane_u8(a, b, 7); +} + +// CHECK: test_vst1_lane_u16 +// CHECK: vst1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +void test_vst1_lane_u16(uint16_t * a, uint16x4_t b) { + vst1_lane_u16(a, b, 3); +} + +// CHECK: test_vst1_lane_u32 +// CHECK: vst1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32] +void test_vst1_lane_u32(uint32_t * a, uint32x2_t b) { + vst1_lane_u32(a, b, 1); +} + +// CHECK: test_vst1_lane_u64 +// CHECK: {{str|vstr|vmov}} +void test_vst1_lane_u64(uint64_t * a, uint64x1_t b) { + vst1_lane_u64(a, b, 0); +} + +// CHECK: test_vst1_lane_s8 +// CHECK: vst1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst1_lane_s8(int8_t * a, int8x8_t b) { + vst1_lane_s8(a, b, 7); +} + +// CHECK: test_vst1_lane_s16 +// CHECK: vst1.16 
{d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +void test_vst1_lane_s16(int16_t * a, int16x4_t b) { + vst1_lane_s16(a, b, 3); +} + +// CHECK: test_vst1_lane_s32 +// CHECK: vst1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32] +void test_vst1_lane_s32(int32_t * a, int32x2_t b) { + vst1_lane_s32(a, b, 1); +} + +// CHECK: test_vst1_lane_s64 +// CHECK: {{str|vstr|vmov}} +void test_vst1_lane_s64(int64_t * a, int64x1_t b) { + vst1_lane_s64(a, b, 0); +} + +// CHECK: test_vst1_lane_f16 +// CHECK: vst1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +void test_vst1_lane_f16(float16_t * a, float16x4_t b) { + vst1_lane_f16(a, b, 3); +} + +// CHECK: test_vst1_lane_f32 +// CHECK: vst1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32] +void test_vst1_lane_f32(float32_t * a, float32x2_t b) { + vst1_lane_f32(a, b, 1); +} + +// CHECK: test_vst1_lane_p8 +// CHECK: vst1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst1_lane_p8(poly8_t * a, poly8x8_t b) { + vst1_lane_p8(a, b, 7); +} + +// CHECK: test_vst1_lane_p16 +// CHECK: vst1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16] +void test_vst1_lane_p16(poly16_t * a, poly16x4_t b) { + vst1_lane_p16(a, b, 3); +} + + +// CHECK: test_vst2q_u8 +// CHECK: vst2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2q_u8(uint8_t * a, uint8x16x2_t b) { + vst2q_u8(a, b); +} + +// CHECK: test_vst2q_u16 +// CHECK: vst2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2q_u16(uint16_t * a, uint16x8x2_t b) { + vst2q_u16(a, b); +} + +// CHECK: test_vst2q_u32 +// CHECK: vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2q_u32(uint32_t * a, uint32x4x2_t b) { + vst2q_u32(a, b); +} + +// CHECK: test_vst2q_s8 +// CHECK: vst2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2q_s8(int8_t * a, int8x16x2_t b) { + vst2q_s8(a, b); +} + +// CHECK: test_vst2q_s16 +// CHECK: vst2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2q_s16(int16_t * a, int16x8x2_t b) { + vst2q_s16(a, b); +} + +// CHECK: test_vst2q_s32 +// CHECK: vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2q_s32(int32_t * a, int32x4x2_t b) { + vst2q_s32(a, b); +} + +// CHECK: test_vst2q_f16 +// CHECK: vst2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2q_f16(float16_t * a, float16x8x2_t b) { + vst2q_f16(a, b); +} + +// CHECK: test_vst2q_f32 +// CHECK: vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2q_f32(float32_t * a, float32x4x2_t b) { + vst2q_f32(a, b); +} + +// CHECK: test_vst2q_p8 +// CHECK: vst2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2q_p8(poly8_t * a, poly8x16x2_t b) { + vst2q_p8(a, b); +} + +// CHECK: test_vst2q_p16 +// CHECK: vst2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2q_p16(poly16_t * a, poly16x8x2_t b) { + vst2q_p16(a, b); +} + +// CHECK: test_vst2_u8 +// CHECK: vst2.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2_u8(uint8_t * a, uint8x8x2_t b) { + vst2_u8(a, b); +} + +// CHECK: test_vst2_u16 +// CHECK: vst2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2_u16(uint16_t * a, uint16x4x2_t b) { + vst2_u16(a, b); +} + +// CHECK: test_vst2_u32 +// CHECK: vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2_u32(uint32_t * a, uint32x2x2_t b) { + vst2_u32(a, b); +} + +// CHECK: test_vst2_u64 +// 
CHECK: vst1.64 +void test_vst2_u64(uint64_t * a, uint64x1x2_t b) { + vst2_u64(a, b); +} + +// CHECK: test_vst2_s8 +// CHECK: vst2.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2_s8(int8_t * a, int8x8x2_t b) { + vst2_s8(a, b); +} + +// CHECK: test_vst2_s16 +// CHECK: vst2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2_s16(int16_t * a, int16x4x2_t b) { + vst2_s16(a, b); +} + +// CHECK: test_vst2_s32 +// CHECK: vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2_s32(int32_t * a, int32x2x2_t b) { + vst2_s32(a, b); +} + +// CHECK: test_vst2_s64 +// CHECK: vst1.64 +void test_vst2_s64(int64_t * a, int64x1x2_t b) { + vst2_s64(a, b); +} + +// CHECK: test_vst2_f16 +// CHECK: vst2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2_f16(float16_t * a, float16x4x2_t b) { + vst2_f16(a, b); +} + +// CHECK: test_vst2_f32 +// CHECK: vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2_f32(float32_t * a, float32x2x2_t b) { + vst2_f32(a, b); +} + +// CHECK: test_vst2_p8 +// CHECK: vst2.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2_p8(poly8_t * a, poly8x8x2_t b) { + vst2_p8(a, b); +} + +// CHECK: test_vst2_p16 +// CHECK: vst2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst2_p16(poly16_t * a, poly16x4x2_t b) { + vst2_p16(a, b); +} + + +// CHECK: test_vst2q_lane_u16 +// CHECK: vst2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2q_lane_u16(uint16_t * a, uint16x8x2_t b) { + vst2q_lane_u16(a, b, 7); +} + +// CHECK: test_vst2q_lane_u32 +// CHECK: vst2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2q_lane_u32(uint32_t * a, uint32x4x2_t b) { + vst2q_lane_u32(a, b, 3); +} + +// CHECK: test_vst2q_lane_s16 +// CHECK: vst2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2q_lane_s16(int16_t * a, int16x8x2_t b) { + vst2q_lane_s16(a, b, 7); +} + +// CHECK: test_vst2q_lane_s32 +// CHECK: vst2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2q_lane_s32(int32_t * a, int32x4x2_t b) { + vst2q_lane_s32(a, b, 3); +} + +// CHECK: test_vst2q_lane_f16 +// CHECK: vst2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2q_lane_f16(float16_t * a, float16x8x2_t b) { + vst2q_lane_f16(a, b, 7); +} + +// CHECK: test_vst2q_lane_f32 +// CHECK: vst2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2q_lane_f32(float32_t * a, float32x4x2_t b) { + vst2q_lane_f32(a, b, 3); +} + +// CHECK: test_vst2q_lane_p16 +// CHECK: vst2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2q_lane_p16(poly16_t * a, poly16x8x2_t b) { + vst2q_lane_p16(a, b, 7); +} + +// CHECK: test_vst2_lane_u8 +// CHECK: vst2.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2_lane_u8(uint8_t * a, uint8x8x2_t b) { + vst2_lane_u8(a, b, 7); +} + +// CHECK: test_vst2_lane_u16 +// CHECK: vst2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2_lane_u16(uint16_t * a, uint16x4x2_t b) { + vst2_lane_u16(a, b, 3); +} + +// CHECK: test_vst2_lane_u32 +// CHECK: vst2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2_lane_u32(uint32_t * a, uint32x2x2_t b) { + vst2_lane_u32(a, b, 1); +} + +// CHECK: test_vst2_lane_s8 +// CHECK: vst2.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2_lane_s8(int8_t * a, int8x8x2_t b) { + vst2_lane_s8(a, b, 
7); +} + +// CHECK: test_vst2_lane_s16 +// CHECK: vst2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2_lane_s16(int16_t * a, int16x4x2_t b) { + vst2_lane_s16(a, b, 3); +} + +// CHECK: test_vst2_lane_s32 +// CHECK: vst2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2_lane_s32(int32_t * a, int32x2x2_t b) { + vst2_lane_s32(a, b, 1); +} + +// CHECK: test_vst2_lane_f16 +// CHECK: vst2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2_lane_f16(float16_t * a, float16x4x2_t b) { + vst2_lane_f16(a, b, 3); +} + +// CHECK: test_vst2_lane_f32 +// CHECK: vst2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2_lane_f32(float32_t * a, float32x2x2_t b) { + vst2_lane_f32(a, b, 1); +} + +// CHECK: test_vst2_lane_p8 +// CHECK: vst2.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2_lane_p8(poly8_t * a, poly8x8x2_t b) { + vst2_lane_p8(a, b, 7); +} + +// CHECK: test_vst2_lane_p16 +// CHECK: vst2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst2_lane_p16(poly16_t * a, poly16x4x2_t b) { + vst2_lane_p16(a, b, 3); +} + + +// CHECK: test_vst3q_u8 +// CHECK: vst3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst3q_u8(uint8_t * a, uint8x16x3_t b) { + vst3q_u8(a, b); +} + +// CHECK: test_vst3q_u16 +// CHECK: vst3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst3q_u16(uint16_t * a, uint16x8x3_t b) { + vst3q_u16(a, b); +} + +// CHECK: test_vst3q_u32 +// CHECK: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst3q_u32(uint32_t * a, uint32x4x3_t b) { + vst3q_u32(a, b); +} + +// CHECK: test_vst3q_s8 +// CHECK: vst3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst3q_s8(int8_t * a, int8x16x3_t b) { + vst3q_s8(a, b); +} + +// CHECK: test_vst3q_s16 +// CHECK: vst3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst3q_s16(int16_t * a, int16x8x3_t b) { + vst3q_s16(a, b); +} + +// CHECK: test_vst3q_s32 +// CHECK: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst3q_s32(int32_t * a, int32x4x3_t b) { + vst3q_s32(a, b); +} + +// CHECK: test_vst3q_f16 +// CHECK: vst3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst3q_f16(float16_t * a, float16x8x3_t b) { + vst3q_f16(a, b); +} + +// CHECK: test_vst3q_f32 +// CHECK: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst3q_f32(float32_t * a, float32x4x3_t b) { + vst3q_f32(a, b); +} + +// CHECK: test_vst3q_p8 +// CHECK: vst3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst3q_p8(poly8_t * a, poly8x16x3_t b) { + vst3q_p8(a, b); +} + +// CHECK: test_vst3q_p16 +// CHECK: vst3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst3q_p16(poly16_t * a, poly16x8x3_t b) { + vst3q_p16(a, b); +} + +// CHECK: test_vst3_u8 +// CHECK: vst3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst3_u8(uint8_t * a, uint8x8x3_t b) { + vst3_u8(a, b); +} + +// CHECK: test_vst3_u16 +// CHECK: vst3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst3_u16(uint16_t * a, uint16x4x3_t b) { + vst3_u16(a, b); +} + +// CHECK: test_vst3_u32 +// CHECK: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst3_u32(uint32_t * a, uint32x2x3_t b) { + vst3_u32(a, b); +} + +// CHECK: test_vst3_u64 +// CHECK: vst1.64 +void test_vst3_u64(uint64_t * a, uint64x1x3_t b) { + vst3_u64(a, b); +} + +// CHECK: test_vst3_s8 +// CHECK: vst3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, 
[r{{[0-9]+}}] +void test_vst3_s8(int8_t * a, int8x8x3_t b) { + vst3_s8(a, b); +} + +// CHECK: test_vst3_s16 +// CHECK: vst3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst3_s16(int16_t * a, int16x4x3_t b) { + vst3_s16(a, b); +} + +// CHECK: test_vst3_s32 +// CHECK: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst3_s32(int32_t * a, int32x2x3_t b) { + vst3_s32(a, b); +} + +// CHECK: test_vst3_s64 +// CHECK: vst1.64 +void test_vst3_s64(int64_t * a, int64x1x3_t b) { + vst3_s64(a, b); +} + +// CHECK: test_vst3_f16 +// CHECK: vst3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst3_f16(float16_t * a, float16x4x3_t b) { + vst3_f16(a, b); +} + +// CHECK: test_vst3_f32 +// CHECK: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst3_f32(float32_t * a, float32x2x3_t b) { + vst3_f32(a, b); +} + +// CHECK: test_vst3_p8 +// CHECK: vst3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst3_p8(poly8_t * a, poly8x8x3_t b) { + vst3_p8(a, b); +} + +// CHECK: test_vst3_p16 +// CHECK: vst3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst3_p16(poly16_t * a, poly16x4x3_t b) { + vst3_p16(a, b); +} + + +// CHECK: test_vst3q_lane_u16 +// CHECK: vst3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +void test_vst3q_lane_u16(uint16_t * a, uint16x8x3_t b) { + vst3q_lane_u16(a, b, 7); +} + +// CHECK: test_vst3q_lane_u32 +// CHECK: vst3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +void test_vst3q_lane_u32(uint32_t * a, uint32x4x3_t b) { + vst3q_lane_u32(a, b, 3); +} + +// CHECK: test_vst3q_lane_s16 +// CHECK: vst3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +void test_vst3q_lane_s16(int16_t * a, int16x8x3_t b) { + vst3q_lane_s16(a, b, 7); +} + +// CHECK: test_vst3q_lane_s32 +// CHECK: vst3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +void test_vst3q_lane_s32(int32_t * a, int32x4x3_t b) { + vst3q_lane_s32(a, b, 3); +} + +// CHECK: test_vst3q_lane_f16 +// CHECK: vst3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +void test_vst3q_lane_f16(float16_t * a, float16x8x3_t b) { + vst3q_lane_f16(a, b, 7); +} + +// CHECK: test_vst3q_lane_f32 +// CHECK: vst3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +void test_vst3q_lane_f32(float32_t * a, float32x4x3_t b) { + vst3q_lane_f32(a, b, 3); +} + +// CHECK: test_vst3q_lane_p16 +// CHECK: vst3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +void test_vst3q_lane_p16(poly16_t * a, poly16x8x3_t b) { + vst3q_lane_p16(a, b, 7); +} + +// CHECK: test_vst3_lane_u8 +// CHECK: vst3.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst3_lane_u8(uint8_t * a, uint8x8x3_t b) { + vst3_lane_u8(a, b, 7); +} + +// CHECK: test_vst3_lane_u16 +// CHECK: vst3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst3_lane_u16(uint16_t * a, uint16x4x3_t b) { + vst3_lane_u16(a, b, 3); +} + +// CHECK: test_vst3_lane_u32 +// CHECK: vst3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst3_lane_u32(uint32_t * a, uint32x2x3_t b) { + vst3_lane_u32(a, b, 1); +} + +// CHECK: test_vst3_lane_s8 +// CHECK: vst3.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, 
[r{{[0-9]+}}] +void test_vst3_lane_s8(int8_t * a, int8x8x3_t b) { + vst3_lane_s8(a, b, 7); +} + +// CHECK: test_vst3_lane_s16 +// CHECK: vst3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst3_lane_s16(int16_t * a, int16x4x3_t b) { + vst3_lane_s16(a, b, 3); +} + +// CHECK: test_vst3_lane_s32 +// CHECK: vst3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst3_lane_s32(int32_t * a, int32x2x3_t b) { + vst3_lane_s32(a, b, 1); +} + +// CHECK: test_vst3_lane_f16 +// CHECK: vst3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst3_lane_f16(float16_t * a, float16x4x3_t b) { + vst3_lane_f16(a, b, 3); +} + +// CHECK: test_vst3_lane_f32 +// CHECK: vst3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst3_lane_f32(float32_t * a, float32x2x3_t b) { + vst3_lane_f32(a, b, 1); +} + +// CHECK: test_vst3_lane_p8 +// CHECK: vst3.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst3_lane_p8(poly8_t * a, poly8x8x3_t b) { + vst3_lane_p8(a, b, 7); +} + +// CHECK: test_vst3_lane_p16 +// CHECK: vst3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst3_lane_p16(poly16_t * a, poly16x4x3_t b) { + vst3_lane_p16(a, b, 3); +} + + +// CHECK: test_vst4q_u8 +// CHECK: vst4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst4q_u8(uint8_t * a, uint8x16x4_t b) { + vst4q_u8(a, b); +} + +// CHECK: test_vst4q_u16 +// CHECK: vst4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst4q_u16(uint16_t * a, uint16x8x4_t b) { + vst4q_u16(a, b); +} + +// CHECK: test_vst4q_u32 +// CHECK: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst4q_u32(uint32_t * a, uint32x4x4_t b) { + vst4q_u32(a, b); +} + +// CHECK: test_vst4q_s8 +// CHECK: vst4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst4q_s8(int8_t * a, int8x16x4_t b) { + vst4q_s8(a, b); +} + +// CHECK: test_vst4q_s16 +// CHECK: vst4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst4q_s16(int16_t * a, int16x8x4_t b) { + vst4q_s16(a, b); +} + +// CHECK: test_vst4q_s32 +// CHECK: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst4q_s32(int32_t * a, int32x4x4_t b) { + vst4q_s32(a, b); +} + +// CHECK: test_vst4q_f16 +// CHECK: vst4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst4q_f16(float16_t * a, float16x8x4_t b) { + vst4q_f16(a, b); +} + +// CHECK: test_vst4q_f32 +// CHECK: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst4q_f32(float32_t * a, float32x4x4_t b) { + vst4q_f32(a, b); +} + +// CHECK: test_vst4q_p8 +// CHECK: vst4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst4q_p8(poly8_t * a, poly8x16x4_t b) { + vst4q_p8(a, b); +} + +// CHECK: test_vst4q_p16 +// CHECK: vst4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} +void test_vst4q_p16(poly16_t * a, poly16x8x4_t b) { + vst4q_p16(a, b); +} + +// CHECK: test_vst4_u8 +// CHECK: vst4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst4_u8(uint8_t * a, uint8x8x4_t b) { + vst4_u8(a, b); +} + +// CHECK: test_vst4_u16 +// CHECK: vst4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst4_u16(uint16_t * a, uint16x4x4_t b) { + vst4_u16(a, 
b); +} + +// CHECK: test_vst4_u32 +// CHECK: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst4_u32(uint32_t * a, uint32x2x4_t b) { + vst4_u32(a, b); +} + +// CHECK: test_vst4_u64 +// CHECK: vst1.64 +void test_vst4_u64(uint64_t * a, uint64x1x4_t b) { + vst4_u64(a, b); +} + +// CHECK: test_vst4_s8 +// CHECK: vst4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst4_s8(int8_t * a, int8x8x4_t b) { + vst4_s8(a, b); +} + +// CHECK: test_vst4_s16 +// CHECK: vst4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst4_s16(int16_t * a, int16x4x4_t b) { + vst4_s16(a, b); +} + +// CHECK: test_vst4_s32 +// CHECK: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst4_s32(int32_t * a, int32x2x4_t b) { + vst4_s32(a, b); +} + +// CHECK: test_vst4_s64 +// CHECK: vst1.64 +void test_vst4_s64(int64_t * a, int64x1x4_t b) { + vst4_s64(a, b); +} + +// CHECK: test_vst4_f16 +// CHECK: vst4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst4_f16(float16_t * a, float16x4x4_t b) { + vst4_f16(a, b); +} + +// CHECK: test_vst4_f32 +// CHECK: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst4_f32(float32_t * a, float32x2x4_t b) { + vst4_f32(a, b); +} + +// CHECK: test_vst4_p8 +// CHECK: vst4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst4_p8(poly8_t * a, poly8x8x4_t b) { + vst4_p8(a, b); +} + +// CHECK: test_vst4_p16 +// CHECK: vst4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}] +void test_vst4_p16(poly16_t * a, poly16x4x4_t b) { + vst4_p16(a, b); +} + + +// CHECK: test_vst4q_lane_u16 +// CHECK: vst4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +void test_vst4q_lane_u16(uint16_t * a, uint16x8x4_t b) { + vst4q_lane_u16(a, b, 7); +} + +// CHECK: test_vst4q_lane_u32 +// CHECK: vst4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +void test_vst4q_lane_u32(uint32_t * a, uint32x4x4_t b) { + vst4q_lane_u32(a, b, 3); +} + +// CHECK: test_vst4q_lane_s16 +// CHECK: vst4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +void test_vst4q_lane_s16(int16_t * a, int16x8x4_t b) { + vst4q_lane_s16(a, b, 7); +} + +// CHECK: test_vst4q_lane_s32 +// CHECK: vst4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +void test_vst4q_lane_s32(int32_t * a, int32x4x4_t b) { + vst4q_lane_s32(a, b, 3); +} + +// CHECK: test_vst4q_lane_f16 +// CHECK: vst4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +void test_vst4q_lane_f16(float16_t * a, float16x8x4_t b) { + vst4q_lane_f16(a, b, 7); +} + +// CHECK: test_vst4q_lane_f32 +// CHECK: vst4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +void test_vst4q_lane_f32(float32_t * a, float32x4x4_t b) { + vst4q_lane_f32(a, b, 3); +} + +// CHECK: test_vst4q_lane_p16 +// CHECK: vst4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]} +void test_vst4q_lane_p16(poly16_t * a, poly16x8x4_t b) { + vst4q_lane_p16(a, b, 7); +} + +// CHECK: test_vst4_lane_u8 +// CHECK: vst4.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], 
d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst4_lane_u8(uint8_t * a, uint8x8x4_t b) { + vst4_lane_u8(a, b, 7); +} + +// CHECK: test_vst4_lane_u16 +// CHECK: vst4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst4_lane_u16(uint16_t * a, uint16x4x4_t b) { + vst4_lane_u16(a, b, 3); +} + +// CHECK: test_vst4_lane_u32 +// CHECK: vst4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst4_lane_u32(uint32_t * a, uint32x2x4_t b) { + vst4_lane_u32(a, b, 1); +} + +// CHECK: test_vst4_lane_s8 +// CHECK: vst4.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst4_lane_s8(int8_t * a, int8x8x4_t b) { + vst4_lane_s8(a, b, 7); +} + +// CHECK: test_vst4_lane_s16 +// CHECK: vst4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst4_lane_s16(int16_t * a, int16x4x4_t b) { + vst4_lane_s16(a, b, 3); +} + +// CHECK: test_vst4_lane_s32 +// CHECK: vst4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst4_lane_s32(int32_t * a, int32x2x4_t b) { + vst4_lane_s32(a, b, 1); +} + +// CHECK: test_vst4_lane_f16 +// CHECK: vst4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst4_lane_f16(float16_t * a, float16x4x4_t b) { + vst4_lane_f16(a, b, 3); +} + +// CHECK: test_vst4_lane_f32 +// CHECK: vst4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst4_lane_f32(float32_t * a, float32x2x4_t b) { + vst4_lane_f32(a, b, 1); +} + +// CHECK: test_vst4_lane_p8 +// CHECK: vst4.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst4_lane_p8(poly8_t * a, poly8x8x4_t b) { + vst4_lane_p8(a, b, 7); +} + +// CHECK: test_vst4_lane_p16 +// CHECK: vst4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}] +void test_vst4_lane_p16(poly16_t * a, poly16x4x4_t b) { + vst4_lane_p16(a, b, 3); +} + + +// CHECK: test_vsub_s8 +// CHECK: vsub.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int8x8_t test_vsub_s8(int8x8_t a, int8x8_t b) { + return vsub_s8(a, b); +} + +// CHECK: test_vsub_s16 +// CHECK: vsub.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x4_t test_vsub_s16(int16x4_t a, int16x4_t b) { + return vsub_s16(a, b); +} + +// CHECK: test_vsub_s32 +// CHECK: vsub.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x2_t test_vsub_s32(int32x2_t a, int32x2_t b) { + return vsub_s32(a, b); +} + +// CHECK: test_vsub_s64 +// CHECK: vsub.i64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x1_t test_vsub_s64(int64x1_t a, int64x1_t b) { + return vsub_s64(a, b); +} + +// CHECK: test_vsub_f32 +// CHECK: vsub.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +float32x2_t test_vsub_f32(float32x2_t a, float32x2_t b) { + return vsub_f32(a, b); +} + +// CHECK: test_vsub_u8 +// CHECK: vsub.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vsub_u8(uint8x8_t a, uint8x8_t b) { + return vsub_u8(a, b); +} + +// CHECK: test_vsub_u16 +// CHECK: vsub.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vsub_u16(uint16x4_t a, uint16x4_t b) { + return vsub_u16(a, b); +} + +// 
CHECK: test_vsub_u32 +// CHECK: vsub.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vsub_u32(uint32x2_t a, uint32x2_t b) { + return vsub_u32(a, b); +} + +// CHECK: test_vsub_u64 +// CHECK: vsub.i64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x1_t test_vsub_u64(uint64x1_t a, uint64x1_t b) { + return vsub_u64(a, b); +} + +// CHECK: test_vsubq_s8 +// CHECK: vsub.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x16_t test_vsubq_s8(int8x16_t a, int8x16_t b) { + return vsubq_s8(a, b); +} + +// CHECK: test_vsubq_s16 +// CHECK: vsub.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x8_t test_vsubq_s16(int16x8_t a, int16x8_t b) { + return vsubq_s16(a, b); +} + +// CHECK: test_vsubq_s32 +// CHECK: vsub.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x4_t test_vsubq_s32(int32x4_t a, int32x4_t b) { + return vsubq_s32(a, b); +} + +// CHECK: test_vsubq_s64 +// CHECK: vsub.i64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int64x2_t test_vsubq_s64(int64x2_t a, int64x2_t b) { + return vsubq_s64(a, b); +} + +// CHECK: test_vsubq_f32 +// CHECK: vsub.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +float32x4_t test_vsubq_f32(float32x4_t a, float32x4_t b) { + return vsubq_f32(a, b); +} + +// CHECK: test_vsubq_u8 +// CHECK: vsub.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vsubq_u8(uint8x16_t a, uint8x16_t b) { + return vsubq_u8(a, b); +} + +// CHECK: test_vsubq_u16 +// CHECK: vsub.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vsubq_u16(uint16x8_t a, uint16x8_t b) { + return vsubq_u16(a, b); +} + +// CHECK: test_vsubq_u32 +// CHECK: vsub.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vsubq_u32(uint32x4_t a, uint32x4_t b) { + return vsubq_u32(a, b); +} + +// CHECK: test_vsubq_u64 +// CHECK: vsub.i64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint64x2_t test_vsubq_u64(uint64x2_t a, uint64x2_t b) { + return vsubq_u64(a, b); +} + + +// CHECK: test_vsubhn_s16 +// CHECK: vsubhn.i16 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { + return vsubhn_s16(a, b); +} + +// CHECK: test_vsubhn_s32 +// CHECK: vsubhn.i32 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { + return vsubhn_s32(a, b); +} + +// CHECK: test_vsubhn_s64 +// CHECK: vsubhn.i64 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { + return vsubhn_s64(a, b); +} + +// CHECK: test_vsubhn_u16 +// CHECK: vsubhn.i16 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { + return vsubhn_u16(a, b); +} + +// CHECK: test_vsubhn_u32 +// CHECK: vsubhn.i32 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { + return vsubhn_u32(a, b); +} + +// CHECK: test_vsubhn_u64 +// CHECK: vsubhn.i64 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) { + return vsubhn_u64(a, b); +} + + +// CHECK: test_vsubl_s8 +// CHECK: vsubl.s8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) { + return vsubl_s8(a, b); +} + +// CHECK: test_vsubl_s16 +// CHECK: vsubl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) { + return vsubl_s16(a, b); +} + +// CHECK: test_vsubl_s32 +// CHECK: vsubl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) { + return vsubl_s32(a, b); +} + +// CHECK: test_vsubl_u8 +// CHECK: vsubl.u8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x8_t test_vsubl_u8(uint8x8_t a, 
uint8x8_t b) { + return vsubl_u8(a, b); +} + +// CHECK: test_vsubl_u16 +// CHECK: vsubl.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) { + return vsubl_u16(a, b); +} + +// CHECK: test_vsubl_u32 +// CHECK: vsubl.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) { + return vsubl_u32(a, b); +} + + +// CHECK: test_vsubw_s8 +// CHECK: vsubw.s8 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}} +int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) { + return vsubw_s8(a, b); +} + +// CHECK: test_vsubw_s16 +// CHECK: vsubw.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}} +int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) { + return vsubw_s16(a, b); +} + +// CHECK: test_vsubw_s32 +// CHECK: vsubw.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}} +int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) { + return vsubw_s32(a, b); +} + +// CHECK: test_vsubw_u8 +// CHECK: vsubw.u8 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}} +uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) { + return vsubw_u8(a, b); +} + +// CHECK: test_vsubw_u16 +// CHECK: vsubw.u16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}} +uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) { + return vsubw_u16(a, b); +} + +// CHECK: test_vsubw_u32 +// CHECK: vsubw.u32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}} +uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) { + return vsubw_u32(a, b); +} + + +// CHECK: test_vtbl1_u8 +// CHECK: vtbl.8 d{{[0-9]+}}, {d{{[0-9]+}}}, d{{[0-9]+}} +uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) { + return vtbl1_u8(a, b); +} + +// CHECK: test_vtbl1_s8 +// CHECK: vtbl.8 d{{[0-9]+}}, {d{{[0-9]+}}}, d{{[0-9]+}} +int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) { + return vtbl1_s8(a, b); +} + +// CHECK: test_vtbl1_p8 +// CHECK: vtbl.8 d{{[0-9]+}}, {d{{[0-9]+}}}, d{{[0-9]+}} +poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) { + return vtbl1_p8(a, b); +} + + +// CHECK: test_vtbl2_u8 +// CHECK: vtbl.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) { + return vtbl2_u8(a, b); +} + +// CHECK: test_vtbl2_s8 +// CHECK: vtbl.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) { + return vtbl2_s8(a, b); +} + +// CHECK: test_vtbl2_p8 +// CHECK: vtbl.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) { + return vtbl2_p8(a, b); +} + + +// CHECK: test_vtbl3_u8 +// CHECK: vtbl.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) { + return vtbl3_u8(a, b); +} + +// CHECK: test_vtbl3_s8 +// CHECK: vtbl.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) { + return vtbl3_s8(a, b); +} + +// CHECK: test_vtbl3_p8 +// CHECK: vtbl.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) { + return vtbl3_p8(a, b); +} + + +// CHECK: test_vtbl4_u8 +// CHECK: vtbl.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) { + return vtbl4_u8(a, b); +} + +// CHECK: test_vtbl4_s8 +// CHECK: vtbl.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) { + return vtbl4_s8(a, b); +} + +// CHECK: test_vtbl4_p8 +// CHECK: vtbl.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, 
d{{[0-9]+}} +poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) { + return vtbl4_p8(a, b); +} + + +// CHECK: test_vtbx1_u8 +// CHECK: vtbx.8 d{{[0-9]+}}, {d{{[0-9]+}}}, d{{[0-9]+}} +uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { + return vtbx1_u8(a, b, c); +} + +// CHECK: test_vtbx1_s8 +// CHECK: vtbx.8 d{{[0-9]+}}, {d{{[0-9]+}}}, d{{[0-9]+}} +int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) { + return vtbx1_s8(a, b, c); +} + +// CHECK: test_vtbx1_p8 +// CHECK: vtbx.8 d{{[0-9]+}}, {d{{[0-9]+}}}, d{{[0-9]+}} +poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) { + return vtbx1_p8(a, b, c); +} + + +// CHECK: test_vtbx2_u8 +// CHECK: vtbx.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) { + return vtbx2_u8(a, b, c); +} + +// CHECK: test_vtbx2_s8 +// CHECK: vtbx.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) { + return vtbx2_s8(a, b, c); +} + +// CHECK: test_vtbx2_p8 +// CHECK: vtbx.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) { + return vtbx2_p8(a, b, c); +} + + +// CHECK: test_vtbx3_u8 +// CHECK: vtbx.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) { + return vtbx3_u8(a, b, c); +} + +// CHECK: test_vtbx3_s8 +// CHECK: vtbx.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) { + return vtbx3_s8(a, b, c); +} + +// CHECK: test_vtbx3_p8 +// CHECK: vtbx.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) { + return vtbx3_p8(a, b, c); +} + + +// CHECK: test_vtbx4_u8 +// CHECK: vtbx.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) { + return vtbx4_u8(a, b, c); +} + +// CHECK: test_vtbx4_s8 +// CHECK: vtbx.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) { + return vtbx4_s8(a, b, c); +} + +// CHECK: test_vtbx4_p8 +// CHECK: vtbx.8 d{{[0-9]+}}, {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, d{{[0-9]+}} +poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) { + return vtbx4_p8(a, b, c); +} + + +// CHECK: test_vtrn_s8 +// CHECK: vtrn.8 d{{[0-9]+}}, d{{[0-9]+}} +int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) { + return vtrn_s8(a, b); +} + +// CHECK: test_vtrn_s16 +// CHECK: vtrn.16 d{{[0-9]+}}, d{{[0-9]+}} +int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) { + return vtrn_s16(a, b); +} + +// CHECK: test_vtrn_s32 +// CHECK: vtrn.32 d{{[0-9]+}}, d{{[0-9]+}} +int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) { + return vtrn_s32(a, b); +} + +// CHECK: test_vtrn_u8 +// CHECK: vtrn.8 d{{[0-9]+}}, d{{[0-9]+}} +uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) { + return vtrn_u8(a, b); +} + +// CHECK: test_vtrn_u16 +// CHECK: vtrn.16 d{{[0-9]+}}, d{{[0-9]+}} +uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { + return vtrn_u16(a, b); +} + +// CHECK: test_vtrn_u32 +// CHECK: vtrn.32 d{{[0-9]+}}, d{{[0-9]+}} +uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) { + return vtrn_u32(a, b); +} + +// CHECK: test_vtrn_f32 +// CHECK: vtrn.32 d{{[0-9]+}}, d{{[0-9]+}} +float32x2x2_t 
test_vtrn_f32(float32x2_t a, float32x2_t b) { + return vtrn_f32(a, b); +} + +// CHECK: test_vtrn_p8 +// CHECK: vtrn.8 d{{[0-9]+}}, d{{[0-9]+}} +poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) { + return vtrn_p8(a, b); +} + +// CHECK: test_vtrn_p16 +// CHECK: vtrn.16 d{{[0-9]+}}, d{{[0-9]+}} +poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) { + return vtrn_p16(a, b); +} + +// CHECK: test_vtrnq_s8 +// CHECK: vtrn.8 q{{[0-9]+}}, q{{[0-9]+}} +int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { + return vtrnq_s8(a, b); +} + +// CHECK: test_vtrnq_s16 +// CHECK: vtrn.16 q{{[0-9]+}}, q{{[0-9]+}} +int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { + return vtrnq_s16(a, b); +} + +// CHECK: test_vtrnq_s32 +// CHECK: vtrn.32 q{{[0-9]+}}, q{{[0-9]+}} +int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { + return vtrnq_s32(a, b); +} + +// CHECK: test_vtrnq_u8 +// CHECK: vtrn.8 q{{[0-9]+}}, q{{[0-9]+}} +uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { + return vtrnq_u8(a, b); +} + +// CHECK: test_vtrnq_u16 +// CHECK: vtrn.16 q{{[0-9]+}}, q{{[0-9]+}} +uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { + return vtrnq_u16(a, b); +} + +// CHECK: test_vtrnq_u32 +// CHECK: vtrn.32 q{{[0-9]+}}, q{{[0-9]+}} +uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { + return vtrnq_u32(a, b); +} + +// CHECK: test_vtrnq_f32 +// CHECK: vtrn.32 q{{[0-9]+}}, q{{[0-9]+}} +float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { + return vtrnq_f32(a, b); +} + +// CHECK: test_vtrnq_p8 +// CHECK: vtrn.8 q{{[0-9]+}}, q{{[0-9]+}} +poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { + return vtrnq_p8(a, b); +} + +// CHECK: test_vtrnq_p16 +// CHECK: vtrn.16 q{{[0-9]+}}, q{{[0-9]+}} +poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) { + return vtrnq_p16(a, b); +} + + +// CHECK: test_vtst_s8 +// CHECK: vtst.8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vtst_s8(int8x8_t a, int8x8_t b) { + return vtst_s8(a, b); +} + +// CHECK: test_vtst_s16 +// CHECK: vtst.16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vtst_s16(int16x4_t a, int16x4_t b) { + return vtst_s16(a, b); +} + +// CHECK: test_vtst_s32 +// CHECK: vtst.32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vtst_s32(int32x2_t a, int32x2_t b) { + return vtst_s32(a, b); +} + +// CHECK: test_vtst_u8 +// CHECK: vtst.8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vtst_u8(uint8x8_t a, uint8x8_t b) { + return vtst_u8(a, b); +} + +// CHECK: test_vtst_u16 +// CHECK: vtst.16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vtst_u16(uint16x4_t a, uint16x4_t b) { + return vtst_u16(a, b); +} + +// CHECK: test_vtst_u32 +// CHECK: vtst.32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint32x2_t test_vtst_u32(uint32x2_t a, uint32x2_t b) { + return vtst_u32(a, b); +} + +// CHECK: test_vtst_p8 +// CHECK: vtst.8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint8x8_t test_vtst_p8(poly8x8_t a, poly8x8_t b) { + return vtst_p8(a, b); +} + +// CHECK: test_vtst_p16 +// CHECK: vtst.16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +uint16x4_t test_vtst_p16(poly16x4_t a, poly16x4_t b) { + return vtst_p16(a, b); +} + +// CHECK: test_vtstq_s8 +// CHECK: vtst.8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vtstq_s8(int8x16_t a, int8x16_t b) { + return vtstq_s8(a, b); +} + +// CHECK: test_vtstq_s16 +// CHECK: vtst.16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vtstq_s16(int16x8_t a, int16x8_t b) { + return vtstq_s16(a, b); +} + +// CHECK: test_vtstq_s32 +// CHECK: vtst.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t 
test_vtstq_s32(int32x4_t a, int32x4_t b) { + return vtstq_s32(a, b); +} + +// CHECK: test_vtstq_u8 +// CHECK: vtst.8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vtstq_u8(uint8x16_t a, uint8x16_t b) { + return vtstq_u8(a, b); +} + +// CHECK: test_vtstq_u16 +// CHECK: vtst.16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vtstq_u16(uint16x8_t a, uint16x8_t b) { + return vtstq_u16(a, b); +} + +// CHECK: test_vtstq_u32 +// CHECK: vtst.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint32x4_t test_vtstq_u32(uint32x4_t a, uint32x4_t b) { + return vtstq_u32(a, b); +} + +// CHECK: test_vtstq_p8 +// CHECK: vtst.8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint8x16_t test_vtstq_p8(poly8x16_t a, poly8x16_t b) { + return vtstq_p8(a, b); +} + +// CHECK: test_vtstq_p16 +// CHECK: vtst.16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} +uint16x8_t test_vtstq_p16(poly16x8_t a, poly16x8_t b) { + return vtstq_p16(a, b); +} + + +// CHECK: test_vuzp_s8 +// CHECK: vuzp.8 d{{[0-9]+}}, d{{[0-9]+}} +int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) { + return vuzp_s8(a, b); +} + +// CHECK: test_vuzp_s16 +// CHECK: vuzp.16 d{{[0-9]+}}, d{{[0-9]+}} +int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) { + return vuzp_s16(a, b); +} + +// CHECK: test_vuzp_s32 +// CHECK: {{vtrn|vuzp}}.32 d{{[0-9]+}}, d{{[0-9]+}} +int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) { + return vuzp_s32(a, b); +} + +// CHECK: test_vuzp_u8 +// CHECK: vuzp.8 d{{[0-9]+}}, d{{[0-9]+}} +uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) { + return vuzp_u8(a, b); +} + +// CHECK: test_vuzp_u16 +// CHECK: vuzp.16 d{{[0-9]+}}, d{{[0-9]+}} +uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) { + return vuzp_u16(a, b); +} + +// CHECK: test_vuzp_u32 +// CHECK: {{vtrn|vuzp}}.32 d{{[0-9]+}}, d{{[0-9]+}} +uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) { + return vuzp_u32(a, b); +} + +// CHECK: test_vuzp_f32 +// CHECK: {{vtrn|vuzp}}.32 d{{[0-9]+}}, d{{[0-9]+}} +float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) { + return vuzp_f32(a, b); +} + +// CHECK: test_vuzp_p8 +// CHECK: vuzp.8 d{{[0-9]+}}, d{{[0-9]+}} +poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) { + return vuzp_p8(a, b); +} + +// CHECK: test_vuzp_p16 +// CHECK: vuzp.16 d{{[0-9]+}}, d{{[0-9]+}} +poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) { + return vuzp_p16(a, b); +} + +// CHECK: test_vuzpq_s8 +// CHECK: vuzp.8 q{{[0-9]+}}, q{{[0-9]+}} +int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) { + return vuzpq_s8(a, b); +} + +// CHECK: test_vuzpq_s16 +// CHECK: vuzp.16 q{{[0-9]+}}, q{{[0-9]+}} +int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) { + return vuzpq_s16(a, b); +} + +// CHECK: test_vuzpq_s32 +// CHECK: {{vtrn|vuzp}}.32 q{{[0-9]+}}, q{{[0-9]+}} +int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) { + return vuzpq_s32(a, b); +} + +// CHECK: test_vuzpq_u8 +// CHECK: vuzp.8 q{{[0-9]+}}, q{{[0-9]+}} +uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) { + return vuzpq_u8(a, b); +} + +// CHECK: test_vuzpq_u16 +// CHECK: vuzp.16 q{{[0-9]+}}, q{{[0-9]+}} +uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { + return vuzpq_u16(a, b); +} + +// CHECK: test_vuzpq_u32 +// CHECK: {{vtrn|vuzp}}.32 q{{[0-9]+}}, q{{[0-9]+}} +uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) { + return vuzpq_u32(a, b); +} + +// CHECK: test_vuzpq_f32 +// CHECK: {{vtrn|vuzp}}.32 q{{[0-9]+}}, q{{[0-9]+}} +float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) { + return vuzpq_f32(a, b); +} + +// CHECK: test_vuzpq_p8 +// CHECK: vuzp.8 q{{[0-9]+}}, q{{[0-9]+}} +poly8x16x2_t 
test_vuzpq_p8(poly8x16_t a, poly8x16_t b) { + return vuzpq_p8(a, b); +} + +// CHECK: test_vuzpq_p16 +// CHECK: vuzp.16 q{{[0-9]+}}, q{{[0-9]+}} +poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) { + return vuzpq_p16(a, b); +} + + +// CHECK: test_vzip_s8 +// CHECK: vzip.8 d{{[0-9]+}}, d{{[0-9]+}} +int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) { + return vzip_s8(a, b); +} + +// CHECK: test_vzip_s16 +// CHECK: vzip.16 d{{[0-9]+}}, d{{[0-9]+}} +int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) { + return vzip_s16(a, b); +} + +// CHECK: test_vzip_s32 +// CHECK: {{vtrn|vzip}}.32 d{{[0-9]+}}, d{{[0-9]+}} +int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) { + return vzip_s32(a, b); +} + +// CHECK: test_vzip_u8 +// CHECK: vzip.8 d{{[0-9]+}}, d{{[0-9]+}} +uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) { + return vzip_u8(a, b); +} + +// CHECK: test_vzip_u16 +// CHECK: vzip.16 d{{[0-9]+}}, d{{[0-9]+}} +uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) { + return vzip_u16(a, b); +} + +// CHECK: test_vzip_u32 +// CHECK: {{vtrn|vzip}}.32 d{{[0-9]+}}, d{{[0-9]+}} +uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) { + return vzip_u32(a, b); +} + +// CHECK: test_vzip_f32 +// CHECK: {{vtrn|vzip}}.32 d{{[0-9]+}}, d{{[0-9]+}} +float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) { + return vzip_f32(a, b); +} + +// CHECK: test_vzip_p8 +// CHECK: vzip.8 d{{[0-9]+}}, d{{[0-9]+}} +poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) { + return vzip_p8(a, b); +} + +// CHECK: test_vzip_p16 +// CHECK: vzip.16 d{{[0-9]+}}, d{{[0-9]+}} +poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) { + return vzip_p16(a, b); +} + +// CHECK: test_vzipq_s8 +// CHECK: vzip.8 q{{[0-9]+}}, q{{[0-9]+}} +int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) { + return vzipq_s8(a, b); +} + +// CHECK: test_vzipq_s16 +// CHECK: vzip.16 q{{[0-9]+}}, q{{[0-9]+}} +int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) { + return vzipq_s16(a, b); +} + +// CHECK: test_vzipq_s32 +// CHECK: {{vtrn|vzip}}.32 q{{[0-9]+}}, q{{[0-9]+}} +int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) { + return vzipq_s32(a, b); +} + +// CHECK: test_vzipq_u8 +// CHECK: vzip.8 q{{[0-9]+}}, q{{[0-9]+}} +uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) { + return vzipq_u8(a, b); +} + +// CHECK: test_vzipq_u16 +// CHECK: vzip.16 q{{[0-9]+}}, q{{[0-9]+}} +uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) { + return vzipq_u16(a, b); +} + +// CHECK: test_vzipq_u32 +// CHECK: {{vtrn|vzip}}.32 q{{[0-9]+}}, q{{[0-9]+}} +uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) { + return vzipq_u32(a, b); +} + +// CHECK: test_vzipq_f32 +// CHECK: {{vtrn|vzip}}.32 q{{[0-9]+}}, q{{[0-9]+}} +float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) { + return vzipq_f32(a, b); +} + +// CHECK: test_vzipq_p8 +// CHECK: vzip.8 q{{[0-9]+}}, q{{[0-9]+}} +poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) { + return vzipq_p8(a, b); +} + +// CHECK: test_vzipq_p16 +// CHECK: vzip.16 q{{[0-9]+}}, q{{[0-9]+}} +poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) { + return vzipq_p16(a, b); +} + + diff --git a/test/CodeGen/asm-label.c b/test/CodeGen/asm-label.c index c06f11f..f944d36 100644 --- a/test/CodeGen/asm-label.c +++ b/test/CodeGen/asm-label.c @@ -17,3 +17,15 @@ int *test(void) { // DARWIN: @"\01bar" = internal global i32 0 // DARWIN: @"\01foo" = common global i32 0 // DARWIN: declare i8* @"\01alias"(i32) + +// PR7887 +int pr7887_1 asm(""); +extern int pr7887_2 asm(""); +int pr7887_3 () asm(""); + +int pt7887_4 () { + static int y 
asm(""); + y = pr7887_3(); + pr7887_2 = 1; + return pr7887_1; +} diff --git a/test/CodeGen/assign.c b/test/CodeGen/assign.c index fc00896..b2702f0 100644 --- a/test/CodeGen/assign.c +++ b/test/CodeGen/assign.c @@ -2,7 +2,7 @@ // Check that we don't generate unnecessary reloads. // -// CHECK: define void @f0() +// CHECK-LABEL: define void @f0() // CHECK: [[x_0:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[y_0:%.*]] = alloca i32, align 4 // CHECK-NEXT: store i32 1, i32* [[x_0]] @@ -18,7 +18,7 @@ void f0() { // This used to test that we generate reloads for volatile access, // but that does not appear to be correct behavior for C. // -// CHECK: define void @f1() +// CHECK-LABEL: define void @f1() // CHECK: [[x_1:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[y_1:%.*]] = alloca i32, align 4 // CHECK-NEXT: store volatile i32 1, i32* [[x_1]] diff --git a/test/CodeGen/atomic-ops.c b/test/CodeGen/atomic-ops.c index d79f405..830f21a 100644 --- a/test/CodeGen/atomic-ops.c +++ b/test/CodeGen/atomic-ops.c @@ -246,9 +246,6 @@ _Atomic(struct foo) bigAtomic; void structAtomicStore() { // CHECK: @structAtomicStore struct foo f = {0}; - __c11_atomic_store(&bigAtomic, f, 5); - // CHECK: call void @__atomic_store(i32 512, i8* bitcast ({{.*}} @bigAtomic to i8*), - struct bar b = {0}; __atomic_store(&smallThing, &b, 5); // CHECK: call void @__atomic_store(i32 3, i8* {{.*}} @smallThing @@ -258,13 +255,11 @@ void structAtomicStore() { } void structAtomicLoad() { // CHECK: @structAtomicLoad - struct foo f = __c11_atomic_load(&bigAtomic, 5); - // CHECK: call void @__atomic_load(i32 512, i8* bitcast ({{.*}} @bigAtomic to i8*), - struct bar b; __atomic_load(&smallThing, &b, 5); // CHECK: call void @__atomic_load(i32 3, i8* {{.*}} @smallThing + struct foo f = {0}; __atomic_load(&bigThing, &f, 5); // CHECK: call void @__atomic_load(i32 512, i8* {{.*}} @bigThing } diff --git a/test/CodeGen/atomics-inlining.c b/test/CodeGen/atomics-inlining.c index 9b0d413..6456e74 100644 --- a/test/CodeGen/atomics-inlining.c +++ b/test/CodeGen/atomics-inlining.c @@ -1,3 +1,4 @@ +// RUN: %clang_cc1 -triple arm-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=ARM // RUN: %clang_cc1 -triple powerpc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=PPC32 // RUN: %clang_cc1 -triple powerpc64-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=PPC64 // RUN: %clang_cc1 -triple mipsel-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=MIPS32 @@ -7,6 +8,7 @@ unsigned char c1, c2; unsigned short s1, s2; unsigned int i1, i2; unsigned long long ll1, ll2; +unsigned char a1[100], a2[100]; enum memory_order { memory_order_relaxed, @@ -19,31 +21,73 @@ enum memory_order { void test1(void) { (void)__atomic_load(&c1, &c2, memory_order_seq_cst); + (void)__atomic_store(&c1, &c2, memory_order_seq_cst); (void)__atomic_load(&s1, &s2, memory_order_seq_cst); + (void)__atomic_store(&s1, &s2, memory_order_seq_cst); (void)__atomic_load(&i1, &i2, memory_order_seq_cst); + (void)__atomic_store(&i1, &i2, memory_order_seq_cst); (void)__atomic_load(&ll1, &ll2, memory_order_seq_cst); + (void)__atomic_store(&ll1, &ll2, memory_order_seq_cst); + (void)__atomic_load(&a1, &a2, memory_order_seq_cst); + (void)__atomic_store(&a1, &a2, memory_order_seq_cst); -// PPC32: define void @test1 -// PPC32: load atomic i8* @c1 seq_cst -// PPC32: load atomic i16* @s1 seq_cst -// PPC32: load atomic i32* @i1 seq_cst -// PPC32: call void @__atomic_load(i32 8, i8* bitcast (i64* @ll1 to i8*) - -// PPC64: define void @test1 -// PPC64: load atomic i8* @c1 seq_cst -// 
PPC64: load atomic i16* @s1 seq_cst -// PPC64: load atomic i32* @i1 seq_cst -// PPC64: load atomic i64* @ll1 seq_cst - -// MIPS32: define void @test1 -// MIPS32: load atomic i8* @c1 seq_cst -// MIPS32: load atomic i16* @s1 seq_cst -// MIPS32: load atomic i32* @i1 seq_cst -// MIPS32: call void @__atomic_load(i32 8, i8* bitcast (i64* @ll1 to i8*) - -// MIPS64: define void @test1 -// MIPS64: load atomic i8* @c1 seq_cst -// MIPS64: load atomic i16* @s1 seq_cst -// MIPS64: load atomic i32* @i1 seq_cst -// MIPS64: load atomic i64* @ll1 seq_cst +// ARM-LABEL: define arm_aapcscc void @test1 +// ARM: = call arm_aapcscc zeroext i8 @__atomic_load_1(i8* @c1 +// ARM: call arm_aapcscc void @__atomic_store_1(i8* @c1, i8 zeroext +// ARM: = call arm_aapcscc zeroext i16 @__atomic_load_2(i8* bitcast (i16* @s1 to i8*) +// ARM: call arm_aapcscc void @__atomic_store_2(i8* bitcast (i16* @s1 to i8*), i16 zeroext +// ARM: = call arm_aapcscc i32 @__atomic_load_4(i8* bitcast (i32* @i1 to i8*) +// ARM: call arm_aapcscc void @__atomic_store_4(i8* bitcast (i32* @i1 to i8*), i32 +// ARM: = call arm_aapcscc i64 @__atomic_load_8(i8* bitcast (i64* @ll1 to i8*) +// ARM: call arm_aapcscc void @__atomic_store_8(i8* bitcast (i64* @ll1 to i8*), i64 +// ARM: call arm_aapcscc void @__atomic_load(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0) +// ARM: call arm_aapcscc void @__atomic_store(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0) + +// PPC32-LABEL: define void @test1 +// PPC32: = load atomic i8* @c1 seq_cst +// PPC32: store atomic i8 {{.*}}, i8* @c1 seq_cst +// PPC32: = load atomic i16* @s1 seq_cst +// PPC32: store atomic i16 {{.*}}, i16* @s1 seq_cst +// PPC32: = load atomic i32* @i1 seq_cst +// PPC32: store atomic i32 {{.*}}, i32* @i1 seq_cst +// PPC32: = call i64 @__atomic_load_8(i8* bitcast (i64* @ll1 to i8*) +// PPC32: call void @__atomic_store_8(i8* bitcast (i64* @ll1 to i8*), i64 +// PPC32: call void @__atomic_load(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0) +// PPC32: call void @__atomic_store(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0) + +// PPC64-LABEL: define void @test1 +// PPC64: = load atomic i8* @c1 seq_cst +// PPC64: store atomic i8 {{.*}}, i8* @c1 seq_cst +// PPC64: = load atomic i16* @s1 seq_cst +// PPC64: store atomic i16 {{.*}}, i16* @s1 seq_cst +// PPC64: = load atomic i32* @i1 seq_cst +// PPC64: store atomic i32 {{.*}}, i32* @i1 seq_cst +// PPC64: = load atomic i64* @ll1 seq_cst +// PPC64: store atomic i64 {{.*}}, i64* @ll1 seq_cst +// PPC64: call void @__atomic_load(i64 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0) +// PPC64: call void @__atomic_store(i64 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0) + +// MIPS32-LABEL: define void @test1 +// MIPS32: = load atomic i8* @c1 seq_cst +// MIPS32: store atomic i8 {{.*}}, i8* @c1 seq_cst +// MIPS32: = load atomic i16* @s1 seq_cst +// MIPS32: store atomic i16 {{.*}}, i16* @s1 seq_cst +// MIPS32: = load atomic i32* @i1 seq_cst +// MIPS32: store atomic i32 {{.*}}, i32* @i1 seq_cst +// MIPS32: call i64 @__atomic_load_8(i8* bitcast (i64* @ll1 to i8*) +// MIPS32: call void 
@__atomic_store_8(i8* bitcast (i64* @ll1 to i8*), i64 +// MIPS32: call void @__atomic_load(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0) +// MIPS32: call void @__atomic_store(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0) + +// MIPS64-LABEL: define void @test1 +// MIPS64: = load atomic i8* @c1 seq_cst +// MIPS64: store atomic i8 {{.*}}, i8* @c1 seq_cst +// MIPS64: = load atomic i16* @s1 seq_cst +// MIPS64: store atomic i16 {{.*}}, i16* @s1 seq_cst +// MIPS64: = load atomic i32* @i1 seq_cst +// MIPS64: store atomic i32 {{.*}}, i32* @i1 seq_cst +// MIPS64: = load atomic i64* @ll1 seq_cst +// MIPS64: store atomic i64 {{.*}}, i64* @ll1 seq_cst +// MIPS64: call void @__atomic_load(i64 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0) +// MIPS64: call void @__atomic_store(i64 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0) } diff --git a/test/CodeGen/attr-availability.c b/test/CodeGen/attr-availability.c index 6f9c045..ccbbb62 100644 --- a/test/CodeGen/attr-availability.c +++ b/test/CodeGen/attr-availability.c @@ -2,15 +2,15 @@ // RUN: %clang_cc1 -fvisibility hidden "-triple" "x86_64-apple-darwin9.0.0" -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-10_5 %s // RUN: %clang_cc1 -fvisibility hidden "-triple" "x86_64-apple-darwin10.0.0" -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-10_6 %s -// CHECK-10_4: define hidden void @f2 -// CHECK-10_5: define hidden void @f2 -// CHECK-10_6: define hidden void @f2 +// CHECK-10_4-LABEL: define hidden void @f2 +// CHECK-10_5-LABEL: define hidden void @f2 +// CHECK-10_6-LABEL: define hidden void @f2 void f2(); void f2() { } -// CHECK-10_4: define void @f3 -// CHECK-10_5: define void @f3 -// CHECK-10_6: define void @f3 +// CHECK-10_4-LABEL: define void @f3 +// CHECK-10_5-LABEL: define void @f3 +// CHECK-10_6-LABEL: define void @f3 void f3() __attribute__((availability(macosx,introduced=10.5))); void f3() { } diff --git a/test/CodeGen/attr-coldhot.c b/test/CodeGen/attr-coldhot.c index a277119..ec54edd 100644 --- a/test/CodeGen/attr-coldhot.c +++ b/test/CodeGen/attr-coldhot.c @@ -8,4 +8,4 @@ int test1() __attribute__((__cold__)) { // CHECK: ret } -// CHECK: attributes [[ATTR]] = { {{.*}}optsize{{.*}} } +// CHECK: attributes [[ATTR]] = { {{.*}}cold{{.*}}optsize{{.*}} } diff --git a/test/CodeGen/attr-minsize.cpp b/test/CodeGen/attr-minsize.cpp index 997194d..0f07725 100644 --- a/test/CodeGen/attr-minsize.cpp +++ b/test/CodeGen/attr-minsize.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -Oz -emit-llvm %s -o - | FileCheck %s -check-prefix=Oz -// RUN: %clang_cc1 -O0 -emit-llvm %s -o - | FileCheck %s -check-prefix=OTHER +// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s -check-prefix=OTHER // RUN: %clang_cc1 -O1 -emit-llvm %s -o - | FileCheck %s -check-prefix=OTHER // RUN: %clang_cc1 -O2 -emit-llvm %s -o - | FileCheck %s -check-prefix=OTHER // RUN: %clang_cc1 -O3 -emit-llvm %s -o - | FileCheck %s -check-prefix=OTHER diff --git a/test/CodeGen/attr-weakref.c b/test/CodeGen/attr-weakref.c index 560d391..248860d 100644 --- a/test/CodeGen/attr-weakref.c +++ b/test/CodeGen/attr-weakref.c @@ -8,7 +8,7 @@ void test1_h(void) { test1_g(); } -// CHECK: define void @test2_f() +// CHECK-LABEL: define void @test2_f() void test2_f(void) {} static void test2_g(void) __attribute__((weakref("test2_f"))); void test2_h(void) { @@ -25,7 
+25,7 @@ void test3_h(void) { test3_g(); } -// CHECK: define void @test4_f() +// CHECK-LABEL: define void @test4_f() void test4_f(void); static void test4_g(void) __attribute__((weakref("test4_f"))); void test4_h(void) { diff --git a/test/CodeGen/available-externally-suppress.c b/test/CodeGen/available-externally-suppress.c index 46b6e74..390d201 100644 --- a/test/CodeGen/available-externally-suppress.c +++ b/test/CodeGen/available-externally-suppress.c @@ -1,11 +1,11 @@ -// RUN: %clang_cc1 -emit-llvm -o - -O0 -triple x86_64-apple-darwin10 %s | FileCheck %s +// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-apple-darwin10 %s | FileCheck %s // Ensure that we don't emit available_externally functions at -O0. int x; inline void f0(int y) { x = y; } -// CHECK: define void @test() +// CHECK-LABEL: define void @test() // CHECK: declare void @f0(i32) void test() { f0(17); diff --git a/test/CodeGen/avx-builtins.c b/test/CodeGen/avx-builtins.c index 0e5a741..c88946f 100644 --- a/test/CodeGen/avx-builtins.c +++ b/test/CodeGen/avx-builtins.c @@ -93,3 +93,21 @@ int test_mm_cmpistrz(__m128i A, __m128i B) { // CHECK: @llvm.x86.sse42.pcmpistriz128 return _mm_cmpistrz(A, B, 7); } + +int test_extract_epi32(__m256i __a) { + // CHECK-LABEL: @test_extract_epi32 + // CHECK: extractelement <8 x i32> %{{.*}}, i32 0 + return _mm256_extract_epi32(__a, 8); +} + +int test_extract_epi16(__m256i __a) { + // CHECK-LABEL: @test_extract_epi16 + // CHECK: extractelement <16 x i16> %{{.*}}, i32 0 + return _mm256_extract_epi16(__a, 16); +} + +int test_extract_epi8(__m256i __a) { + // CHECK-LABEL: @test_extract_epi8 + // CHECK: extractelement <32 x i8> %{{.*}}, i32 0 + return _mm256_extract_epi8(__a, 32); +} diff --git a/test/CodeGen/avx-cmp-builtins.c b/test/CodeGen/avx-cmp-builtins.c index 1ac1c31..5b205d7 100644 --- a/test/CodeGen/avx-cmp-builtins.c +++ b/test/CodeGen/avx-cmp-builtins.c @@ -44,3 +44,51 @@ __m128d test_cmp_ss(__m128 a, __m128 b) { // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 13) return _mm_cmp_ss(a, b, _CMP_GE_OS); } + +__m128 test_cmpgt_ss(__m128 a, __m128 b) { + // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 1) + // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3> + return _mm_cmpgt_ss(a, b); +} + +__m128 test_cmpge_ss(__m128 a, __m128 b) { + // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 2) + // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3> + return _mm_cmpge_ss(a, b); +} + +__m128 test_cmpngt_ss(__m128 a, __m128 b) { + // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 5) + // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3> + return _mm_cmpngt_ss(a, b); +} + +__m128 test_cmpnge_ss(__m128 a, __m128 b) { + // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 6) + // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3> + return _mm_cmpnge_ss(a, b); +} + +__m128d test_cmpgt_sd(__m128d a, __m128d b) { + // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 1) + // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3> + return _mm_cmpgt_sd(a, b); +} + +__m128d test_cmpge_sd(__m128d a, __m128d b) { + // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 2) + // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3> + return _mm_cmpge_sd(a, b); +} + +__m128d test_cmpngt_sd(__m128d a, __m128d b) { + // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 5) + // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3> + return _mm_cmpngt_sd(a, b); +} + +__m128d test_cmpnge_sd(__m128d a, __m128d b) { + // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 6) + // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, 
i32 3> + return _mm_cmpnge_sd(a, b); +} diff --git a/test/CodeGen/avx2-builtins.c b/test/CodeGen/avx2-builtins.c index b5bc605..5024d94 100644 --- a/test/CodeGen/avx2-builtins.c +++ b/test/CodeGen/avx2-builtins.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Werror | FileCheck %s // Don't include mm_malloc.h, it's system specific. #define __MM_MALLOC_H @@ -606,9 +606,9 @@ __m256d test_mm256_broadcastsd_pd(__m128d a) { return _mm256_broadcastsd_pd(a); } -__m256i test_mm_broadcastsi128_si256(__m128i *a) { +__m256i test_mm256_broadcastsi128_si256(__m128i a) { // CHECK: @llvm.x86.avx2.vbroadcasti128 - return _mm_broadcastsi128_si256(a); + return _mm256_broadcastsi128_si256(a); } __m128i test_mm_blend_epi32(__m128i a, __m128i b) { @@ -850,22 +850,22 @@ __m128i test_mm256_mask_i64gather_epi32(__m128i a, int const *b, __m256i c, return _mm256_mask_i64gather_epi32(a, b, c, d, 2); } -__m128i test_mm_mask_i32gather_epi64(__m128i a, int const *b, __m128i c, +__m128i test_mm_mask_i32gather_epi64(__m128i a, long long const *b, __m128i c, __m128i d) { // CHECK: @llvm.x86.avx2.gather.d.q return _mm_mask_i32gather_epi64(a, b, c, d, 2); } -__m256i test_mm256_mask_i32gather_epi64(__m256i a, int const *b, __m128i c, +__m256i test_mm256_mask_i32gather_epi64(__m256i a, long long const *b, __m128i c, __m256i d) { // CHECK: @llvm.x86.avx2.gather.d.q.256 return _mm256_mask_i32gather_epi64(a, b, c, d, 2); } -__m128i test_mm_mask_i64gather_epi64(__m128i a, int const *b, __m128i c, +__m128i test_mm_mask_i64gather_epi64(__m128i a, long long const *b, __m128i c, __m128i d) { // CHECK: @llvm.x86.avx2.gather.q.q return _mm_mask_i64gather_epi64(a, b, c, d, 2); } -__m256i test_mm256_mask_i64gather_epi64(__m256i a, int const *b, __m256i c, +__m256i test_mm256_mask_i64gather_epi64(__m256i a, long long const *b, __m256i c, __m256i d) { // CHECK: @llvm.x86.avx2.gather.q.q.256 return _mm256_mask_i64gather_epi64(a, b, c, d, 2); @@ -920,19 +920,19 @@ __m128i test_mm256_i64gather_epi32(int const *b, __m256i c) { // CHECK: @llvm.x86.avx2.gather.q.d.256 return _mm256_i64gather_epi32(b, c, 2); } -__m128i test_mm_i32gather_epi64(int const *b, __m128i c) { +__m128i test_mm_i32gather_epi64(long long const *b, __m128i c) { // CHECK: @llvm.x86.avx2.gather.d.q return _mm_i32gather_epi64(b, c, 2); } -__m256i test_mm256_i32gather_epi64(int const *b, __m128i c) { +__m256i test_mm256_i32gather_epi64(long long const *b, __m128i c) { // CHECK: @llvm.x86.avx2.gather.d.q.256 return _mm256_i32gather_epi64(b, c, 2); } -__m128i test_mm_i64gather_epi64(int const *b, __m128i c) { +__m128i test_mm_i64gather_epi64(long long const *b, __m128i c) { // CHECK: @llvm.x86.avx2.gather.q.q return _mm_i64gather_epi64(b, c, 2); } -__m256i test_mm256_i64gather_epi64(int const *b, __m256i c) { +__m256i test_mm256_i64gather_epi64(long long const *b, __m256i c) { // CHECK: @llvm.x86.avx2.gather.q.q.256 return _mm256_i64gather_epi64(b, c, 2); } diff --git a/test/CodeGen/big-atomic-ops.c b/test/CodeGen/big-atomic-ops.c new file mode 100644 index 0000000..b09aede --- /dev/null +++ b/test/CodeGen/big-atomic-ops.c @@ -0,0 +1,323 @@ +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=x86_64-apple-macosx10.9.0 | FileCheck %s + +// Also test serialization of atomic operations here, to avoid duplicating the +// test. 
+// RUN: %clang_cc1 %s -emit-pch -o %t -triple=x86_64-apple-macosx10.9.0 +// RUN: %clang_cc1 %s -include-pch %t -triple=x86_64-apple-macosx10.9.0 -emit-llvm -o - | FileCheck %s +#ifndef ALREADY_INCLUDED +#define ALREADY_INCLUDED + +// Basic IRGen tests for __c11_atomic_* and GNU __atomic_* + +typedef enum memory_order { + memory_order_relaxed, memory_order_consume, memory_order_acquire, + memory_order_release, memory_order_acq_rel, memory_order_seq_cst +} memory_order; + +int fi1(_Atomic(int) *i) { + // CHECK: @fi1 + // CHECK: load atomic i32* {{.*}} seq_cst + return __c11_atomic_load(i, memory_order_seq_cst); +} + +int fi1a(int *i) { + // CHECK: @fi1a + // CHECK: load atomic i32* {{.*}} seq_cst + int v; + __atomic_load(i, &v, memory_order_seq_cst); + return v; +} + +int fi1b(int *i) { + // CHECK: @fi1b + // CHECK: load atomic i32* {{.*}} seq_cst + return __atomic_load_n(i, memory_order_seq_cst); +} + +void fi2(_Atomic(int) *i) { + // CHECK: @fi2 + // CHECK: store atomic i32 {{.*}} seq_cst + __c11_atomic_store(i, 1, memory_order_seq_cst); +} + +void fi2a(int *i) { + // CHECK: @fi2a + // CHECK: store atomic i32 {{.*}} seq_cst + int v = 1; + __atomic_store(i, &v, memory_order_seq_cst); +} + +void fi2b(int *i) { + // CHECK: @fi2b + // CHECK: store atomic i32 {{.*}} seq_cst + __atomic_store_n(i, 1, memory_order_seq_cst); +} + +int fi3(_Atomic(int) *i) { + // CHECK: @fi3 + // CHECK: atomicrmw and + // CHECK-NOT: and + return __c11_atomic_fetch_and(i, 1, memory_order_seq_cst); +} + +int fi3a(int *i) { + // CHECK: @fi3a + // CHECK: atomicrmw xor + // CHECK-NOT: xor + return __atomic_fetch_xor(i, 1, memory_order_seq_cst); +} + +int fi3b(int *i) { + // CHECK: @fi3b + // CHECK: atomicrmw add + // CHECK: add + return __atomic_add_fetch(i, 1, memory_order_seq_cst); +} + +int fi3c(int *i) { + // CHECK: @fi3c + // CHECK: atomicrmw nand + // CHECK-NOT: and + return __atomic_fetch_nand(i, 1, memory_order_seq_cst); +} + +int fi3d(int *i) { + // CHECK: @fi3d + // CHECK: atomicrmw nand + // CHECK: and + // CHECK: xor + return __atomic_nand_fetch(i, 1, memory_order_seq_cst); +} + +_Bool fi4(_Atomic(int) *i) { + // CHECK: @fi4 + // CHECK: cmpxchg i32* + int cmp = 0; + return __c11_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire); +} + +_Bool fi4a(int *i) { + // CHECK: @fi4 + // CHECK: cmpxchg i32* + int cmp = 0; + int desired = 1; + return __atomic_compare_exchange(i, &cmp, &desired, 0, memory_order_acquire, memory_order_acquire); +} + +_Bool fi4b(int *i) { + // CHECK: @fi4 + // CHECK: cmpxchg i32* + int cmp = 0; + return __atomic_compare_exchange_n(i, &cmp, 1, 1, memory_order_acquire, memory_order_acquire); +} + +float ff1(_Atomic(float) *d) { + // CHECK: @ff1 + // CHECK: load atomic i32* {{.*}} monotonic + return __c11_atomic_load(d, memory_order_relaxed); +} + +void ff2(_Atomic(float) *d) { + // CHECK: @ff2 + // CHECK: store atomic i32 {{.*}} release + __c11_atomic_store(d, 1, memory_order_release); +} + +float ff3(_Atomic(float) *d) { + return __c11_atomic_exchange(d, 2, memory_order_seq_cst); +} + +int* fp1(_Atomic(int*) *p) { + // CHECK: @fp1 + // CHECK: load atomic i64* {{.*}} seq_cst + return __c11_atomic_load(p, memory_order_seq_cst); +} + +int* fp2(_Atomic(int*) *p) { + // CHECK: @fp2 + // CHECK: store i64 4 + // CHECK: atomicrmw add {{.*}} monotonic + return __c11_atomic_fetch_add(p, 1, memory_order_relaxed); +} + +int *fp2a(int **p) { + // CHECK: @fp2a + // CHECK: store i64 4 + // CHECK: atomicrmw sub {{.*}} monotonic + // Note, the GNU builtins do not multiply 
by sizeof(T)! + return __atomic_fetch_sub(p, 4, memory_order_relaxed); +} + +_Complex float fc(_Atomic(_Complex float) *c) { + // CHECK: @fc + // CHECK: atomicrmw xchg i64* + return __c11_atomic_exchange(c, 2, memory_order_seq_cst); +} + +typedef struct X { int x; } X; +X fs(_Atomic(X) *c) { + // CHECK: @fs + // CHECK: atomicrmw xchg i32* + return __c11_atomic_exchange(c, (X){2}, memory_order_seq_cst); +} + +X fsa(X *c, X *d) { + // CHECK: @fsa + // CHECK: atomicrmw xchg i32* + X ret; + __atomic_exchange(c, d, &ret, memory_order_seq_cst); + return ret; +} + +_Bool fsb(_Bool *c) { + // CHECK: @fsb + // CHECK: atomicrmw xchg i8* + return __atomic_exchange_n(c, 1, memory_order_seq_cst); +} + +char flag1; +volatile char flag2; +void test_and_set() { + // CHECK: atomicrmw xchg i8* @flag1, i8 1 seq_cst + __atomic_test_and_set(&flag1, memory_order_seq_cst); + // CHECK: atomicrmw volatile xchg i8* @flag2, i8 1 acquire + __atomic_test_and_set(&flag2, memory_order_acquire); + // CHECK: store atomic volatile i8 0, i8* @flag2 release + __atomic_clear(&flag2, memory_order_release); + // CHECK: store atomic i8 0, i8* @flag1 seq_cst + __atomic_clear(&flag1, memory_order_seq_cst); +} + +struct Sixteen { + char c[16]; +} sixteen; +struct Seventeen { + char c[17]; +} seventeen; + +int lock_free(struct Incomplete *incomplete) { + // CHECK: @lock_free + + // CHECK: call i32 @__atomic_is_lock_free(i64 3, i8* null) + __c11_atomic_is_lock_free(3); + + // CHECK: call i32 @__atomic_is_lock_free(i64 16, i8* {{.*}}@sixteen{{.*}}) + __atomic_is_lock_free(16, &sixteen); + + // CHECK: call i32 @__atomic_is_lock_free(i64 17, i8* {{.*}}@seventeen{{.*}}) + __atomic_is_lock_free(17, &seventeen); + + // CHECK: call i32 @__atomic_is_lock_free(i64 4, {{.*}}) + __atomic_is_lock_free(4, incomplete); + + char cs[20]; + // CHECK: call i32 @__atomic_is_lock_free(i64 4, {{.*}}) + __atomic_is_lock_free(4, cs+1); + + // CHECK-NOT: call + __atomic_always_lock_free(3, 0); + __atomic_always_lock_free(16, 0); + __atomic_always_lock_free(17, 0); + __atomic_always_lock_free(16, &sixteen); + __atomic_always_lock_free(17, &seventeen); + + int n; + __atomic_is_lock_free(4, &n); + + // CHECK: ret i32 1 + return __c11_atomic_is_lock_free(sizeof(_Atomic(int))); +} + +// Tests for atomic operations on big values. 
These should call the functions +// defined here: +// http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary#The_Library_interface + +struct foo { + int big[128]; +}; +struct bar { + char c[3]; +}; + +struct bar smallThing, thing1, thing2; +struct foo bigThing; +_Atomic(struct foo) bigAtomic; + +void structAtomicStore() { + // CHECK: @structAtomicStore + struct foo f = {0}; + __c11_atomic_store(&bigAtomic, f, 5); + // CHECK: call void @__atomic_store(i64 512, i8* bitcast ({{.*}} @bigAtomic to i8*), + + struct bar b = {0}; + __atomic_store(&smallThing, &b, 5); + // CHECK: call void @__atomic_store(i64 3, i8* {{.*}} @smallThing + + __atomic_store(&bigThing, &f, 5); + // CHECK: call void @__atomic_store(i64 512, i8* {{.*}} @bigThing +} +void structAtomicLoad() { + // CHECK: @structAtomicLoad + struct foo f = __c11_atomic_load(&bigAtomic, 5); + // CHECK: call void @__atomic_load(i64 512, i8* bitcast ({{.*}} @bigAtomic to i8*), + + struct bar b; + __atomic_load(&smallThing, &b, 5); + // CHECK: call void @__atomic_load(i64 3, i8* {{.*}} @smallThing + + __atomic_load(&bigThing, &f, 5); + // CHECK: call void @__atomic_load(i64 512, i8* {{.*}} @bigThing +} +struct foo structAtomicExchange() { + // CHECK: @structAtomicExchange + struct foo f = {0}; + struct foo old; + __atomic_exchange(&f, &bigThing, &old, 5); + // CHECK: call void @__atomic_exchange(i64 512, {{.*}}, i8* bitcast ({{.*}} @bigThing to i8*), + + return __c11_atomic_exchange(&bigAtomic, f, 5); + // CHECK: call void @__atomic_exchange(i64 512, i8* bitcast ({{.*}} @bigAtomic to i8*), +} +int structAtomicCmpExchange() { + // CHECK: @structAtomicCmpExchange + _Bool x = __atomic_compare_exchange(&smallThing, &thing1, &thing2, 1, 5, 5); + // CHECK: call zeroext i1 @__atomic_compare_exchange(i64 3, {{.*}} @smallThing{{.*}} @thing1{{.*}} @thing2 + + struct foo f = {0}; + struct foo g = {0}; + g.big[12] = 12; + return x & __c11_atomic_compare_exchange_strong(&bigAtomic, &f, g, 5, 5); + // CHECK: call zeroext i1 @__atomic_compare_exchange(i64 512, i8* bitcast ({{.*}} @bigAtomic to i8*), +} + +// Check that no atomic operations are used in any initialisation of _Atomic +// types. 
+_Atomic(int) atomic_init_i = 42; + +// CHECK: @atomic_init_foo +void atomic_init_foo() +{ + // CHECK-NOT: } + // CHECK-NOT: atomic + // CHECK: store + _Atomic(int) j = 12; + + // CHECK-NOT: } + // CHECK-NOT: atomic + // CHECK: store + __c11_atomic_init(&j, 42); + + // CHECK-NOT: atomic + // CHECK: } +} + +// CHECK: @invalid_atomic +void invalid_atomic(_Atomic(int) *i) { + __c11_atomic_store(i, 1, memory_order_consume); + __c11_atomic_store(i, 1, memory_order_acquire); + __c11_atomic_store(i, 1, memory_order_acq_rel); + __c11_atomic_load(i, memory_order_release); + __c11_atomic_load(i, memory_order_acq_rel); +} + +#endif diff --git a/test/CodeGen/bitfield-2.c b/test/CodeGen/bitfield-2.c index bec55ff..58b17f1 100644 --- a/test/CodeGen/bitfield-2.c +++ b/test/CodeGen/bitfield-2.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -emit-llvm -triple x86_64 -O3 -o %t.opt.ll %s \ -// RUN: -fdump-record-layouts 2> %t.dump.txt +// RUN: -fdump-record-layouts > %t.dump.txt // RUN: FileCheck -check-prefix=CHECK-RECORD < %t.dump.txt %s // RUN: FileCheck -check-prefix=CHECK-OPT < %t.opt.ll %s @@ -32,7 +32,7 @@ int f0_reload(struct s0 *a0) { return (a0->f0 += 1); } -// CHECK-OPT: define i64 @test_0() +// CHECK-OPT-LABEL: define i64 @test_0() // CHECK-OPT: ret i64 1 // CHECK-OPT: } unsigned long long test_0() { @@ -78,7 +78,7 @@ int f1_reload(struct s1 *a0) { return (a0->f1 += 1234); } -// CHECK-OPT: define i64 @test_1() +// CHECK-OPT-LABEL: define i64 @test_1() // CHECK-OPT: ret i64 210 // CHECK-OPT: } unsigned long long test_1() { @@ -120,7 +120,7 @@ int f2_reload(union u2 *a0) { return (a0->f0 += 1234); } -// CHECK-OPT: define i64 @test_2() +// CHECK-OPT-LABEL: define i64 @test_2() // CHECK-OPT: ret i64 2 // CHECK-OPT: } unsigned long long test_2() { @@ -156,7 +156,7 @@ int f3_reload(struct s3 *a0) { return (a0->f0 += 1234); } -// CHECK-OPT: define i64 @test_3() +// CHECK-OPT-LABEL: define i64 @test_3() // CHECK-OPT: ret i64 -559039940 // CHECK-OPT: } unsigned long long test_3() { @@ -190,7 +190,7 @@ int f4_reload(struct s4 *a0) { return (a0->f0 += 1234) ^ (a0->f1 += 5678); } -// CHECK-OPT: define i64 @test_4() +// CHECK-OPT-LABEL: define i64 @test_4() // CHECK-OPT: ret i64 4860 // CHECK-OPT: } unsigned long long test_4() { @@ -222,7 +222,7 @@ int f5_reload(struct s5 *a0) { return (a0->f0 += 0xF) ^ (a0->f1 += 0xF) ^ (a0->f2 += 0xF); } -// CHECK-OPT: define i64 @test_5() +// CHECK-OPT-LABEL: define i64 @test_5() // CHECK-OPT: ret i64 2 // CHECK-OPT: } unsigned long long test_5() { @@ -252,7 +252,7 @@ int f6_reload(struct s6 *a0) { return (a0->f0 += 0xF); } -// CHECK-OPT: define zeroext i1 @test_6() +// CHECK-OPT-LABEL: define zeroext i1 @test_6() // CHECK-OPT: ret i1 true // CHECK-OPT: } _Bool test_6() { @@ -310,7 +310,7 @@ int f8_reload(struct s8 *a0) { return (a0->f0 += 0xFD) ^ (a0->f2 += 0xFD) ^ (a0->f3 += 0xFD); } -// CHECK-OPT: define i32 @test_8() +// CHECK-OPT-LABEL: define i32 @test_8() // CHECK-OPT: ret i32 -3 // CHECK-OPT: } unsigned test_8() { diff --git a/test/CodeGen/bitfield-assign.c b/test/CodeGen/bitfield-assign.c index b8ab613..270f44d 100644 --- a/test/CodeGen/bitfield-assign.c +++ b/test/CodeGen/bitfield-assign.c @@ -4,14 +4,14 @@ /* Check that we get one load for each simple assign and two for the compound assign (load the old value before the add then load again to store back). Also check that our g0 pattern is good. 
*/ -// RUN: %clang_cc1 -triple i386-unknown-unknown -O0 -emit-llvm -o %t %s +// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -o %t %s // RUN: grep 'load ' %t | count 5 // RUN: grep "@g0" %t | count 4 // Check that we got the right value. // RUN: %clang_cc1 -triple i386-unknown-unknown -O3 -emit-llvm -o %t %s -// RUN: grep 'load ' %t | count 0 -// RUN: grep "@g0" %t | count 0 +// RUN: not grep 'load ' %t +// RUN: not grep "@g0" %t struct s0 { int f0 : 2; diff --git a/test/CodeGen/bitfield.c b/test/CodeGen/bitfield.c index dea5e43..c624d00 100644 --- a/test/CodeGen/bitfield.c +++ b/test/CodeGen/bitfield.c @@ -1,6 +1,5 @@ -// RUN: %clang_cc1 -triple i386-unknown-unknown %s -emit-llvm -o %t -O3 -// RUN: grep "ret i32" %t | count 4 -// RUN: grep "ret i32 1" %t | count 4 +// RUN: %clang_cc1 -triple i386-unknown-unknown %s -emit-llvm -o - -O3 -no-struct-path-tbaa | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-unknown %s -emit-llvm -o - -O3 | FileCheck %s --check-prefix=PATH static int f0(int n) { struct s0 { @@ -17,6 +16,10 @@ static int f0(int n) { } int g0(void) { +// CHECK-LABEL: @g0() +// CHECK: ret i32 1 +// PATH-LABEL: @g0() +// PATH: ret i32 1 return f0(-1) + 44335655; } @@ -35,6 +38,10 @@ static int f1(void) { } int g1(void) { +// CHECK-LABEL: @g1() +// CHECK: ret i32 1 +// PATH-LABEL: @g1() +// PATH: ret i32 1 return f1() + 16; } @@ -51,6 +58,10 @@ static int f2(void) { } int g2(void) { +// CHECK-LABEL: @g2() +// CHECK: ret i32 1 +// PATH-LABEL: @g2() +// PATH: ret i32 1 return f2() - 9; } @@ -70,5 +81,9 @@ static int f3(int n) { } int g3(void) { +// CHECK-LABEL: @g3() +// CHECK: ret i32 1 +// PATH-LABEL: @g3() +// PATH: ret i32 1 return f3(20) + 130725747; } diff --git a/test/CodeGen/block-byref-aggr.c b/test/CodeGen/block-byref-aggr.c index eb342b8..eed0239 100644 --- a/test/CodeGen/block-byref-aggr.c +++ b/test/CodeGen/block-byref-aggr.c @@ -12,7 +12,7 @@ void test0() { a = makeAgg(); } -// CHECK: define void @test0() +// CHECK-LABEL: define void @test0() // CHECK: [[A:%.*]] = alloca [[BYREF:%.*]], align 8 // CHECK-NEXT: [[TEMP:%.*]] = alloca [[AGG]], align 4 // CHECK: [[RESULT:%.*]] = call i32 @makeAgg() @@ -37,7 +37,7 @@ void test1() { __block Agg a, b; a = b = makeAgg(); } -// CHECK: define void @test1() +// CHECK-LABEL: define void @test1() // CHECK: [[A:%.*]] = alloca [[A_BYREF:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[B_BYREF:%.*]], align 8 // CHECK-NEXT: [[TEMP:%.*]] = alloca [[AGG]], align 4 diff --git a/test/CodeGen/blocks-2.c b/test/CodeGen/blocks-2.c deleted file mode 100644 index 4e574da..0000000 --- a/test/CodeGen/blocks-2.c +++ /dev/null @@ -1,18 +0,0 @@ -// RUN: %clang_cc1 -g %s -emit-llvm -o %t -fblocks -// RUN: grep "func.start" %t | count 4 -// RUN: %clang_cc1 -g %s -triple i386-unknown-unknown -emit-llvm -o %t -fblocks -fblock-introspection -// RUN: grep "v8@?0i4" %t | count 1 -// RUN: %clang_cc1 -g %s -triple i386-unknown-unknown -emit-llvm -o %t -fblocks -// RUN: grep "v8@?0i4" %t | count 0 -// 1 declaration, 1 bar, 1 test_block_dbg and 1 for the block. -// XFAIL: * - -static __inline__ __attribute__((always_inline)) int bar(int va, int vb) { return (va == vb); } - -int test_block_dbg() { - extern int g; - static int i = 1; - ^(int j){ i = bar(3,4); }(0); - return i + g; -} - diff --git a/test/CodeGen/blocks.c b/test/CodeGen/blocks.c index 71f7171..5871e8c 100644 --- a/test/CodeGen/blocks.c +++ b/test/CodeGen/blocks.c @@ -45,7 +45,7 @@ void f3() { // The bool can fill in between the header and the long long. 
// Add the appropriate amount of padding between them. void f4_helper(long long (^)(void)); -// CHECK: define void @f4() +// CHECK-LABEL: define void @f4() void f4(void) { _Bool b = 0; long long ll = 0; @@ -60,9 +60,21 @@ struct F5 { char buffer[32] __attribute((aligned)); }; void f5_helper(void (^)(struct F5 *)); -// CHECK: define void @f5() +// CHECK-LABEL: define void @f5() void f5(void) { struct F5 value; // CHECK: alloca <{ i8*, i32, i32, i8*, {{%.*}}*, [12 x i8], [[F5:%.*]] }>, align 16 f5_helper(^(struct F5 *slot) { *slot = value; }); } + +// rdar://14085217 +void (^b)() = ^{}; +int main() { + (b?: ^{})(); +} +// CHECK: [[ZERO:%.*]] = load void (...)** @b +// CHECK-NEXT: [[TB:%.*]] = icmp ne void (...)* [[ZERO]], null +// CHECK-NEXT: br i1 [[TB]], label [[CT:%.*]], label [[CF:%.*]] +// CHECK: [[ONE:%.*]] = bitcast void (...)* [[ZERO]] to void ()* +// CHECK-NEXT: br label [[CE:%.*]] + diff --git a/test/CodeGen/bool_test.c b/test/CodeGen/bool_test.c index 83d8330..a4aa669 100644 --- a/test/CodeGen/bool_test.c +++ b/test/CodeGen/bool_test.c @@ -8,7 +8,7 @@ void f(_Bool *x, _Bool *y) { *x = *y; } -// CHECK: define void @f( +// CHECK-LABEL: define void @f( // CHECK: [[FROMMEM:%.*]] = load i32* % // CHECK: [[BOOLVAL:%.*]] = trunc i32 [[FROMMEM]] to i1 // CHECK: [[TOMEM:%.*]] = zext i1 [[BOOLVAL]] to i32 diff --git a/test/CodeGen/bounds-checking.c b/test/CodeGen/bounds-checking.c index fa7541f..d93cd3e 100644 --- a/test/CodeGen/bounds-checking.c +++ b/test/CodeGen/bounds-checking.c @@ -1,26 +1,29 @@ -// RUN: %clang_cc1 -fsanitize=bounds -emit-llvm -triple x86_64-apple-darwin10 < %s | FileCheck %s +// RUN: %clang_cc1 -fsanitize=local-bounds -emit-llvm -triple x86_64-apple-darwin10 %s -o - | FileCheck %s +// RUN: %clang_cc1 -fsanitize=array-bounds -O -fsanitize-undefined-trap-on-error -emit-llvm -triple x86_64-apple-darwin10 -DNO_DYNAMIC %s -o - | FileCheck %s -// CHECK: @f +// CHECK-LABEL: @f double f(int b, int i) { double a[b]; - // CHECK: trap + // CHECK: call {{.*}} @llvm.trap return a[i]; } -// CHECK: @f2 +// CHECK-LABEL: @f2 void f2() { // everything is constant; no trap possible - // CHECK-NOT: trap + // CHECK-NOT: call {{.*}} @llvm.trap int a[2]; a[1] = 42; - + +#ifndef NO_DYNAMIC short *b = malloc(64); b[5] = *a + a[1] + 2; +#endif } -// CHECK: @f3 +// CHECK-LABEL: @f3 void f3() { int a[1]; - // CHECK: trap + // CHECK: call {{.*}} @llvm.trap a[2] = 1; } diff --git a/test/CodeGen/branch-on-bool.c b/test/CodeGen/branch-on-bool.c new file mode 100644 index 0000000..78dae1b --- /dev/null +++ b/test/CodeGen/branch-on-bool.c @@ -0,0 +1,22 @@ +// RUN: %clang %s -O0 -emit-llvm -S -o - | FileCheck %s + +void foo(); +void bar(); + +void fold_if(int a, int b) { + // CHECK: define {{.*}} @fold_if( + // CHECK-NOT: = phi + // CHECK: } + if (a && b) + foo(); + else + bar(); +} + +void fold_for(int a, int b) { + // CHECK: define {{.*}} @fold_for( + // CHECK-NOT: = phi + // CHECK: } + for (int i = 0; a && i < b; ++i) foo(); + for (int i = 0; a || i < b; ++i) bar(); +} diff --git a/test/CodeGen/builtin-ms-noop.cpp b/test/CodeGen/builtin-ms-noop.cpp index 42c2501..7c5068d 100644 --- a/test/CodeGen/builtin-ms-noop.cpp +++ b/test/CodeGen/builtin-ms-noop.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple i686-pc-win32 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fms-extensions -triple i686-pc-win32 -emit-llvm %s -o - | FileCheck %s class A { public: diff --git a/test/CodeGen/builtins-arm-exclusive.c b/test/CodeGen/builtins-arm-exclusive.c new file mode 100644 index 0000000..7eccb9e --- /dev/null 
+++ b/test/CodeGen/builtins-arm-exclusive.c @@ -0,0 +1,112 @@ +// REQUIRES: arm-registered-target +// RUN: %clang_cc1 -Wall -Werror -triple thumbv7-linux-gnueabi -fno-signed-char -O3 -emit-llvm -o - %s | FileCheck %s + +// Make sure the canonical use works before going into smaller details: +int atomic_inc(int *addr) { + int Failure, OldVal; + do { + OldVal = __builtin_arm_ldrex(addr); + Failure = __builtin_arm_strex(OldVal + 1, addr); + } while (Failure); + + return OldVal; +} + +// CHECK: @atomic_inc +// CHECK: [[OLDVAL:%.*]] = tail call i32 @llvm.arm.ldrex.p0i32(i32* %addr) +// CHECK: [[INC:%.*]] = add nsw i32 [[OLDVAL]], 1 +// CHECK: [[FAILURE:%.*]] = tail call i32 @llvm.arm.strex.p0i32(i32 [[INC]], i32* %addr) +// CHECK: [[TST:%.*]] = icmp eq i32 [[FAILURE]], 0 +// CHECK: br i1 [[TST]], label {{%[a-zA-Z0-9.]+}}, label {{%[a-zA-Z0-9.]+}} + +struct Simple { + char a, b; +}; + +int test_ldrex(char *addr, long long *addr64, float *addrfloat) { +// CHECK: @test_ldrex + int sum = 0; + sum += __builtin_arm_ldrex(addr); +// CHECK: [[INTRES:%.*]] = tail call i32 @llvm.arm.ldrex.p0i8(i8* %addr) +// CHECK: and i32 [[INTRES]], 255 + + sum += __builtin_arm_ldrex((short *)addr); +// CHECK: [[ADDR16:%.*]] = bitcast i8* %addr to i16* +// CHECK: [[INTRES:%.*]] = tail call i32 @llvm.arm.ldrex.p0i16(i16* [[ADDR16]]) +// CHECK: [[TMPSEXT:%.*]] = shl i32 [[INTRES]], 16 +// CHECK: ashr exact i32 [[TMPSEXT]], 16 + + sum += __builtin_arm_ldrex((int *)addr); +// CHECK: [[ADDR32:%.*]] = bitcast i8* %addr to i32* +// CHECK: call i32 @llvm.arm.ldrex.p0i32(i32* [[ADDR32]]) + + sum += __builtin_arm_ldrex((long long *)addr); +// CHECK: call { i32, i32 } @llvm.arm.ldrexd(i8* %addr) + + sum += __builtin_arm_ldrex(addr64); +// CHECK: [[ADDR64_AS8:%.*]] = bitcast i64* %addr64 to i8* +// CHECK: call { i32, i32 } @llvm.arm.ldrexd(i8* [[ADDR64_AS8]]) + + sum += __builtin_arm_ldrex(addrfloat); +// CHECK: [[INTADDR:%.*]] = bitcast float* %addrfloat to i32* +// CHECK: [[INTRES:%.*]] = tail call i32 @llvm.arm.ldrex.p0i32(i32* [[INTADDR]]) +// CHECK: bitcast i32 [[INTRES]] to float + + sum += __builtin_arm_ldrex((double *)addr); +// CHECK: [[STRUCTRES:%.*]] = tail call { i32, i32 } @llvm.arm.ldrexd(i8* %addr) +// CHECK: [[RESHI:%.*]] = extractvalue { i32, i32 } [[STRUCTRES]], 1 +// CHECK: [[RESLO:%.*]] = extractvalue { i32, i32 } [[STRUCTRES]], 0 +// CHECK: [[RESHI64:%.*]] = zext i32 [[RESHI]] to i64 +// CHECK: [[RESLO64:%.*]] = zext i32 [[RESLO]] to i64 +// CHECK: [[RESHIHI:%.*]] = shl nuw i64 [[RESHI64]], 32 +// CHECK: [[INTRES:%.*]] = or i64 [[RESHIHI]], [[RESLO64]] +// CHECK: bitcast i64 [[INTRES]] to double + + sum += *__builtin_arm_ldrex((int **)addr); +// CHECK: [[INTRES:%.*]] = tail call i32 @llvm.arm.ldrex.p0i32(i32* [[ADDR32]]) +// CHECK: inttoptr i32 [[INTRES]] to i32* + + sum += __builtin_arm_ldrex((struct Simple **)addr)->a; +// CHECK: [[INTRES:%.*]] = tail call i32 @llvm.arm.ldrex.p0i32(i32* [[ADDR32]]) +// CHECK: inttoptr i32 [[INTRES]] to %struct.Simple* + + return sum; +} + +int test_strex(char *addr) { +// CHECK: @test_strex + int res = 0; + struct Simple var = {0}; + res |= __builtin_arm_strex(4, addr); +// CHECK: call i32 @llvm.arm.strex.p0i8(i32 4, i8* %addr) + + res |= __builtin_arm_strex(42, (short *)addr); +// CHECK: [[ADDR16:%.*]] = bitcast i8* %addr to i16* +// CHECK: call i32 @llvm.arm.strex.p0i16(i32 42, i16* [[ADDR16]]) + + res |= __builtin_arm_strex(42, (int *)addr); +// CHECK: [[ADDR32:%.*]] = bitcast i8* %addr to i32* +// CHECK: call i32 @llvm.arm.strex.p0i32(i32 42, i32* [[ADDR32]]) + + res |= 
__builtin_arm_strex(42, (long long *)addr); +// CHECK: call i32 @llvm.arm.strexd(i32 42, i32 0, i8* %addr) + + res |= __builtin_arm_strex(2.71828f, (float *)addr); +// CHECK: call i32 @llvm.arm.strex.p0i32(i32 1076754509, i32* [[ADDR32]]) + + res |= __builtin_arm_strex(3.14159, (double *)addr); +// CHECK: call i32 @llvm.arm.strexd(i32 -266631570, i32 1074340345, i8* %addr) + + res |= __builtin_arm_strex(&var, (struct Simple **)addr); +// CHECK: [[INTVAL:%.*]] = ptrtoint i16* %var to i32 +// CHECK: call i32 @llvm.arm.strex.p0i32(i32 [[INTVAL]], i32* [[ADDR32]]) + + return res; +} + +void test_clrex() { +// CHECK: @test_clrex + + __builtin_arm_clrex(); +// CHECK: call void @llvm.arm.clrex() +} diff --git a/test/CodeGen/builtins-arm.c b/test/CodeGen/builtins-arm.c index e6c7ced..937e1d9 100644 --- a/test/CodeGen/builtins-arm.c +++ b/test/CodeGen/builtins-arm.c @@ -18,3 +18,13 @@ void test_eh_return_data_regno() res = __builtin_eh_return_data_regno(0); // CHECK: store volatile i32 0 res = __builtin_eh_return_data_regno(1); // CHECK: store volatile i32 1 } + +void sevl() { + __builtin_arm_sevl(); +} +// CHECK: call {{.*}} @llvm.arm.sevl + +void test_barrier() { + __builtin_arm_dmb(1); //CHECK: call {{.*}} @llvm.arm.dmb(i32 1) + __builtin_arm_dsb(2); //CHECK: call {{.*}} @llvm.arm.dsb(i32 2) +} diff --git a/test/CodeGen/builtins-mips-msa.c b/test/CodeGen/builtins-mips-msa.c new file mode 100644 index 0000000..69cb8e2 --- /dev/null +++ b/test/CodeGen/builtins-mips-msa.c @@ -0,0 +1,829 @@ +// REQUIRES: mips-registered-target +// RUN: %clang_cc1 -triple mips-unknown-linux-gnu -emit-llvm %s -o - \ +// RUN: | FileCheck %s + +typedef signed char v16i8 __attribute__ ((vector_size(16))); +typedef signed short v8i16 __attribute__ ((vector_size(16))); +typedef signed int v4i32 __attribute__ ((vector_size(16))); +typedef signed long long v2i64 __attribute__ ((vector_size(16))); +typedef unsigned char v16u8 __attribute__ ((vector_size(16))); +typedef unsigned short v8u16 __attribute__ ((vector_size(16))); +typedef unsigned int v4u32 __attribute__ ((vector_size(16))); +typedef unsigned long long v2u64 __attribute__ ((vector_size(16))); +typedef __fp16 v8f16 __attribute__ ((vector_size(16))); +typedef float v4f32 __attribute__ ((vector_size(16))); +typedef double v2f64 __attribute__ ((vector_size(16))); + +void test(void) { + v16i8 v16i8_a = (v16i8) {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + v16i8 v16i8_b = (v16i8) {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + v16i8 v16i8_r; + v8i16 v8i16_a = (v8i16) {0, 1, 2, 3, 4, 5, 6, 7}; + v8i16 v8i16_b = (v8i16) {1, 2, 3, 4, 5, 6, 7, 8}; + v8i16 v8i16_r; + v4i32 v4i32_a = (v4i32) {0, 1, 2, 3}; + v4i32 v4i32_b = (v4i32) {1, 2, 3, 4}; + v4i32 v4i32_r; + v2i64 v2i64_a = (v2i64) {0, 1}; + v2i64 v2i64_b = (v2i64) {1, 2}; + v2i64 v2i64_r; + + v16u8 v16u8_a = (v16u8) {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + v16u8 v16u8_b = (v16u8) {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + v16u8 v16u8_r; + v8u16 v8u16_a = (v8u16) {0, 1, 2, 3, 4, 5, 6, 7}; + v8u16 v8u16_b = (v8u16) {1, 2, 3, 4, 5, 6, 7, 8}; + v8u16 v8u16_r; + v4u32 v4u32_a = (v4u32) {0, 1, 2, 3}; + v4u32 v4u32_b = (v4u32) {1, 2, 3, 4}; + v4u32 v4u32_r; + v2u64 v2u64_a = (v2u64) {0, 1}; + v2u64 v2u64_b = (v2u64) {1, 2}; + v2u64 v2u64_r; + + v8f16 v8f16_a = (v8f16) {0.5, 1, 2, 3, 4, 5, 6, 7}; + v8f16 v8f16_b = (v8f16) {1.5, 2, 3, 4, 5, 6, 7, 8}; + v8f16 v8f16_r; + v4f32 v4f32_a = (v4f32) {0.5, 1, 2, 3}; + v4f32 v4f32_b = (v4f32) {1.5, 2, 3, 4}; + v4f32 v4f32_r; + v2f64 
v2f64_a = (v2f64) {0.5, 1}; + v2f64 v2f64_b = (v2f64) {1.5, 2}; + v2f64 v2f64_r; + + int int_r; + long long ll_r; + int int_a = 0; + + v16i8_r = __builtin_msa_add_a_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.add.a.b( + v8i16_r = __builtin_msa_add_a_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.add.a.h( + v4i32_r = __builtin_msa_add_a_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.add.a.w( + v2i64_r = __builtin_msa_add_a_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.add.a.d( + + v16i8_r = __builtin_msa_adds_a_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.adds.a.b( + v8i16_r = __builtin_msa_adds_a_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.adds.a.h( + v4i32_r = __builtin_msa_adds_a_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.adds.a.w( + v2i64_r = __builtin_msa_adds_a_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.adds.a.d( + + v16i8_r = __builtin_msa_adds_s_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.adds.s.b( + v8i16_r = __builtin_msa_adds_s_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.adds.s.h( + v4i32_r = __builtin_msa_adds_s_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.adds.s.w( + v2i64_r = __builtin_msa_adds_s_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.adds.s.d( + + v16u8_r = __builtin_msa_adds_u_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.mips.adds.u.b( + v8u16_r = __builtin_msa_adds_u_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.mips.adds.u.h( + v4u32_r = __builtin_msa_adds_u_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.mips.adds.u.w( + v2u64_r = __builtin_msa_adds_u_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.mips.adds.u.d( + + v16i8_r = __builtin_msa_addv_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.addv.b( + v8i16_r = __builtin_msa_addv_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.addv.h( + v4i32_r = __builtin_msa_addv_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.addv.w( + v2i64_r = __builtin_msa_addv_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.addv.d( + + v16u8_r = __builtin_msa_addv_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.mips.addv.b( + v8u16_r = __builtin_msa_addv_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.mips.addv.h( + v4u32_r = __builtin_msa_addv_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.mips.addv.w( + v2u64_r = __builtin_msa_addv_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.mips.addv.d( + + v16i8_r = __builtin_msa_addvi_b(v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.addvi.b( + v8i16_r = __builtin_msa_addvi_h(v8i16_a, 25); // CHECK: call <8 x i16> @llvm.mips.addvi.h( + v4i32_r = __builtin_msa_addvi_w(v4i32_a, 25); // CHECK: call <4 x i32> @llvm.mips.addvi.w( + v2i64_r = __builtin_msa_addvi_d(v2i64_a, 25); // CHECK: call <2 x i64> @llvm.mips.addvi.d( + + v16u8_r = __builtin_msa_addvi_b(v16u8_a, 25); // CHECK: call <16 x i8> @llvm.mips.addvi.b( + v8u16_r = __builtin_msa_addvi_h(v8u16_a, 25); // CHECK: call <8 x i16> @llvm.mips.addvi.h( + v4u32_r = __builtin_msa_addvi_w(v4u32_a, 25); // CHECK: call <4 x i32> @llvm.mips.addvi.w( + v2u64_r = __builtin_msa_addvi_d(v2u64_a, 25); // CHECK: call <2 x i64> @llvm.mips.addvi.d( + + v16i8_r = __builtin_msa_and_v(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.and.v( + v8i16_r = __builtin_msa_and_v(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.mips.and.v( + v4i32_r = __builtin_msa_and_v(v4i32_a, v4i32_b); // CHECK: call <16 x i8> @llvm.mips.and.v( + v2i64_r = 
__builtin_msa_and_v(v2i64_a, v2i64_b); // CHECK: call <16 x i8> @llvm.mips.and.v( + + v16i8_r = __builtin_msa_andi_b(v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.andi.b( + v8i16_r = __builtin_msa_andi_b(v8i16_a, 25); // CHECK: call <16 x i8> @llvm.mips.andi.b( + v4i32_r = __builtin_msa_andi_b(v4i32_a, 25); // CHECK: call <16 x i8> @llvm.mips.andi.b( + v2i64_r = __builtin_msa_andi_b(v2i64_a, 25); // CHECK: call <16 x i8> @llvm.mips.andi.b( + + v16u8_r = __builtin_msa_andi_b(v16u8_a, 25); // CHECK: call <16 x i8> @llvm.mips.andi.b( + v8u16_r = __builtin_msa_andi_b(v8u16_a, 25); // CHECK: call <16 x i8> @llvm.mips.andi.b( + v4u32_r = __builtin_msa_andi_b(v4u32_a, 25); // CHECK: call <16 x i8> @llvm.mips.andi.b( + v2u64_r = __builtin_msa_andi_b(v2u64_a, 25); // CHECK: call <16 x i8> @llvm.mips.andi.b( + + v16i8_r = __builtin_msa_asub_s_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.asub.s.b( + v8i16_r = __builtin_msa_asub_s_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.asub.s.h( + v4i32_r = __builtin_msa_asub_s_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.asub.s.w( + v2i64_r = __builtin_msa_asub_s_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.asub.s.d( + + v16u8_r = __builtin_msa_asub_u_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.mips.asub.u.b( + v8u16_r = __builtin_msa_asub_u_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.mips.asub.u.h( + v4u32_r = __builtin_msa_asub_u_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.mips.asub.u.w( + v2u64_r = __builtin_msa_asub_u_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.mips.asub.u.d( + + v16i8_r = __builtin_msa_ave_s_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.ave.s.b( + v8i16_r = __builtin_msa_ave_s_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.ave.s.h( + v4i32_r = __builtin_msa_ave_s_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.ave.s.w( + v2i64_r = __builtin_msa_ave_s_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.ave.s.d( + + v16u8_r = __builtin_msa_ave_u_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.mips.ave.u.b( + v8u16_r = __builtin_msa_ave_u_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.mips.ave.u.h( + v4u32_r = __builtin_msa_ave_u_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.mips.ave.u.w( + v2u64_r = __builtin_msa_ave_u_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.mips.ave.u.d( + + v16i8_r = __builtin_msa_aver_s_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.aver.s.b( + v8i16_r = __builtin_msa_aver_s_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.aver.s.h( + v4i32_r = __builtin_msa_aver_s_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.aver.s.w( + v2i64_r = __builtin_msa_aver_s_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.aver.s.d( + + v16u8_r = __builtin_msa_aver_u_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.mips.aver.u.b( + v8u16_r = __builtin_msa_aver_u_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.mips.aver.u.h( + v4u32_r = __builtin_msa_aver_u_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.mips.aver.u.w( + v2u64_r = __builtin_msa_aver_u_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.mips.aver.u.d( + + v16i8_r = __builtin_msa_bclr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.bclr.b( + v8i16_r = __builtin_msa_bclr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.bclr.h( + v4i32_r = __builtin_msa_bclr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.bclr.w( + v2i64_r = __builtin_msa_bclr_d(v2i64_a, v2i64_b); // 
CHECK: call <2 x i64> @llvm.mips.bclr.d( + + v16i8_r = __builtin_msa_bclri_b(v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.bclri.b( + v8i16_r = __builtin_msa_bclri_h(v8i16_a, 25); // CHECK: call <8 x i16> @llvm.mips.bclri.h( + v4i32_r = __builtin_msa_bclri_w(v4i32_a, 25); // CHECK: call <4 x i32> @llvm.mips.bclri.w( + v2i64_r = __builtin_msa_bclri_d(v2i64_a, 25); // CHECK: call <2 x i64> @llvm.mips.bclri.d( + + v16i8_r = __builtin_msa_binsl_b(v16i8_r, v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.binsl.b( + v8i16_r = __builtin_msa_binsl_h(v8i16_r, v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.binsl.h( + v4i32_r = __builtin_msa_binsl_w(v4i32_r, v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.binsl.w( + v2i64_r = __builtin_msa_binsl_d(v2i64_r, v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.binsl.d( + + v16i8_r = __builtin_msa_binsli_b(v16i8_r, v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.binsli.b( + v8i16_r = __builtin_msa_binsli_h(v8i16_r, v8i16_a, 25); // CHECK: call <8 x i16> @llvm.mips.binsli.h( + v4i32_r = __builtin_msa_binsli_w(v4i32_r, v4i32_a, 25); // CHECK: call <4 x i32> @llvm.mips.binsli.w( + v2i64_r = __builtin_msa_binsli_d(v2i64_r, v2i64_a, 25); // CHECK: call <2 x i64> @llvm.mips.binsli.d( + + v16i8_r = __builtin_msa_binsr_b(v16i8_r, v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.binsr.b( + v8i16_r = __builtin_msa_binsr_h(v8i16_r, v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.binsr.h( + v4i32_r = __builtin_msa_binsr_w(v4i32_r, v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.binsr.w( + v2i64_r = __builtin_msa_binsr_d(v2i64_r, v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.binsr.d( + + v16i8_r = __builtin_msa_binsri_b(v16i8_r, v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.binsri.b( + v8i16_r = __builtin_msa_binsri_h(v8i16_r, v8i16_a, 25); // CHECK: call <8 x i16> @llvm.mips.binsri.h( + v4i32_r = __builtin_msa_binsri_w(v4i32_r, v4i32_a, 25); // CHECK: call <4 x i32> @llvm.mips.binsri.w( + v2i64_r = __builtin_msa_binsri_d(v2i64_r, v2i64_a, 25); // CHECK: call <2 x i64> @llvm.mips.binsri.d( + + v16i8_r = __builtin_msa_bmnz_v(v16i8_r, v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.bmnz.v( + v8i16_r = __builtin_msa_bmnz_v(v8i16_r, v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.mips.bmnz.v( + v4i32_r = __builtin_msa_bmnz_v(v4i32_r, v4i32_a, v4i32_b); // CHECK: call <16 x i8> @llvm.mips.bmnz.v( + v2i64_r = __builtin_msa_bmnz_v(v2i64_r, v2i64_a, v2i64_b); // CHECK: call <16 x i8> @llvm.mips.bmnz.v( + + v16i8_r = __builtin_msa_bmnzi_b(v16i8_r, v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.bmnzi.b( + + v16i8_r = __builtin_msa_bmz_v(v16i8_r, v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.bmz.v( + v8i16_r = __builtin_msa_bmz_v(v8i16_r, v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.mips.bmz.v( + v4i32_r = __builtin_msa_bmz_v(v4i32_r, v4i32_a, v4i32_b); // CHECK: call <16 x i8> @llvm.mips.bmz.v( + v2i64_r = __builtin_msa_bmz_v(v2i64_r, v2i64_a, v2i64_b); // CHECK: call <16 x i8> @llvm.mips.bmz.v( + + v16i8_r = __builtin_msa_bmzi_b(v16i8_r, v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.bmzi.b( + + v16i8_r = __builtin_msa_bneg_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.bneg.b( + v8i16_r = __builtin_msa_bneg_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.bneg.h( + v4i32_r = __builtin_msa_bneg_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.bneg.w( + v2i64_r = __builtin_msa_bneg_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.bneg.d( + + v16i8_r = 
__builtin_msa_bnegi_b(v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.bnegi.b( + v8i16_r = __builtin_msa_bnegi_h(v8i16_a, 25); // CHECK: call <8 x i16> @llvm.mips.bnegi.h( + v4i32_r = __builtin_msa_bnegi_w(v4i32_a, 25); // CHECK: call <4 x i32> @llvm.mips.bnegi.w( + v2i64_r = __builtin_msa_bnegi_d(v2i64_a, 25); // CHECK: call <2 x i64> @llvm.mips.bnegi.d( + + int_r = __builtin_msa_bnz_b(v16i8_a); // CHECK: call i32 @llvm.mips.bnz.b( + int_r = __builtin_msa_bnz_h(v16i8_a); // CHECK: call i32 @llvm.mips.bnz.h( + int_r = __builtin_msa_bnz_w(v16i8_a); // CHECK: call i32 @llvm.mips.bnz.w( + int_r = __builtin_msa_bnz_d(v16i8_a); // CHECK: call i32 @llvm.mips.bnz.d( + + int_r = __builtin_msa_bnz_v(v16i8_a); // CHECK: call i32 @llvm.mips.bnz.v( + + v16i8_r = __builtin_msa_bsel_v(v16i8_r, v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.bsel.v( + v8i16_r = __builtin_msa_bsel_v(v8i16_r, v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.mips.bsel.v( + v4i32_r = __builtin_msa_bsel_v(v4i32_r, v4i32_a, v4i32_b); // CHECK: call <16 x i8> @llvm.mips.bsel.v( + v2i64_r = __builtin_msa_bsel_v(v2i64_r, v2i64_a, v2i64_b); // CHECK: call <16 x i8> @llvm.mips.bsel.v( + + v16i8_r = __builtin_msa_bseli_b(v16i8_r, v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.bseli.b( + + v16i8_r = __builtin_msa_bset_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.bset.b( + v8i16_r = __builtin_msa_bset_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.bset.h( + v4i32_r = __builtin_msa_bset_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.bset.w( + v2i64_r = __builtin_msa_bset_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.bset.d( + + v16i8_r = __builtin_msa_bseti_b(v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.bseti.b( + v8i16_r = __builtin_msa_bseti_h(v8i16_a, 25); // CHECK: call <8 x i16> @llvm.mips.bseti.h( + v4i32_r = __builtin_msa_bseti_w(v4i32_a, 25); // CHECK: call <4 x i32> @llvm.mips.bseti.w( + v2i64_r = __builtin_msa_bseti_d(v2i64_a, 25); // CHECK: call <2 x i64> @llvm.mips.bseti.d( + + int_r = __builtin_msa_bz_b(v16i8_a); // CHECK: call i32 @llvm.mips.bz.b( + int_r = __builtin_msa_bz_h(v16i8_a); // CHECK: call i32 @llvm.mips.bz.h( + int_r = __builtin_msa_bz_w(v16i8_a); // CHECK: call i32 @llvm.mips.bz.w( + int_r = __builtin_msa_bz_d(v16i8_a); // CHECK: call i32 @llvm.mips.bz.d( + + int_r = __builtin_msa_bz_v(v16i8_a); // CHECK: call i32 @llvm.mips.bz.v( + + v16i8_r = __builtin_msa_ceq_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.ceq.b( + v8i16_r = __builtin_msa_ceq_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.ceq.h( + v4i32_r = __builtin_msa_ceq_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.ceq.w( + v2i64_r = __builtin_msa_ceq_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.ceq.d( + + v16i8_r = __builtin_msa_ceqi_b(v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.ceqi.b( + v8i16_r = __builtin_msa_ceqi_h(v8i16_a, 25); // CHECK: call <8 x i16> @llvm.mips.ceqi.h( + v4i32_r = __builtin_msa_ceqi_w(v4i32_a, 25); // CHECK: call <4 x i32> @llvm.mips.ceqi.w( + v2i64_r = __builtin_msa_ceqi_d(v2i64_a, 25); // CHECK: call <2 x i64> @llvm.mips.ceqi.d( + + int_r = __builtin_msa_cfcmsa(1); // CHECK: call i32 @llvm.mips.cfcmsa( + + v16i8_r = __builtin_msa_cle_s_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.cle.s.b( + v8i16_r = __builtin_msa_cle_s_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.cle.s.h( + v4i32_r = __builtin_msa_cle_s_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.cle.s.w( + v2i64_r = 
__builtin_msa_cle_s_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.cle.s.d( + + v16u8_r = __builtin_msa_cle_u_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.mips.cle.u.b( + v8u16_r = __builtin_msa_cle_u_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.mips.cle.u.h( + v4u32_r = __builtin_msa_cle_u_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.mips.cle.u.w( + v2u64_r = __builtin_msa_cle_u_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.mips.cle.u.d( + + v16i8_r = __builtin_msa_clei_s_b(v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.clei.s.b( + v8i16_r = __builtin_msa_clei_s_h(v8i16_a, 25); // CHECK: call <8 x i16> @llvm.mips.clei.s.h( + v4i32_r = __builtin_msa_clei_s_w(v4i32_a, 25); // CHECK: call <4 x i32> @llvm.mips.clei.s.w( + v2i64_r = __builtin_msa_clei_s_d(v2i64_a, 25); // CHECK: call <2 x i64> @llvm.mips.clei.s.d( + + v16u8_r = __builtin_msa_clei_u_b(v16u8_a, 25); // CHECK: call <16 x i8> @llvm.mips.clei.u.b( + v8u16_r = __builtin_msa_clei_u_h(v8u16_a, 25); // CHECK: call <8 x i16> @llvm.mips.clei.u.h( + v4u32_r = __builtin_msa_clei_u_w(v4u32_a, 25); // CHECK: call <4 x i32> @llvm.mips.clei.u.w( + v2u64_r = __builtin_msa_clei_u_d(v2u64_a, 25); // CHECK: call <2 x i64> @llvm.mips.clei.u.d( + + v16i8_r = __builtin_msa_clt_s_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.clt.s.b( + v8i16_r = __builtin_msa_clt_s_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.clt.s.h( + v4i32_r = __builtin_msa_clt_s_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.clt.s.w( + v2i64_r = __builtin_msa_clt_s_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.clt.s.d( + + v16u8_r = __builtin_msa_clt_u_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.mips.clt.u.b( + v8u16_r = __builtin_msa_clt_u_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.mips.clt.u.h( + v4u32_r = __builtin_msa_clt_u_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.mips.clt.u.w( + v2u64_r = __builtin_msa_clt_u_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.mips.clt.u.d( + + v16i8_r = __builtin_msa_clti_s_b(v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.clti.s.b( + v8i16_r = __builtin_msa_clti_s_h(v8i16_a, 25); // CHECK: call <8 x i16> @llvm.mips.clti.s.h( + v4i32_r = __builtin_msa_clti_s_w(v4i32_a, 25); // CHECK: call <4 x i32> @llvm.mips.clti.s.w( + v2i64_r = __builtin_msa_clti_s_d(v2i64_a, 25); // CHECK: call <2 x i64> @llvm.mips.clti.s.d( + + v16u8_r = __builtin_msa_clti_u_b(v16u8_a, 25); // CHECK: call <16 x i8> @llvm.mips.clti.u.b( + v8u16_r = __builtin_msa_clti_u_h(v8u16_a, 25); // CHECK: call <8 x i16> @llvm.mips.clti.u.h( + v4u32_r = __builtin_msa_clti_u_w(v4u32_a, 25); // CHECK: call <4 x i32> @llvm.mips.clti.u.w( + v2u64_r = __builtin_msa_clti_u_d(v2u64_a, 25); // CHECK: call <2 x i64> @llvm.mips.clti.u.d( + + int_r = __builtin_msa_copy_s_b(v16i8_a, 1); // CHECK: call i32 @llvm.mips.copy.s.b( + int_r = __builtin_msa_copy_s_h(v8i16_a, 1); // CHECK: call i32 @llvm.mips.copy.s.h( + int_r = __builtin_msa_copy_s_w(v4i32_a, 1); // CHECK: call i32 @llvm.mips.copy.s.w( + ll_r = __builtin_msa_copy_s_d(v2i64_a, 1); // CHECK: call i64 @llvm.mips.copy.s.d( + + int_r = __builtin_msa_copy_u_b(v16u8_a, 1); // CHECK: call i32 @llvm.mips.copy.u.b( + int_r = __builtin_msa_copy_u_h(v8u16_a, 1); // CHECK: call i32 @llvm.mips.copy.u.h( + int_r = __builtin_msa_copy_u_w(v4u32_a, 1); // CHECK: call i32 @llvm.mips.copy.u.w( + ll_r = __builtin_msa_copy_u_d(v2i64_a, 1); // CHECK: call i64 @llvm.mips.copy.u.d( + + __builtin_msa_ctcmsa(1, int_a); // CHECK: call void 
@llvm.mips.ctcmsa( + + v16i8_r = __builtin_msa_div_s_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.div.s.b( + v8i16_r = __builtin_msa_div_s_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.div.s.h( + v4i32_r = __builtin_msa_div_s_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.div.s.w( + v2i64_r = __builtin_msa_div_s_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.div.s.d( + + v16u8_r = __builtin_msa_div_u_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.mips.div.u.b( + v8u16_r = __builtin_msa_div_u_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.mips.div.u.h( + v4u32_r = __builtin_msa_div_u_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.mips.div.u.w( + v2u64_r = __builtin_msa_div_u_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.mips.div.u.d( + + v8i16_r = __builtin_msa_dotp_s_h(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.mips.dotp.s.h( + v4i32_r = __builtin_msa_dotp_s_w(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.mips.dotp.s.w( + v2i64_r = __builtin_msa_dotp_s_d(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.mips.dotp.s.d( + + v8u16_r = __builtin_msa_dotp_u_h(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.mips.dotp.u.h( + v4u32_r = __builtin_msa_dotp_u_w(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.mips.dotp.u.w( + v2u64_r = __builtin_msa_dotp_u_d(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.mips.dotp.u.d( + + v8i16_r = __builtin_msa_dpadd_s_h(v8i16_r, v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.mips.dpadd.s.h( + v4i32_r = __builtin_msa_dpadd_s_w(v4i32_r, v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.mips.dpadd.s.w( + v2i64_r = __builtin_msa_dpadd_s_d(v2i64_r, v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.mips.dpadd.s.d( + + v8u16_r = __builtin_msa_dpadd_u_h(v8u16_r, v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.mips.dpadd.u.h( + v4u32_r = __builtin_msa_dpadd_u_w(v4u32_r, v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.mips.dpadd.u.w( + v2u64_r = __builtin_msa_dpadd_u_d(v2u64_r, v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.mips.dpadd.u.d( + + v8i16_r = __builtin_msa_dpsub_s_h(v8i16_r, v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.mips.dpsub.s.h( + v4i32_r = __builtin_msa_dpsub_s_w(v4i32_r, v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.mips.dpsub.s.w( + v2i64_r = __builtin_msa_dpsub_s_d(v2i64_r, v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.mips.dpsub.s.d( + + v8u16_r = __builtin_msa_dpsub_u_h(v8u16_r, v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.mips.dpsub.u.h( + v4u32_r = __builtin_msa_dpsub_u_w(v4u32_r, v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.mips.dpsub.u.w( + v2u64_r = __builtin_msa_dpsub_u_d(v2u64_r, v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.mips.dpsub.u.d( + + v4f32_r = __builtin_msa_fadd_w(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.mips.fadd.w( + v2f64_r = __builtin_msa_fadd_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.mips.fadd.d( + + v4i32_r = __builtin_msa_fcaf_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fcaf.w( + v2i64_r = __builtin_msa_fcaf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fcaf.d( + + v4i32_r = __builtin_msa_fceq_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fceq.w( + v2i64_r = __builtin_msa_fceq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fceq.d( + + v4i32_r = __builtin_msa_fclass_w(v4f32_a); // CHECK: call <4 x i32> @llvm.mips.fclass.w( + v2i64_r = __builtin_msa_fclass_d(v2f64_a); // CHECK: call <2 x i64> @llvm.mips.fclass.d( + + v4i32_r = 
__builtin_msa_fcle_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fcle.w( + v2i64_r = __builtin_msa_fcle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fcle.d( + + v4i32_r = __builtin_msa_fclt_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fclt.w( + v2i64_r = __builtin_msa_fclt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fclt.d( + + v4i32_r = __builtin_msa_fcne_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fcne.w( + v2i64_r = __builtin_msa_fcne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fcne.d( + + v4i32_r = __builtin_msa_fcor_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fcor.w( + v2i64_r = __builtin_msa_fcor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fcor.d( + + v4i32_r = __builtin_msa_fcueq_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fcueq.w( + v2i64_r = __builtin_msa_fcueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fcueq.d( + + v4i32_r = __builtin_msa_fcule_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fcule.w( + v2i64_r = __builtin_msa_fcule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fcule.d( + + v4i32_r = __builtin_msa_fcult_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fcult.w( + v2i64_r = __builtin_msa_fcult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fcult.d( + + v4i32_r = __builtin_msa_fcun_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fcun.w( + v2i64_r = __builtin_msa_fcun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fcun.d( + + v4i32_r = __builtin_msa_fcune_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fcune.w( + v2i64_r = __builtin_msa_fcune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fcune.d( + + v4f32_r = __builtin_msa_fdiv_w(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.mips.fdiv.w( + v2f64_r = __builtin_msa_fdiv_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.mips.fdiv.d( + + v8f16_r = __builtin_msa_fexdo_h(v4f32_a, v4f32_b); // CHECK: call <8 x half> @llvm.mips.fexdo.h( + v4f32_r = __builtin_msa_fexdo_w(v2f64_a, v2f64_b); // CHECK: call <4 x float> @llvm.mips.fexdo.w( + + v4f32_r = __builtin_msa_fexp2_w(v4f32_a, v4i32_b); // CHECK: call <4 x float> @llvm.mips.fexp2.w( + v2f64_r = __builtin_msa_fexp2_d(v2f64_a, v2i64_b); // CHECK: call <2 x double> @llvm.mips.fexp2.d( + + v4f32_r = __builtin_msa_fexupl_w(v8f16_a); // CHECK: call <4 x float> @llvm.mips.fexupl.w( + v2f64_r = __builtin_msa_fexupl_d(v4f32_a); // CHECK: call <2 x double> @llvm.mips.fexupl.d( + + v4f32_r = __builtin_msa_fexupr_w(v8f16_a); // CHECK: call <4 x float> @llvm.mips.fexupr.w( + v2f64_r = __builtin_msa_fexupr_d(v4f32_a); // CHECK: call <2 x double> @llvm.mips.fexupr.d( + + v4f32_r = __builtin_msa_ffint_s_w(v4i32_a); // CHECK: call <4 x float> @llvm.mips.ffint.s.w( + v2f64_r = __builtin_msa_ffint_s_d(v2i64_a); // CHECK: call <2 x double> @llvm.mips.ffint.s.d( + + v4f32_r = __builtin_msa_ffint_u_w(v4i32_a); // CHECK: call <4 x float> @llvm.mips.ffint.u.w( + v2f64_r = __builtin_msa_ffint_u_d(v2i64_a); // CHECK: call <2 x double> @llvm.mips.ffint.u.d( + + v4f32_r = __builtin_msa_ffql_w(v8i16_a); // CHECK: call <4 x float> @llvm.mips.ffql.w( + v2f64_r = __builtin_msa_ffql_d(v4i32_a); // CHECK: call <2 x double> @llvm.mips.ffql.d( + + v4f32_r = __builtin_msa_ffqr_w(v8i16_a); // CHECK: call <4 x float> @llvm.mips.ffqr.w( + v2f64_r = __builtin_msa_ffqr_d(v4i32_a); // CHECK: call <2 x double> @llvm.mips.ffqr.d( + + v16i8_r = __builtin_msa_fill_b(3); // CHECK: call <16 x i8> 
@llvm.mips.fill.b( + v8i16_r = __builtin_msa_fill_h(3); // CHECK: call <8 x i16> @llvm.mips.fill.h( + v4i32_r = __builtin_msa_fill_w(3); // CHECK: call <4 x i32> @llvm.mips.fill.w( + v2i64_r = __builtin_msa_fill_d(3); // CHECK: call <2 x i64> @llvm.mips.fill.d( + + v4f32_r = __builtin_msa_flog2_w(v8f16_a); // CHECK: call <4 x float> @llvm.mips.flog2.w( + v2f64_r = __builtin_msa_flog2_d(v4f32_a); // CHECK: call <2 x double> @llvm.mips.flog2.d( + + v4f32_r = __builtin_msa_fmadd_w(v8f16_r, v8f16_a, v8f16_b); // CHECK: call <4 x float> @llvm.mips.fmadd.w( + v2f64_r = __builtin_msa_fmadd_d(v4f32_r, v4f32_a, v4f32_b); // CHECK: call <2 x double> @llvm.mips.fmadd.d( + + v4f32_r = __builtin_msa_fmax_w(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.mips.fmax.w( + v2f64_r = __builtin_msa_fmax_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.mips.fmax.d( + + v4f32_r = __builtin_msa_fmax_a_w(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.mips.fmax.a.w( + v2f64_r = __builtin_msa_fmax_a_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.mips.fmax.a.d( + + v4f32_r = __builtin_msa_fmin_w(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.mips.fmin.w( + v2f64_r = __builtin_msa_fmin_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.mips.fmin.d( + + v4f32_r = __builtin_msa_fmin_a_w(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.mips.fmin.a.w( + v2f64_r = __builtin_msa_fmin_a_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.mips.fmin.a.d( + + v4f32_r = __builtin_msa_fmsub_w(v8f16_r, v8f16_a, v8f16_b); // CHECK: call <4 x float> @llvm.mips.fmsub.w( + v2f64_r = __builtin_msa_fmsub_d(v4f32_r, v4f32_a, v4f32_b); // CHECK: call <2 x double> @llvm.mips.fmsub.d( + + v4f32_r = __builtin_msa_fmul_w(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.mips.fmul.w( + v2f64_r = __builtin_msa_fmul_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.mips.fmul.d( + + v4f32_r = __builtin_msa_frint_w(v8f16_a); // CHECK: call <4 x float> @llvm.mips.frint.w( + v2f64_r = __builtin_msa_frint_d(v4f32_a); // CHECK: call <2 x double> @llvm.mips.frint.d( + + v4f32_r = __builtin_msa_frcp_w(v8f16_a); // CHECK: call <4 x float> @llvm.mips.frcp.w( + v2f64_r = __builtin_msa_frcp_d(v4f32_a); // CHECK: call <2 x double> @llvm.mips.frcp.d( + + v4f32_r = __builtin_msa_frsqrt_w(v8f16_a); // CHECK: call <4 x float> @llvm.mips.frsqrt.w( + v2f64_r = __builtin_msa_frsqrt_d(v4f32_a); // CHECK: call <2 x double> @llvm.mips.frsqrt.d( + + v4i32_r = __builtin_msa_fseq_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fseq.w( + v2i64_r = __builtin_msa_fseq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fseq.d( + + v4i32_r = __builtin_msa_fsaf_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fsaf.w( + v2i64_r = __builtin_msa_fsaf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fsaf.d( + + v4i32_r = __builtin_msa_fsle_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fsle.w( + v2i64_r = __builtin_msa_fsle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fsle.d( + + v4i32_r = __builtin_msa_fslt_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fslt.w( + v2i64_r = __builtin_msa_fslt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fslt.d( + + v4i32_r = __builtin_msa_fsne_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fsne.w( + v2i64_r = __builtin_msa_fsne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fsne.d( + + v4i32_r = __builtin_msa_fsor_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fsor.w( + v2i64_r = 
__builtin_msa_fsor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fsor.d( + + v4f32_r = __builtin_msa_fsqrt_w(v8f16_a); // CHECK: call <4 x float> @llvm.mips.fsqrt.w( + v2f64_r = __builtin_msa_fsqrt_d(v4f32_a); // CHECK: call <2 x double> @llvm.mips.fsqrt.d( + + v4f32_r = __builtin_msa_fsub_w(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.mips.fsub.w( + v2f64_r = __builtin_msa_fsub_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.mips.fsub.d( + + v4i32_r = __builtin_msa_fsueq_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fsueq.w( + v2i64_r = __builtin_msa_fsueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fsueq.d( + + v4i32_r = __builtin_msa_fsule_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fsule.w( + v2i64_r = __builtin_msa_fsule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fsule.d( + + v4i32_r = __builtin_msa_fsult_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fsult.w( + v2i64_r = __builtin_msa_fsult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fsult.d( + + v4i32_r = __builtin_msa_fsun_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fsun.w( + v2i64_r = __builtin_msa_fsun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fsun.d( + + v4i32_r = __builtin_msa_fsune_w(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.mips.fsune.w( + v2i64_r = __builtin_msa_fsune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.mips.fsune.d( + + v4i32_r = __builtin_msa_ftint_s_w(v4f32_a); // CHECK: call <4 x i32> @llvm.mips.ftint.s.w( + v2i64_r = __builtin_msa_ftint_s_d(v2f64_a); // CHECK: call <2 x i64> @llvm.mips.ftint.s.d( + + v4i32_r = __builtin_msa_ftint_u_w(v4f32_a); // CHECK: call <4 x i32> @llvm.mips.ftint.u.w( + v2i64_r = __builtin_msa_ftint_u_d(v2f64_a); // CHECK: call <2 x i64> @llvm.mips.ftint.u.d( + + v8i16_r = __builtin_msa_ftq_h(v4f32_a, v4f32_b); // CHECK: call <8 x i16> @llvm.mips.ftq.h( + v4i32_r = __builtin_msa_ftq_w(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.mips.ftq.w( + + v4i32_r = __builtin_msa_ftrunc_s_w(v4f32_a); // CHECK: call <4 x i32> @llvm.mips.ftrunc.s.w( + v2i64_r = __builtin_msa_ftrunc_s_d(v2f64_a); // CHECK: call <2 x i64> @llvm.mips.ftrunc.s.d( + + v4i32_r = __builtin_msa_ftrunc_u_w(v4f32_a); // CHECK: call <4 x i32> @llvm.mips.ftrunc.u.w( + v2i64_r = __builtin_msa_ftrunc_u_d(v2f64_a); // CHECK: call <2 x i64> @llvm.mips.ftrunc.u.d( + + v8i16_r = __builtin_msa_hadd_s_h(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.mips.hadd.s.h( + v4i32_r = __builtin_msa_hadd_s_w(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.mips.hadd.s.w( + v2i64_r = __builtin_msa_hadd_s_d(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.mips.hadd.s.d( + + v8u16_r = __builtin_msa_hadd_u_h(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.mips.hadd.u.h( + v4u32_r = __builtin_msa_hadd_u_w(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.mips.hadd.u.w( + v2u64_r = __builtin_msa_hadd_u_d(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.mips.hadd.u.d( + + v8i16_r = __builtin_msa_hsub_s_h(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.mips.hsub.s.h( + v4i32_r = __builtin_msa_hsub_s_w(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.mips.hsub.s.w( + v2i64_r = __builtin_msa_hsub_s_d(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.mips.hsub.s.d( + + v8u16_r = __builtin_msa_hsub_u_h(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.mips.hsub.u.h( + v4u32_r = __builtin_msa_hsub_u_w(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.mips.hsub.u.w( + v2u64_r = 
__builtin_msa_hsub_u_d(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.mips.hsub.u.d( + + v16i8_r = __builtin_msa_ilvev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.ilvev.b( + v8i16_r = __builtin_msa_ilvev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.ilvev.h( + v4i32_r = __builtin_msa_ilvev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.ilvev.w( + v2i64_r = __builtin_msa_ilvev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.ilvev.d( + + v16i8_r = __builtin_msa_ilvl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.ilvl.b( + v8i16_r = __builtin_msa_ilvl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.ilvl.h( + v4i32_r = __builtin_msa_ilvl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.ilvl.w( + v2i64_r = __builtin_msa_ilvl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.ilvl.d( + + v16i8_r = __builtin_msa_ilvod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.ilvod.b( + v8i16_r = __builtin_msa_ilvod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.ilvod.h( + v4i32_r = __builtin_msa_ilvod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.ilvod.w( + v2i64_r = __builtin_msa_ilvod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.ilvod.d( + + v16i8_r = __builtin_msa_ilvr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.ilvr.b( + v8i16_r = __builtin_msa_ilvr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.ilvr.h( + v4i32_r = __builtin_msa_ilvr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.ilvr.w( + v2i64_r = __builtin_msa_ilvr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.ilvr.d( + + v16i8_r = __builtin_msa_insert_b(v16i8_r, 1, 25); // CHECK: call <16 x i8> @llvm.mips.insert.b( + v8i16_r = __builtin_msa_insert_h(v8i16_r, 1, 25); // CHECK: call <8 x i16> @llvm.mips.insert.h( + v4i32_r = __builtin_msa_insert_w(v4i32_r, 1, 25); // CHECK: call <4 x i32> @llvm.mips.insert.w( + v2i64_r = __builtin_msa_insert_d(v2i64_r, 1, 25); // CHECK: call <2 x i64> @llvm.mips.insert.d( + + v16i8_r = __builtin_msa_insve_b(v16i8_r, 1, v16i8_a); // CHECK: call <16 x i8> @llvm.mips.insve.b( + v8i16_r = __builtin_msa_insve_h(v8i16_r, 1, v8i16_a); // CHECK: call <8 x i16> @llvm.mips.insve.h( + v4i32_r = __builtin_msa_insve_w(v4i32_r, 1, v4i32_a); // CHECK: call <4 x i32> @llvm.mips.insve.w( + v2i64_r = __builtin_msa_insve_d(v2i64_r, 1, v2i64_a); // CHECK: call <2 x i64> @llvm.mips.insve.d( + + v16i8_r = __builtin_msa_ld_b(&v16i8_a, 1); // CHECK: call <16 x i8> @llvm.mips.ld.b( + v8i16_r = __builtin_msa_ld_h(&v8i16_a, 2); // CHECK: call <8 x i16> @llvm.mips.ld.h( + v4i32_r = __builtin_msa_ld_w(&v4i32_a, 4); // CHECK: call <4 x i32> @llvm.mips.ld.w( + v2i64_r = __builtin_msa_ld_d(&v2i64_a, 8); // CHECK: call <2 x i64> @llvm.mips.ld.d( + + v16i8_r = __builtin_msa_ldi_b(3); // CHECK: call <16 x i8> @llvm.mips.ldi.b( + v8i16_r = __builtin_msa_ldi_h(3); // CHECK: call <8 x i16> @llvm.mips.ldi.h( + v4i32_r = __builtin_msa_ldi_w(3); // CHECK: call <4 x i32> @llvm.mips.ldi.w( + v2i64_r = __builtin_msa_ldi_d(3); // CHECK: call <2 x i64> @llvm.mips.ldi.d( + + v8i16_r = __builtin_msa_madd_q_h(v8i16_r, v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.madd.q.h( + v4i32_r = __builtin_msa_madd_q_w(v4i32_r, v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.madd.q.w( + + v8i16_r = __builtin_msa_maddr_q_h(v8i16_r, v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.maddr.q.h( + v4i32_r = __builtin_msa_maddr_q_w(v4i32_r, v4i32_a, v4i32_b); // CHECK: call <4 x i32> 
@llvm.mips.maddr.q.w( + + v16i8_r = __builtin_msa_maddv_b(v16i8_r, v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.maddv.b( + v8i16_r = __builtin_msa_maddv_h(v8i16_r, v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.maddv.h( + v4i32_r = __builtin_msa_maddv_w(v4i32_r, v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.maddv.w( + v2i64_r = __builtin_msa_maddv_d(v2i64_r, v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.maddv.d( + + v16i8_r = __builtin_msa_max_a_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.max.a.b( + v8i16_r = __builtin_msa_max_a_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.max.a.h( + v4i32_r = __builtin_msa_max_a_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.max.a.w( + v2i64_r = __builtin_msa_max_a_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.max.a.d( + + v16i8_r = __builtin_msa_max_s_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.max.s.b( + v8i16_r = __builtin_msa_max_s_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.max.s.h( + v4i32_r = __builtin_msa_max_s_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.max.s.w( + v2i64_r = __builtin_msa_max_s_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.max.s.d( + + v16u8_r = __builtin_msa_max_u_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.mips.max.u.b( + v8u16_r = __builtin_msa_max_u_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.mips.max.u.h( + v4u32_r = __builtin_msa_max_u_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.mips.max.u.w( + v2u64_r = __builtin_msa_max_u_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.mips.max.u.d( + + v16i8_r = __builtin_msa_maxi_s_b(v16i8_a, 2); // CHECK: call <16 x i8> @llvm.mips.maxi.s.b( + v8i16_r = __builtin_msa_maxi_s_h(v8i16_a, 2); // CHECK: call <8 x i16> @llvm.mips.maxi.s.h( + v4i32_r = __builtin_msa_maxi_s_w(v4i32_a, 2); // CHECK: call <4 x i32> @llvm.mips.maxi.s.w( + v2i64_r = __builtin_msa_maxi_s_d(v2i64_a, 2); // CHECK: call <2 x i64> @llvm.mips.maxi.s.d( + + v16u8_r = __builtin_msa_maxi_u_b(v16u8_a, 2); // CHECK: call <16 x i8> @llvm.mips.maxi.u.b( + v8u16_r = __builtin_msa_maxi_u_h(v8u16_a, 2); // CHECK: call <8 x i16> @llvm.mips.maxi.u.h( + v4u32_r = __builtin_msa_maxi_u_w(v4u32_a, 2); // CHECK: call <4 x i32> @llvm.mips.maxi.u.w( + v2u64_r = __builtin_msa_maxi_u_d(v2u64_a, 2); // CHECK: call <2 x i64> @llvm.mips.maxi.u.d( + + v16i8_r = __builtin_msa_min_a_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.min.a.b( + v8i16_r = __builtin_msa_min_a_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.min.a.h( + v4i32_r = __builtin_msa_min_a_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.min.a.w( + v2i64_r = __builtin_msa_min_a_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.min.a.d( + + v16i8_r = __builtin_msa_min_s_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.min.s.b( + v8i16_r = __builtin_msa_min_s_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.min.s.h( + v4i32_r = __builtin_msa_min_s_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.min.s.w( + v2i64_r = __builtin_msa_min_s_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.min.s.d( + + v16u8_r = __builtin_msa_min_u_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.mips.min.u.b( + v8u16_r = __builtin_msa_min_u_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.mips.min.u.h( + v4u32_r = __builtin_msa_min_u_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.mips.min.u.w( + v2u64_r = __builtin_msa_min_u_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> 
@llvm.mips.min.u.d( + + v16i8_r = __builtin_msa_mini_s_b(v16i8_a, 2); // CHECK: call <16 x i8> @llvm.mips.mini.s.b( + v8i16_r = __builtin_msa_mini_s_h(v8i16_a, 2); // CHECK: call <8 x i16> @llvm.mips.mini.s.h( + v4i32_r = __builtin_msa_mini_s_w(v4i32_a, 2); // CHECK: call <4 x i32> @llvm.mips.mini.s.w( + v2i64_r = __builtin_msa_mini_s_d(v2i64_a, 2); // CHECK: call <2 x i64> @llvm.mips.mini.s.d( + + v16u8_r = __builtin_msa_mini_u_b(v16u8_a, 2); // CHECK: call <16 x i8> @llvm.mips.mini.u.b( + v8u16_r = __builtin_msa_mini_u_h(v8u16_a, 2); // CHECK: call <8 x i16> @llvm.mips.mini.u.h( + v4u32_r = __builtin_msa_mini_u_w(v4u32_a, 2); // CHECK: call <4 x i32> @llvm.mips.mini.u.w( + v2u64_r = __builtin_msa_mini_u_d(v2u64_a, 2); // CHECK: call <2 x i64> @llvm.mips.mini.u.d( + + v16i8_r = __builtin_msa_mod_s_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.mod.s.b( + v8i16_r = __builtin_msa_mod_s_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.mod.s.h( + v4i32_r = __builtin_msa_mod_s_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.mod.s.w( + v2i64_r = __builtin_msa_mod_s_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.mod.s.d( + + v16u8_r = __builtin_msa_mod_u_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.mips.mod.u.b( + v8u16_r = __builtin_msa_mod_u_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.mips.mod.u.h( + v4u32_r = __builtin_msa_mod_u_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.mips.mod.u.w( + v2u64_r = __builtin_msa_mod_u_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.mips.mod.u.d( + + v16i8_r = __builtin_msa_move_v(v16i8_a); // CHECK: call <16 x i8> @llvm.mips.move.v( + + v8i16_r = __builtin_msa_msub_q_h(v8i16_r, v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.msub.q.h( + v4i32_r = __builtin_msa_msub_q_w(v4i32_r, v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.msub.q.w( + + v8i16_r = __builtin_msa_msubr_q_h(v8i16_r, v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.msubr.q.h( + v4i32_r = __builtin_msa_msubr_q_w(v4i32_r, v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.msubr.q.w( + + v16i8_r = __builtin_msa_msubv_b(v16i8_r, v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.msubv.b( + v8i16_r = __builtin_msa_msubv_h(v8i16_r, v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.msubv.h( + v4i32_r = __builtin_msa_msubv_w(v4i32_r, v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.msubv.w( + v2i64_r = __builtin_msa_msubv_d(v2i64_r, v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.msubv.d( + + v8i16_r = __builtin_msa_mul_q_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.mul.q.h( + v4i32_r = __builtin_msa_mul_q_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.mul.q.w( + + v8i16_r = __builtin_msa_mulr_q_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.mulr.q.h( + v4i32_r = __builtin_msa_mulr_q_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.mulr.q.w( + + v16i8_r = __builtin_msa_mulv_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.mulv.b( + v8i16_r = __builtin_msa_mulv_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.mulv.h( + v4i32_r = __builtin_msa_mulv_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.mulv.w( + v2i64_r = __builtin_msa_mulv_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.mulv.d( + + v16i8_r = __builtin_msa_nloc_b(v16i8_a); // CHECK: call <16 x i8> @llvm.mips.nloc.b( + v8i16_r = __builtin_msa_nloc_h(v8i16_a); // CHECK: call <8 x i16> @llvm.mips.nloc.h( + v4i32_r = __builtin_msa_nloc_w(v4i32_a); // CHECK: call <4 x 
i32> @llvm.mips.nloc.w( + v2i64_r = __builtin_msa_nloc_d(v2i64_a); // CHECK: call <2 x i64> @llvm.mips.nloc.d( + + v16i8_r = __builtin_msa_nlzc_b(v16i8_a); // CHECK: call <16 x i8> @llvm.mips.nlzc.b( + v8i16_r = __builtin_msa_nlzc_h(v8i16_a); // CHECK: call <8 x i16> @llvm.mips.nlzc.h( + v4i32_r = __builtin_msa_nlzc_w(v4i32_a); // CHECK: call <4 x i32> @llvm.mips.nlzc.w( + v2i64_r = __builtin_msa_nlzc_d(v2i64_a); // CHECK: call <2 x i64> @llvm.mips.nlzc.d( + + v16i8_r = __builtin_msa_nor_v(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.nor.v( + v8i16_r = __builtin_msa_nor_v(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.mips.nor.v( + v4i32_r = __builtin_msa_nor_v(v4i32_a, v4i32_b); // CHECK: call <16 x i8> @llvm.mips.nor.v( + v2i64_r = __builtin_msa_nor_v(v2i64_a, v2i64_b); // CHECK: call <16 x i8> @llvm.mips.nor.v( + + v16i8_r = __builtin_msa_nori_b(v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.nori.b( + v8i16_r = __builtin_msa_nori_b(v8i16_a, 25); // CHECK: call <16 x i8> @llvm.mips.nori.b( + v4i32_r = __builtin_msa_nori_b(v4i32_a, 25); // CHECK: call <16 x i8> @llvm.mips.nori.b( + v2i64_r = __builtin_msa_nori_b(v2i64_a, 25); // CHECK: call <16 x i8> @llvm.mips.nori.b( + + v16u8_r = __builtin_msa_nori_b(v16u8_a, 25); // CHECK: call <16 x i8> @llvm.mips.nori.b( + v8u16_r = __builtin_msa_nori_b(v8u16_a, 25); // CHECK: call <16 x i8> @llvm.mips.nori.b( + v4u32_r = __builtin_msa_nori_b(v4u32_a, 25); // CHECK: call <16 x i8> @llvm.mips.nori.b( + v2u64_r = __builtin_msa_nori_b(v2u64_a, 25); // CHECK: call <16 x i8> @llvm.mips.nori.b( + + v16i8_r = __builtin_msa_or_v(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.or.v( + v8i16_r = __builtin_msa_or_v(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.mips.or.v( + v4i32_r = __builtin_msa_or_v(v4i32_a, v4i32_b); // CHECK: call <16 x i8> @llvm.mips.or.v( + v2i64_r = __builtin_msa_or_v(v2i64_a, v2i64_b); // CHECK: call <16 x i8> @llvm.mips.or.v( + + v16i8_r = __builtin_msa_ori_b(v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.ori.b( + v8i16_r = __builtin_msa_ori_b(v8i16_a, 25); // CHECK: call <16 x i8> @llvm.mips.ori.b( + v4i32_r = __builtin_msa_ori_b(v4i32_a, 25); // CHECK: call <16 x i8> @llvm.mips.ori.b( + v2i64_r = __builtin_msa_ori_b(v2i64_a, 25); // CHECK: call <16 x i8> @llvm.mips.ori.b( + + v16u8_r = __builtin_msa_ori_b(v16u8_a, 25); // CHECK: call <16 x i8> @llvm.mips.ori.b( + v8u16_r = __builtin_msa_ori_b(v8u16_a, 25); // CHECK: call <16 x i8> @llvm.mips.ori.b( + v4u32_r = __builtin_msa_ori_b(v4u32_a, 25); // CHECK: call <16 x i8> @llvm.mips.ori.b( + v2u64_r = __builtin_msa_ori_b(v2u64_a, 25); // CHECK: call <16 x i8> @llvm.mips.ori.b( + + v16i8_r = __builtin_msa_pckev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.pckev.b( + v8i16_r = __builtin_msa_pckev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.pckev.h( + v4i32_r = __builtin_msa_pckev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.pckev.w( + v2i64_r = __builtin_msa_pckev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.pckev.d( + + v16i8_r = __builtin_msa_pckod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.pckod.b( + v8i16_r = __builtin_msa_pckod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.pckod.h( + v4i32_r = __builtin_msa_pckod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.pckod.w( + v2i64_r = __builtin_msa_pckod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.pckod.d( + + v16i8_r = __builtin_msa_pcnt_b(v16i8_a); // CHECK: call <16 x i8> @llvm.mips.pcnt.b( + v8i16_r = 
__builtin_msa_pcnt_h(v8i16_a); // CHECK: call <8 x i16> @llvm.mips.pcnt.h( + v4i32_r = __builtin_msa_pcnt_w(v4i32_a); // CHECK: call <4 x i32> @llvm.mips.pcnt.w( + v2i64_r = __builtin_msa_pcnt_d(v2i64_a); // CHECK: call <2 x i64> @llvm.mips.pcnt.d( + + v16i8_r = __builtin_msa_sat_s_b(v16i8_a, 3); // CHECK: call <16 x i8> @llvm.mips.sat.s.b( + v8i16_r = __builtin_msa_sat_s_h(v8i16_a, 3); // CHECK: call <8 x i16> @llvm.mips.sat.s.h( + v4i32_r = __builtin_msa_sat_s_w(v4i32_a, 3); // CHECK: call <4 x i32> @llvm.mips.sat.s.w( + v2i64_r = __builtin_msa_sat_s_d(v2i64_a, 3); // CHECK: call <2 x i64> @llvm.mips.sat.s.d( + + v16i8_r = __builtin_msa_sat_u_b(v16i8_a, 3); // CHECK: call <16 x i8> @llvm.mips.sat.u.b( + v8i16_r = __builtin_msa_sat_u_h(v8i16_a, 3); // CHECK: call <8 x i16> @llvm.mips.sat.u.h( + v4i32_r = __builtin_msa_sat_u_w(v4i32_a, 3); // CHECK: call <4 x i32> @llvm.mips.sat.u.w( + v2i64_r = __builtin_msa_sat_u_d(v2i64_a, 3); // CHECK: call <2 x i64> @llvm.mips.sat.u.d( + + v16i8_r = __builtin_msa_shf_b(v16i8_a, 3); // CHECK: call <16 x i8> @llvm.mips.shf.b( + v8i16_r = __builtin_msa_shf_h(v8i16_a, 3); // CHECK: call <8 x i16> @llvm.mips.shf.h( + v4i32_r = __builtin_msa_shf_w(v4i32_a, 3); // CHECK: call <4 x i32> @llvm.mips.shf.w( + + v16i8_r = __builtin_msa_sld_b(v16i8_a, 10); // CHECK: call <16 x i8> @llvm.mips.sld.b( + v8i16_r = __builtin_msa_sld_h(v8i16_a, 10); // CHECK: call <8 x i16> @llvm.mips.sld.h( + v4i32_r = __builtin_msa_sld_w(v4i32_a, 10); // CHECK: call <4 x i32> @llvm.mips.sld.w( + v2i64_r = __builtin_msa_sld_d(v2i64_a, 10); // CHECK: call <2 x i64> @llvm.mips.sld.d( + + v16i8_r = __builtin_msa_sldi_b(v16i8_a, 3); // CHECK: call <16 x i8> @llvm.mips.sldi.b( + v8i16_r = __builtin_msa_sldi_h(v8i16_a, 3); // CHECK: call <8 x i16> @llvm.mips.sldi.h( + v4i32_r = __builtin_msa_sldi_w(v4i32_a, 3); // CHECK: call <4 x i32> @llvm.mips.sldi.w( + v2i64_r = __builtin_msa_sldi_d(v2i64_a, 3); // CHECK: call <2 x i64> @llvm.mips.sldi.d( + + v16i8_r = __builtin_msa_sll_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.sll.b( + v8i16_r = __builtin_msa_sll_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.sll.h( + v4i32_r = __builtin_msa_sll_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.sll.w( + v2i64_r = __builtin_msa_sll_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.sll.d( + + v16i8_r = __builtin_msa_slli_b(v16i8_a, 3); // CHECK: call <16 x i8> @llvm.mips.slli.b( + v8i16_r = __builtin_msa_slli_h(v8i16_a, 3); // CHECK: call <8 x i16> @llvm.mips.slli.h( + v4i32_r = __builtin_msa_slli_w(v4i32_a, 3); // CHECK: call <4 x i32> @llvm.mips.slli.w( + v2i64_r = __builtin_msa_slli_d(v2i64_a, 3); // CHECK: call <2 x i64> @llvm.mips.slli.d( + + v16i8_r = __builtin_msa_splat_b(v16i8_a, 3); // CHECK: call <16 x i8> @llvm.mips.splat.b( + v8i16_r = __builtin_msa_splat_h(v8i16_a, 3); // CHECK: call <8 x i16> @llvm.mips.splat.h( + v4i32_r = __builtin_msa_splat_w(v4i32_a, 3); // CHECK: call <4 x i32> @llvm.mips.splat.w( + v2i64_r = __builtin_msa_splat_d(v2i64_a, 3); // CHECK: call <2 x i64> @llvm.mips.splat.d( + + v16i8_r = __builtin_msa_splati_b(v16i8_a, 3); // CHECK: call <16 x i8> @llvm.mips.splati.b( + v8i16_r = __builtin_msa_splati_h(v8i16_a, 3); // CHECK: call <8 x i16> @llvm.mips.splati.h( + v4i32_r = __builtin_msa_splati_w(v4i32_a, 3); // CHECK: call <4 x i32> @llvm.mips.splati.w( + v2i64_r = __builtin_msa_splati_d(v2i64_a, 3); // CHECK: call <2 x i64> @llvm.mips.splati.d( + + v16i8_r = __builtin_msa_sra_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> 
@llvm.mips.sra.b( + v8i16_r = __builtin_msa_sra_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.sra.h( + v4i32_r = __builtin_msa_sra_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.sra.w( + v2i64_r = __builtin_msa_sra_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.sra.d( + + v16i8_r = __builtin_msa_srai_b(v16i8_a, 3); // CHECK: call <16 x i8> @llvm.mips.srai.b( + v8i16_r = __builtin_msa_srai_h(v8i16_a, 3); // CHECK: call <8 x i16> @llvm.mips.srai.h( + v4i32_r = __builtin_msa_srai_w(v4i32_a, 3); // CHECK: call <4 x i32> @llvm.mips.srai.w( + v2i64_r = __builtin_msa_srai_d(v2i64_a, 3); // CHECK: call <2 x i64> @llvm.mips.srai.d( + + v16i8_r = __builtin_msa_srar_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.srar.b( + v8i16_r = __builtin_msa_srar_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.srar.h( + v4i32_r = __builtin_msa_srar_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.srar.w( + v2i64_r = __builtin_msa_srar_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.srar.d( + + v16i8_r = __builtin_msa_srari_b(v16i8_a, 3); // CHECK: call <16 x i8> @llvm.mips.srari.b( + v8i16_r = __builtin_msa_srari_h(v8i16_a, 3); // CHECK: call <8 x i16> @llvm.mips.srari.h( + v4i32_r = __builtin_msa_srari_w(v4i32_a, 3); // CHECK: call <4 x i32> @llvm.mips.srari.w( + v2i64_r = __builtin_msa_srari_d(v2i64_a, 3); // CHECK: call <2 x i64> @llvm.mips.srari.d( + + v16i8_r = __builtin_msa_srl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.srl.b( + v8i16_r = __builtin_msa_srl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.srl.h( + v4i32_r = __builtin_msa_srl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.srl.w( + v2i64_r = __builtin_msa_srl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.srl.d( + + v16i8_r = __builtin_msa_srli_b(v16i8_a, 3); // CHECK: call <16 x i8> @llvm.mips.srli.b( + v8i16_r = __builtin_msa_srli_h(v8i16_a, 3); // CHECK: call <8 x i16> @llvm.mips.srli.h( + v4i32_r = __builtin_msa_srli_w(v4i32_a, 3); // CHECK: call <4 x i32> @llvm.mips.srli.w( + v2i64_r = __builtin_msa_srli_d(v2i64_a, 3); // CHECK: call <2 x i64> @llvm.mips.srli.d( + + v16i8_r = __builtin_msa_srlr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.srlr.b( + v8i16_r = __builtin_msa_srlr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.srlr.h( + v4i32_r = __builtin_msa_srlr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.srlr.w( + v2i64_r = __builtin_msa_srlr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.srlr.d( + + v16i8_r = __builtin_msa_srlri_b(v16i8_a, 3); // CHECK: call <16 x i8> @llvm.mips.srlri.b( + v8i16_r = __builtin_msa_srlri_h(v8i16_a, 3); // CHECK: call <8 x i16> @llvm.mips.srlri.h( + v4i32_r = __builtin_msa_srlri_w(v4i32_a, 3); // CHECK: call <4 x i32> @llvm.mips.srlri.w( + v2i64_r = __builtin_msa_srlri_d(v2i64_a, 3); // CHECK: call <2 x i64> @llvm.mips.srlri.d( + + __builtin_msa_st_b(v16i8_b, &v16i8_a, 1); // CHECK: call void @llvm.mips.st.b( + __builtin_msa_st_h(v8i16_b, &v8i16_a, 2); // CHECK: call void @llvm.mips.st.h( + __builtin_msa_st_w(v4i32_b, &v4i32_a, 4); // CHECK: call void @llvm.mips.st.w( + __builtin_msa_st_d(v2i64_b, &v2i64_a, 8); // CHECK: call void @llvm.mips.st.d( + + v16i8_r = __builtin_msa_subs_s_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.subs.s.b( + v8i16_r = __builtin_msa_subs_s_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.subs.s.h( + v4i32_r = __builtin_msa_subs_s_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.subs.s.w( + v2i64_r = 
__builtin_msa_subs_s_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.subs.s.d( + + v16u8_r = __builtin_msa_subs_u_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.mips.subs.u.b( + v8u16_r = __builtin_msa_subs_u_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.mips.subs.u.h( + v4u32_r = __builtin_msa_subs_u_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.mips.subs.u.w( + v2u64_r = __builtin_msa_subs_u_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.mips.subs.u.d( + + v16u8_r = __builtin_msa_subsus_u_b(v16u8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.subsus.u.b( + v8u16_r = __builtin_msa_subsus_u_h(v8u16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.subsus.u.h( + v4u32_r = __builtin_msa_subsus_u_w(v4u32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.subsus.u.w( + v2u64_r = __builtin_msa_subsus_u_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.subsus.u.d( + + v16i8_r = __builtin_msa_subsuu_s_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.mips.subsuu.s.b( + v8i16_r = __builtin_msa_subsuu_s_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.mips.subsuu.s.h( + v4i32_r = __builtin_msa_subsuu_s_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.mips.subsuu.s.w( + v2i64_r = __builtin_msa_subsuu_s_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.mips.subsuu.s.d( + + v16i8_r = __builtin_msa_subv_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.subv.b( + v8i16_r = __builtin_msa_subv_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.subv.h( + v4i32_r = __builtin_msa_subv_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.subv.w( + v2i64_r = __builtin_msa_subv_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.subv.d( + + v16i8_r = __builtin_msa_subvi_b(v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.subvi.b( + v8i16_r = __builtin_msa_subvi_h(v8i16_a, 25); // CHECK: call <8 x i16> @llvm.mips.subvi.h( + v4i32_r = __builtin_msa_subvi_w(v4i32_a, 25); // CHECK: call <4 x i32> @llvm.mips.subvi.w( + v2i64_r = __builtin_msa_subvi_d(v2i64_a, 25); // CHECK: call <2 x i64> @llvm.mips.subvi.d( + + v16i8_r = __builtin_msa_vshf_b(v16i8_a, v16i8_b, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.vshf.b( + v8i16_r = __builtin_msa_vshf_h(v8i16_a, v8i16_b, v8i16_b); // CHECK: call <8 x i16> @llvm.mips.vshf.h( + v4i32_r = __builtin_msa_vshf_w(v4i32_a, v4i32_b, v4i32_b); // CHECK: call <4 x i32> @llvm.mips.vshf.w( + v2i64_r = __builtin_msa_vshf_d(v2i64_a, v2i64_b, v2i64_b); // CHECK: call <2 x i64> @llvm.mips.vshf.d( + + v16i8_r = __builtin_msa_xor_v(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.mips.xor.v( + v8i16_r = __builtin_msa_xor_v(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.mips.xor.v( + v4i32_r = __builtin_msa_xor_v(v4i32_a, v4i32_b); // CHECK: call <16 x i8> @llvm.mips.xor.v( + v2i64_r = __builtin_msa_xor_v(v2i64_a, v2i64_b); // CHECK: call <16 x i8> @llvm.mips.xor.v( + + v16i8_r = __builtin_msa_xori_b(v16i8_a, 25); // CHECK: call <16 x i8> @llvm.mips.xori.b( + v8i16_r = __builtin_msa_xori_b(v8i16_a, 25); // CHECK: call <16 x i8> @llvm.mips.xori.b( + v4i32_r = __builtin_msa_xori_b(v4i32_a, 25); // CHECK: call <16 x i8> @llvm.mips.xori.b( + v2i64_r = __builtin_msa_xori_b(v2i64_a, 25); // CHECK: call <16 x i8> @llvm.mips.xori.b( + + v16u8_r = __builtin_msa_xori_b(v16u8_a, 25); // CHECK: call <16 x i8> @llvm.mips.xori.b( + v8u16_r = __builtin_msa_xori_b(v8u16_a, 25); // CHECK: call <16 x i8> @llvm.mips.xori.b( + v4u32_r = __builtin_msa_xori_b(v4u32_a, 25); // CHECK: call <16 x i8> @llvm.mips.xori.b( + v2u64_r = 
__builtin_msa_xori_b(v2u64_a, 25); // CHECK: call <16 x i8> @llvm.mips.xori.b( + +} diff --git a/test/CodeGen/builtins-ms.c b/test/CodeGen/builtins-ms.c new file mode 100644 index 0000000..0676e9d --- /dev/null +++ b/test/CodeGen/builtins-ms.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 %s -emit-llvm -o - -fms-extensions -triple i686-pc-win32 | FileCheck %s + +// CHECK-LABEL: define void @test_alloca +void capture(void *); +void test_alloca(int n) { + capture(_alloca(n)); + // CHECK: %[[arg:.*]] = alloca i8, i32 % + // CHECK: call void @capture(i8* %[[arg]]) +} diff --git a/test/CodeGen/builtins-multiprecision.c b/test/CodeGen/builtins-multiprecision.c index 172f683..4d19608 100644 --- a/test/CodeGen/builtins-multiprecision.c +++ b/test/CodeGen/builtins-multiprecision.c @@ -2,6 +2,25 @@ // RUN: %clang_cc1 -triple "x86_64-unknown-unknown" -emit-llvm -x c %s -o - -O3 | FileCheck %s // RUN: %clang_cc1 -triple "x86_64-mingw32" -emit-llvm -x c %s -o - -O3 | FileCheck %s +unsigned char test_addcb(unsigned char x, unsigned char y, + unsigned char carryin, unsigned char *z) { + // CHECK: @test_addcb + // CHECK: %{{.+}} = {{.*}} call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %x, i8 %y) + // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 1 + // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 0 + // CHECK: %{{.+}} = {{.*}} call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %{{.+}}, i8 %carryin) + // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 1 + // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 0 + // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}} + // CHECK: %{{.+}} = zext i1 %{{.+}} to i8 + // CHECK: store i8 %{{.+}}, i8* %z, align 1 + + unsigned char carryout; + *z = __builtin_addcb(x, y, carryin, &carryout); + + return carryout; +} + unsigned short test_addcs(unsigned short x, unsigned short y, unsigned short carryin, unsigned short *z) { // CHECK: @test_addcs @@ -76,6 +95,25 @@ unsigned long long test_addcll(unsigned long long x, unsigned long long y, return carryout; } +unsigned char test_subcb(unsigned char x, unsigned char y, + unsigned char carryin, unsigned char *z) { + // CHECK: @test_subcb + // CHECK: %{{.+}} = {{.*}} call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %x, i8 %y) + // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 1 + // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 0 + // CHECK: %{{.+}} = {{.*}} call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %{{.+}}, i8 %carryin) + // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 1 + // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 0 + // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}} + // CHECK: %{{.+}} = zext i1 %{{.+}} to i8 + // CHECK: store i8 %{{.+}}, i8* %z, align 1 + + unsigned char carryout; + *z = __builtin_subcb(x, y, carryin, &carryout); + + return carryout; +} + unsigned short test_subcs(unsigned short x, unsigned short y, unsigned short carryin, unsigned short *z) { // CHECK: @test_subcs diff --git a/test/CodeGen/builtins-nvptx.c b/test/CodeGen/builtins-nvptx.c index 2c7e0c1..7deee8e 100644 --- a/test/CodeGen/builtins-nvptx.c +++ b/test/CodeGen/builtins-nvptx.c @@ -165,4 +165,13 @@ void nvvm_math(float f1, float f2, double d1, double d2) { double td3 = __nvvm_sqrt_rn_d(d1); // CHECK: call double @llvm.nvvm.rcp.rn.d double td4 = __nvvm_rcp_rn_d(d2); + +// CHECK: call void @llvm.nvvm.membar.cta() + __nvvm_membar_cta(); +// CHECK: call void @llvm.nvvm.membar.gl() + __nvvm_membar_gl(); +// CHECK: call void @llvm.nvvm.membar.sys() + __nvvm_membar_sys(); +// CHECK: call void @llvm.nvvm.barrier0() + __nvvm_bar0(); } diff --git 
a/test/CodeGen/builtins-overflow.c b/test/CodeGen/builtins-overflow.c new file mode 100644 index 0000000..5c5500d --- /dev/null +++ b/test/CodeGen/builtins-overflow.c @@ -0,0 +1,175 @@ +// Test CodeGen for Security Check Overflow Builtins. +// rdar://13421498 + +// RUN: %clang_cc1 -triple "i686-unknown-unknown" -emit-llvm -x c %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple "x86_64-unknown-unknown" -emit-llvm -x c %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple "x86_64-mingw32" -emit-llvm -x c %s -o - | FileCheck %s + +extern unsigned UnsignedErrorCode; +extern unsigned long UnsignedLongErrorCode; +extern unsigned long long UnsignedLongLongErrorCode; +extern int IntErrorCode; +extern long LongErrorCode; +extern long long LongLongErrorCode; + +unsigned test_uadd_overflow(unsigned x, unsigned y) { +// CHECK: @test_uadd_overflow +// CHECK: %{{.+}} = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}}) + unsigned result; + if (__builtin_uadd_overflow(x, y, &result)) + return UnsignedErrorCode; + return result; +} + +unsigned long test_uaddl_overflow(unsigned long x, unsigned long y) { +// CHECK: @test_uaddl_overflow([[UL:i32|i64]] %x +// CHECK: %{{.+}} = call { [[UL]], i1 } @llvm.uadd.with.overflow.[[UL]]([[UL]] %{{.+}}, [[UL]] %{{.+}}) + unsigned long result; + if (__builtin_uaddl_overflow(x, y, &result)) + return UnsignedLongErrorCode; + return result; +} + +unsigned long long test_uaddll_overflow(unsigned long long x, unsigned long long y) { +// CHECK: @test_uaddll_overflow +// CHECK: %{{.+}} = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %{{.+}}, i64 %{{.+}}) + unsigned long long result; + if (__builtin_uaddll_overflow(x, y, &result)) + return UnsignedLongLongErrorCode; + return result; +} + +unsigned test_usub_overflow(unsigned x, unsigned y) { +// CHECK: @test_usub_overflow +// CHECK: %{{.+}} = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}}) + unsigned result; + if (__builtin_usub_overflow(x, y, &result)) + return UnsignedErrorCode; + return result; +} + +unsigned long test_usubl_overflow(unsigned long x, unsigned long y) { +// CHECK: @test_usubl_overflow([[UL:i32|i64]] %x +// CHECK: %{{.+}} = call { [[UL]], i1 } @llvm.usub.with.overflow.[[UL]]([[UL]] %{{.+}}, [[UL]] %{{.+}}) + unsigned long result; + if (__builtin_usubl_overflow(x, y, &result)) + return UnsignedLongErrorCode; + return result; +} + +unsigned long long test_usubll_overflow(unsigned long long x, unsigned long long y) { +// CHECK: @test_usubll_overflow +// CHECK: %{{.+}} = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %{{.+}}, i64 %{{.+}}) + unsigned long long result; + if (__builtin_usubll_overflow(x, y, &result)) + return UnsignedLongLongErrorCode; + return result; +} + +unsigned test_umul_overflow(unsigned x, unsigned y) { +// CHECK: @test_umul_overflow +// CHECK: %{{.+}} = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}}) + unsigned result; + if (__builtin_umul_overflow(x, y, &result)) + return UnsignedErrorCode; + return result; +} + +unsigned long test_umull_overflow(unsigned long x, unsigned long y) { +// CHECK: @test_umull_overflow([[UL:i32|i64]] %x +// CHECK: %{{.+}} = call { [[UL]], i1 } @llvm.umul.with.overflow.[[UL]]([[UL]] %{{.+}}, [[UL]] %{{.+}}) + unsigned long result; + if (__builtin_umull_overflow(x, y, &result)) + return UnsignedLongErrorCode; + return result; +} + +unsigned long long test_umulll_overflow(unsigned long long x, unsigned long long y) { +// CHECK: @test_umulll_overflow +// CHECK: %{{.+}} = call { i64, i1 } 
@llvm.umul.with.overflow.i64(i64 %{{.+}}, i64 %{{.+}}) + unsigned long long result; + if (__builtin_umulll_overflow(x, y, &result)) + return UnsignedLongLongErrorCode; + return result; +} + +int test_sadd_overflow(int x, int y) { +// CHECK: @test_sadd_overflow +// CHECK: %{{.+}} = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}}) + int result; + if (__builtin_sadd_overflow(x, y, &result)) + return IntErrorCode; + return result; +} + +long test_saddl_overflow(long x, long y) { +// CHECK: @test_saddl_overflow([[UL:i32|i64]] %x +// CHECK: %{{.+}} = call { [[UL]], i1 } @llvm.sadd.with.overflow.[[UL]]([[UL]] %{{.+}}, [[UL]] %{{.+}}) + long result; + if (__builtin_saddl_overflow(x, y, &result)) + return LongErrorCode; + return result; +} + +long long test_saddll_overflow(long long x, long long y) { +// CHECK: @test_saddll_overflow +// CHECK: %{{.+}} = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %{{.+}}, i64 %{{.+}}) + long long result; + if (__builtin_saddll_overflow(x, y, &result)) + return LongLongErrorCode; + return result; +} + +int test_ssub_overflow(int x, int y) { +// CHECK: @test_ssub_overflow +// CHECK: %{{.+}} = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}}) + int result; + if (__builtin_ssub_overflow(x, y, &result)) + return IntErrorCode; + return result; +} + +long test_ssubl_overflow(long x, long y) { +// CHECK: @test_ssubl_overflow([[UL:i32|i64]] %x +// CHECK: %{{.+}} = call { [[UL]], i1 } @llvm.ssub.with.overflow.[[UL]]([[UL]] %{{.+}}, [[UL]] %{{.+}}) + long result; + if (__builtin_ssubl_overflow(x, y, &result)) + return LongErrorCode; + return result; +} + +long long test_ssubll_overflow(long long x, long long y) { +// CHECK: @test_ssubll_overflow +// CHECK: %{{.+}} = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %{{.+}}, i64 %{{.+}}) + long long result; + if (__builtin_ssubll_overflow(x, y, &result)) + return LongLongErrorCode; + return result; +} + +int test_smul_overflow(int x, int y) { +// CHECK: @test_smul_overflow +// CHECK: %{{.+}} = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}}) + int result; + if (__builtin_smul_overflow(x, y, &result)) + return IntErrorCode; + return result; +} + +long test_smull_overflow(long x, long y) { +// CHECK: @test_smull_overflow([[UL:i32|i64]] %x +// CHECK: %{{.+}} = call { [[UL]], i1 } @llvm.smul.with.overflow.[[UL]]([[UL]] %{{.+}}, [[UL]] %{{.+}}) + long result; + if (__builtin_smull_overflow(x, y, &result)) + return LongErrorCode; + return result; +} + +long long test_smulll_overflow(long long x, long long y) { +// CHECK: @test_smulll_overflow +// CHECK: %{{.+}} = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %{{.+}}, i64 %{{.+}}) + long long result; + if (__builtin_smulll_overflow(x, y, &result)) + return LongLongErrorCode; + return result; +} diff --git a/test/CodeGen/builtins-ppc-altivec.c b/test/CodeGen/builtins-ppc-altivec.c index 9427a8a..47a198f 100644 --- a/test/CodeGen/builtins-ppc-altivec.c +++ b/test/CodeGen/builtins-ppc-altivec.c @@ -41,7 +41,7 @@ int res_i; int res_ui; int res_f; -// CHECK: define void @test1 +// CHECK-LABEL: define void @test1 void test1() { /* vec_abs */ @@ -333,7 +333,7 @@ void test1() { } -// CHECK: define void @test2 +// CHECK-LABEL: define void @test2 void test2() { /* vec_avg */ res_vsc = vec_avg(vsc, vsc); // CHECK: @llvm.ppc.altivec.vavgsb @@ -371,7 +371,7 @@ void test2() { res_vbi = vec_vcmpgefp(vf, vf); // CHECK: @llvm.ppc.altivec.vcmpgefp } -// CHECK: define void @test5 +// CHECK-LABEL: define void @test5 void 
test5() { /* vec_cmpgt */ @@ -394,7 +394,7 @@ void test5() { res_vbi = vec_cmple(vf, vf); // CHECK: @llvm.ppc.altivec.vcmpgefp } -// CHECK: define void @test6 +// CHECK-LABEL: define void @test6 void test6() { /* vec_cmplt */ res_vbc = vec_cmplt(vsc, vsc); // CHECK: @llvm.ppc.altivec.vcmpgtsb @@ -3055,7 +3055,7 @@ void test6() { } /* ------------------------------ Relational Operators ------------------------------ */ -// CHECK: define void @test7 +// CHECK-LABEL: define void @test7 void test7() { vector signed char vsc1 = (vector signed char)(-1); vector signed char vsc2 = (vector signed char)(-2); diff --git a/test/CodeGen/builtins-x86.c b/test/CodeGen/builtins-x86.c index fcf1512..261bf2f 100644 --- a/test/CodeGen/builtins-x86.c +++ b/test/CodeGen/builtins-x86.c @@ -55,6 +55,7 @@ void f0() { const float* tmp_fCp; double* tmp_dp; const double* tmp_dCp; + long long* tmp_LLip; #define imm_i 32 #define imm_i_0_2 0 @@ -288,6 +289,9 @@ void f0() { tmp_i = __builtin_ia32_movmskpd(tmp_V2d); tmp_i = __builtin_ia32_pmovmskb128(tmp_V16c); (void) __builtin_ia32_movnti(tmp_ip, tmp_i); +#ifdef USE_64 + (void) __builtin_ia32_movnti64(tmp_LLip, tmp_LLi); +#endif (void) __builtin_ia32_movntpd(tmp_dp, tmp_V2d); (void) __builtin_ia32_movntdq(tmp_V2LLip, tmp_V2LLi); tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c); @@ -491,5 +495,13 @@ void f0() { tmp_V2f = __builtin_ia32_pi2fw(tmp_V2i); tmp_V2f = __builtin_ia32_pswapdsf(tmp_V2f); tmp_V2i = __builtin_ia32_pswapdsi(tmp_V2i); + + tmp_V4i = __builtin_ia32_sha1rnds4(tmp_V4i, tmp_V4i, imm_i); + tmp_V4i = __builtin_ia32_sha1nexte(tmp_V4i, tmp_V4i); + tmp_V4i = __builtin_ia32_sha1msg1(tmp_V4i, tmp_V4i); + tmp_V4i = __builtin_ia32_sha1msg2(tmp_V4i, tmp_V4i); + tmp_V4i = __builtin_ia32_sha256rnds2(tmp_V4i, tmp_V4i, tmp_V4i); + tmp_V4i = __builtin_ia32_sha256msg1(tmp_V4i, tmp_V4i); + tmp_V4i = __builtin_ia32_sha256msg2(tmp_V4i, tmp_V4i); #endif } diff --git a/test/CodeGen/builtins.c b/test/CodeGen/builtins.c index 9ba12bb..39bd84c 100644 --- a/test/CodeGen/builtins.c +++ b/test/CodeGen/builtins.c @@ -131,7 +131,7 @@ void foo() { __builtin_strcat(0, 0); } -// CHECK: define void @bar( +// CHECK-LABEL: define void @bar( void bar() { float f; double d; @@ -167,7 +167,7 @@ void bar() { // CHECK: } -// CHECK: define void @test_float_builtins +// CHECK-LABEL: define void @test_float_builtins void test_float_builtins(float F, double D, long double LD) { volatile int res; res = __builtin_isinf(F); @@ -197,7 +197,7 @@ void test_float_builtins(float F, double D, long double LD) { // CHECK: and i1 } -// CHECK: define void @test_builtin_longjmp +// CHECK-LABEL: define void @test_builtin_longjmp void test_builtin_longjmp(void **buffer) { // CHECK: [[BITCAST:%.*]] = bitcast // CHECK-NEXT: call void @llvm.eh.sjlj.longjmp(i8* [[BITCAST]]) @@ -205,7 +205,7 @@ void test_builtin_longjmp(void **buffer) { // CHECK-NEXT: unreachable } -// CHECK: define i64 @test_builtin_readcyclecounter +// CHECK-LABEL: define i64 @test_builtin_readcyclecounter long long test_builtin_readcyclecounter() { // CHECK: call i64 @llvm.readcyclecounter() return __builtin_readcyclecounter(); diff --git a/test/CodeGen/builtinshufflevector2.c b/test/CodeGen/builtinshufflevector2.c index ac0e07a..04405b5 100644 --- a/test/CodeGen/builtinshufflevector2.c +++ b/test/CodeGen/builtinshufflevector2.c @@ -3,7 +3,7 @@ typedef float float4 __attribute__((ext_vector_type(4))); typedef unsigned int uint4 __attribute__((ext_vector_type(4))); -// CHECK: define void @clang_shufflevector_v_v( +// CHECK-LABEL: define void 
@clang_shufflevector_v_v( void clang_shufflevector_v_v( float4* A, float4 x, uint4 mask ) { // CHECK: [[MASK:%.*]] = and <4 x i32> {{%.*}}, <i32 3, i32 3, i32 3, i32 3> // CHECK: [[I:%.*]] = extractelement <4 x i32> [[MASK]], i32 0 @@ -27,9 +27,16 @@ void clang_shufflevector_v_v( float4* A, float4 x, uint4 mask ) { *A = __builtin_shufflevector( x, mask ); } -// CHECK: define void @clang_shufflevector_v_v_c( -void clang_shufflevector_v_v_c( float4* A, float4 x, float4 y, uint4 mask ) { +// CHECK-LABEL: define void @clang_shufflevector_v_v_c( +void clang_shufflevector_v_v_c( float4* A, float4 x, float4 y) { // CHECK: [[V:%.*]] = shufflevector <4 x float> {{%.*}}, <4 x float> {{%.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5> // CHECK: store <4 x float> [[V]], <4 x float>* {{%.*}} *A = __builtin_shufflevector( x, y, 0, 4, 1, 5 ); } + +// CHECK-LABEL: define void @clang_shufflevector_v_v_undef( +void clang_shufflevector_v_v_undef( float4* A, float4 x, float4 y) { +// CHECK: [[V:%.*]] = shufflevector <4 x float> {{%.*}}, <4 x float> {{%.*}}, <4 x i32> <i32 0, i32 4, i32 undef, i32 5> +// CHECK: store <4 x float> [[V]], <4 x float>* {{%.*}} + *A = __builtin_shufflevector( x, y, 0, 4, -1, 5 ); +} diff --git a/test/CodeGen/byval-memcpy-elim.c b/test/CodeGen/byval-memcpy-elim.c index 76cdafb..d4b751a 100644 --- a/test/CodeGen/byval-memcpy-elim.c +++ b/test/CodeGen/byval-memcpy-elim.c @@ -12,7 +12,7 @@ struct Test2S { // Make sure we don't generate extra memcpy for lvalues void test1a(struct Test1S, struct Test2S); -// CHECK: define void @test1( +// CHECK-LABEL: define void @test1( // CHECK-NOT: memcpy // CHECK: call void @test1a void test1(struct Test1S *A, struct Test2S *B) { @@ -28,7 +28,7 @@ struct Test3S { int a,b,c,d,e,f,g,h,i,j,k,l; }; void test2a(struct Test3S q); -// CHECK: define void @test2( +// CHECK-LABEL: define void @test2( // CHECK: alloca %struct.Test3S, align 8 // CHECK: memcpy // CHECK: call void @test2a @@ -38,7 +38,7 @@ void test2(struct Test3S *q) { // But make sure we don't generate a memcpy when we can guarantee alignment. void fooey(void); -// CHECK: define void @test3( +// CHECK-LABEL: define void @test3( // CHECK: alloca %struct.Test3S, align 8 // CHECK: call void @fooey // CHECK-NOT: memcpy diff --git a/test/CodeGen/c-strings.c b/test/CodeGen/c-strings.c index 60a6b01..ff86619 100644 --- a/test/CodeGen/c-strings.c +++ b/test/CodeGen/c-strings.c @@ -19,13 +19,13 @@ unsigned char align = 1; void bar(const char *); -// CHECK: define void @f0() +// CHECK-LABEL: define void @f0() void f0() { bar("hello"); // CHECK: call void @bar({{.*}} @.str } -// CHECK: define void @f1() +// CHECK-LABEL: define void @f1() void f1() { static char *x = "hello"; bar(x); @@ -33,14 +33,14 @@ void f1() { // CHECK: call void @bar(i8* [[T1:%.*]]) } -// CHECK: define void @f2() +// CHECK-LABEL: define void @f2() void f2() { static char x[] = "hello"; bar(x); // CHECK: call void @bar({{.*}} @f2.x } -// CHECK: define void @f3() +// CHECK-LABEL: define void @f3() void f3() { static char x[8] = "hello"; bar(x); @@ -49,7 +49,7 @@ void f3() { void gaz(void *); -// CHECK: define void @f4() +// CHECK-LABEL: define void @f4() void f4() { static struct s { char *name; diff --git a/test/CodeGen/c11atomics-ios.c b/test/CodeGen/c11atomics-ios.c index d1c9b14..ad004fa 100644 --- a/test/CodeGen/c11atomics-ios.c +++ b/test/CodeGen/c11atomics-ios.c @@ -6,7 +6,7 @@ // This work was done in pursuit of <rdar://13338582>. 
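The new builtins-overflow.c file added above exercises clang's checked-arithmetic builtins (__builtin_sadd_overflow, __builtin_umull_overflow, and friends), each of which stores the wrapped result through its out-pointer and returns true when the mathematically correct value did not fit. A minimal usage sketch, separate from the test itself (the function and variable names here are illustrative, not taken from the patch):

/* Returns 1 and writes w*h to *out, or returns 0 on signed overflow. */
static int checked_area(int w, int h, int *out) {
  int area;
  if (__builtin_smul_overflow(w, h, &area))
    return 0;                 /* overflowed: *out is left untouched */
  *out = area;
  return 1;
}

int main(void) {
  int a;
  /* 46341 * 46341 exceeds INT_MAX for a 32-bit int, so this takes the 0 path. */
  return checked_area(46341, 46341, &a) ? 0 : 1;
}

The add/sub variants and the unsigned, long, and long long flavours covered by the test follow the same pattern, lowering to the corresponding llvm.*.with.overflow intrinsics shown in the CHECK lines.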
-// CHECK: define arm_aapcscc void @testFloat(float* +// CHECK-LABEL: define arm_aapcscc void @testFloat(float* void testFloat(_Atomic(float) *fp) { // CHECK: [[FP:%.*]] = alloca float* // CHECK-NEXT: [[X:%.*]] = alloca float @@ -102,8 +102,6 @@ void testStruct(_Atomic(S) *fp) { // CHECK-NEXT: store [[S]]* // CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]] -// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[P]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1 @@ -114,8 +112,6 @@ void testStruct(_Atomic(S) *fp) { // CHECK-NEXT: store i16 4, i16* [[T0]], align 2 __c11_atomic_init(fp, (S){1,2,3,4}); -// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[X]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1 @@ -169,7 +165,7 @@ void testPromotedStruct(_Atomic(PS) *fp) { __c11_atomic_init(fp, (PS){1,2,3}); // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false) // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0 // CHECK-NEXT: store i16 1, i16* [[T1]], align 2 @@ -183,7 +179,7 @@ void testPromotedStruct(_Atomic(PS) *fp) { // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i64* // CHECK-NEXT: [[T2:%.*]] = load atomic i64* [[T1]] seq_cst, align 8 // CHECK-NEXT: [[T3:%.*]] = bitcast [[APS]]* [[TMP0]] to i64* -// CHECK-NEXT: store i64 [[T2]], i64* [[T3]], align 8 +// CHECK-NEXT: store i64 [[T2]], i64* [[T3]], align 2 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP0]], i32 0, i32 0 // CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8* // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8* @@ -191,6 +187,8 @@ void testPromotedStruct(_Atomic(PS) *fp) { PS f = *fp; // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]] +// CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false) // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8* // CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8* diff --git a/test/CodeGen/c11atomics.c b/test/CodeGen/c11atomics.c index 8d298af..5c761b1 100644 --- a/test/CodeGen/c11atomics.c +++ b/test/CodeGen/c11atomics.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-unknown-freebsd -std=c11 | FileCheck %s +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv5-unknown-freebsd -std=c11 | FileCheck %s // Test that we are generating atomicrmw instructions, rather than // compare-exchange loops for common atomic ops. 
This makes a big difference @@ -135,7 +135,7 @@ void testandeq(void) s &= 42; } -// CHECK: define arm_aapcscc void @testFloat(float* +// CHECK-LABEL: define arm_aapcscc void @testFloat(float* void testFloat(_Atomic(float) *fp) { // CHECK: [[FP:%.*]] = alloca float* // CHECK-NEXT: [[X:%.*]] = alloca float @@ -233,8 +233,6 @@ void testStruct(_Atomic(S) *fp) { // CHECK-NEXT: store [[S]]* // CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]] -// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[P]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1 @@ -245,8 +243,6 @@ void testStruct(_Atomic(S) *fp) { // CHECK-NEXT: store i16 4, i16* [[T0]], align 2 __c11_atomic_init(fp, (S){1,2,3,4}); -// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[X]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1 @@ -283,6 +279,9 @@ void testPromotedStruct(_Atomic(PS) *fp) { // CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8 +// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = alloca %struct.PS, align 2 +// CHECK-NEXT: [[TMP3:%.*]] = alloca [[APS]], align 8 // CHECK-NEXT: store [[APS]]* // CHECK-NEXT: [[P:%.*]] = load [[APS]]** [[FP]] @@ -298,7 +297,7 @@ void testPromotedStruct(_Atomic(PS) *fp) { __c11_atomic_init(fp, (PS){1,2,3}); // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false) // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0 // CHECK-NEXT: store i16 1, i16* [[T1]], align 2 @@ -319,6 +318,8 @@ void testPromotedStruct(_Atomic(PS) *fp) { PS f = *fp; // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]] +// CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false) // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8* // CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8* @@ -328,6 +329,20 @@ void testPromotedStruct(_Atomic(PS) *fp) { // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T4]], i8* [[T5]], i32 5) *fp = f; +// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]], align 4 +// CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8* +// CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP3]] to i8* +// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5) +// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = bitcast %struct.PS* [[TMP2]] to i8* +// CHECK-NEXT: [[T2:%.*]] = bitcast %struct.PS* [[T0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false) +// CHECK-NEXT: 
[[T0:%.*]] = getelementptr inbounds %struct.PS* [[TMP2]], i32 0, i32 0 +// CHECK-NEXT: [[T1:%.*]] = load i16* [[T0]], align 2 +// CHECK-NEXT: [[T2:%.*]] = sext i16 [[T1]] to i32 +// CHECK-NEXT: store i32 [[T2]], i32* [[A]], align 4 + int a = ((PS)*fp).x; + // CHECK-NEXT: ret void } diff --git a/test/CodeGen/capture-complex-expr-in-block.c b/test/CodeGen/capture-complex-expr-in-block.c index 86c93d0..83695a8 100644 --- a/test/CodeGen/capture-complex-expr-in-block.c +++ b/test/CodeGen/capture-complex-expr-in-block.c @@ -12,7 +12,7 @@ int main () b(); } -// CHECK: define internal void @__main_block_invoke +// CHECK-LABEL: define internal void @__main_block_invoke // CHECK: [[C1:%.*]] = alloca { double, double }, align 8 // CHECK: [[RP:%.*]] = getelementptr inbounds { double, double }* [[C1]], i32 0, i32 0 // CHECK-NEXT: [[R:%.*]] = load double* [[RP]] diff --git a/test/CodeGen/captured-statements-nested.c b/test/CodeGen/captured-statements-nested.c new file mode 100644 index 0000000..d8ec746 --- /dev/null +++ b/test/CodeGen/captured-statements-nested.c @@ -0,0 +1,126 @@ +// RUN: %clang_cc1 -fblocks -emit-llvm %s -o %t +// RUN: FileCheck %s -input-file=%t -check-prefix=CHECK1 +// RUN: FileCheck %s -input-file=%t -check-prefix=CHECK2 + +struct A { + int a; + float b; + char c; +}; + +void test_nest_captured_stmt(int param) { + int w; + // CHECK1: %struct.anon{{.*}} = type { i32*, i32* } + // CHECK1: %struct.anon{{.*}} = type { i32*, i32*, i32**, i32* } + // CHECK1: [[T:%struct.anon.*]] = type { i32*, i32*, %struct.A*, i32**, i32* } + #pragma clang __debug captured + { + int x; + int *y = &w; + #pragma clang __debug captured + { + struct A z; + #pragma clang __debug captured + { + w = x = z.a = 1; + *y = param; + z.b = 0.1f; + z.c = 'c'; + + // CHECK1: define internal void @__captured_stmt{{.*}}([[T]] + // + // CHECK1: getelementptr inbounds [[T]]* {{.*}}, i32 0, i32 2 + // CHECK1-NEXT: load %struct.A** + // CHECK1-NEXT: getelementptr inbounds %struct.A* + // CHECK1-NEXT: store i32 1 + // + // CHECK1: getelementptr inbounds [[T]]* {{.*}}, i32 0, i32 1 + // CHECK1-NEXT: load i32** + // CHECK1-NEXT: store i32 1 + // + // CHECK1: getelementptr inbounds [[T]]* {{.*}}, i32 0, i32 0 + // CHECK1-NEXT: load i32** + // CHECK1-NEXT: store i32 1 + // + // CHECK1: getelementptr inbounds [[T]]* {{.*}}, i32 0, i32 4 + // CHECK1-NEXT: load i32** + // CHECK1-NEXT: load i32* + // CHECK1-NEXT: getelementptr inbounds [[T]]* {{.*}}, i32 0, i32 3 + // CHECK1-NEXT: load i32*** + // CHECK1-NEXT: load i32** + // CHECK1-NEXT: store i32 + // + // CHECK1: getelementptr inbounds [[T]]* {{.*}}, i32 0, i32 2 + // CHECK1-NEXT: load %struct.A** + // CHECK1-NEXT: getelementptr inbounds %struct.A* + // CHECK1-NEXT: store float + // + // CHECK1: getelementptr inbounds [[T]]* {{.*}}, i32 0, i32 2 + // CHECK1-NEXT: load %struct.A** + // CHECK1-NEXT: getelementptr inbounds %struct.A* + // CHECK1-NEXT: store i8 99 + } + } + } +} + +void test_nest_block() { + __block int x; + int y; + ^{ + int z; + x = z; + #pragma clang __debug captured + { + z = y; // OK + } + }(); + + // CHECK2: define internal void @{{.*}}test_nest_block_block_invoke + // + // CHECK2: [[Z:%[0-9a-z_]*]] = alloca i32 + // CHECK2: alloca %struct.anon{{.*}} + // + // CHECK2: store i32 + // CHECK2: store i32* [[Z]] + // + // CHECK2: getelementptr inbounds %struct.anon + // CHECK2-NEXT: getelementptr inbounds + // CHECK2-NEXT: store i32* + // + // CHECK2: call void @__captured_stmt + + int a; + #pragma clang __debug captured + { + __block int b; + int c; + __block int 
d; + ^{ + b = a; + b = c; + b = d; + }(); + } + + // CHECK2: alloca %struct.__block_byref_b + // CHECK2-NEXT: [[C:%[0-9a-z_]*]] = alloca i32 + // CHECK2-NEXT: alloca %struct.__block_byref_d + // + // CHECK2: bitcast %struct.__block_byref_b* + // CHECK2-NEXT: store i8* + // + // CHECK2: [[CapA:%[0-9a-z_.]*]] = getelementptr inbounds {{.*}}, i32 0, i32 7 + // + // CHECK2: getelementptr inbounds %struct.anon{{.*}}, i32 0, i32 0 + // CHECK2: load i32** + // CHECK2: load i32* + // CHECK2: store i32 {{.*}}, i32* [[CapA]] + // + // CHECK2: [[CapC:%[0-9a-z_.]*]] = getelementptr inbounds {{.*}}, i32 0, i32 8 + // CHECK2-NEXT: [[Val:%[0-9a-z_]*]] = load i32* [[C]] + // CHECK2-NEXT: store i32 [[Val]], i32* [[CapC]] + // + // CHECK2: bitcast %struct.__block_byref_d* + // CHECK2-NEXT: store i8* +} diff --git a/test/CodeGen/captured-statements.c b/test/CodeGen/captured-statements.c new file mode 100644 index 0000000..c87c187 --- /dev/null +++ b/test/CodeGen/captured-statements.c @@ -0,0 +1,80 @@ +// RUN: %clang_cc1 -emit-llvm %s -o %t +// RUN: FileCheck %s -input-file=%t -check-prefix=CHECK-GLOBALS +// RUN: FileCheck %s -input-file=%t -check-prefix=CHECK-1 +// RUN: FileCheck %s -input-file=%t -check-prefix=CHECK-2 +// RUN: FileCheck %s -input-file=%t -check-prefix=CHECK-3 + +int foo(); +int global; + +// Single statement +void test1() { + int i = 0; + #pragma clang __debug captured + { + i++; + } + // CHECK-1: %struct.anon = type { i32* } + // + // CHECK-1: test1 + // CHECK-1: alloca %struct.anon + // CHECK-1: getelementptr inbounds %struct.anon* + // CHECK-1: store i32* %i + // CHECK-1: call void @[[HelperName:__captured_stmt[0-9]+]] +} + +// CHECK-1: define internal void @[[HelperName]](%struct.anon +// CHECK-1: getelementptr inbounds %struct.anon{{.*}}, i32 0, i32 0 +// CHECK-1: load i32** +// CHECK-1: load i32* +// CHECK-1: add nsw i32 +// CHECK-1: store i32 + +// Compound statement with local variable +void test2(int x) { + #pragma clang __debug captured + { + int i; + for (i = 0; i < x; i++) + foo(); + } + // CHECK-2: test2 + // CHECK-2-NOT: %i + // CHECK-2: call void @[[HelperName:__captured_stmt[0-9]+]] +} + +// CHECK-2: define internal void @[[HelperName]] +// CHECK-2-NOT: } +// CHECK-2: %i = alloca i32 + +// Capture array +void test3() { + int arr[] = {1, 2, 3, 4, 5}; + #pragma clang __debug captured + { + arr[2] = arr[1]; + } + // CHECK-3: test3 + // CHECK-3: alloca [5 x i32] + // CHECK-3: call void @__captured_stmt +} + +void dont_capture_global() { + static int s; + extern int e; + #pragma clang __debug captured + { + global++; + s++; + e++; + } + + // CHECK-GLOBALS: %[[Capture:struct\.anon[\.0-9]*]] = type {} + // CHECK-GLOBALS: call void @__captured_stmt[[HelperName:[0-9]+]](%[[Capture]] +} + +// CHECK-GLOBALS: define internal void @__captured_stmt[[HelperName]] +// CHECK-GLOBALS-NOT: ret +// CHECK-GLOBALS: load i32* @global +// CHECK-GLOBALS: load i32* @ +// CHECK-GLOBALS: load i32* @e diff --git a/test/CodeGen/catch-undef-behavior.c b/test/CodeGen/catch-undef-behavior.c index ebe39fe..9be2614 100644 --- a/test/CodeGen/catch-undef-behavior.c +++ b/test/CodeGen/catch-undef-behavior.c @@ -31,12 +31,12 @@ void foo() { // CHECK-TRAP: %[[CHECK0:.*]] = icmp ne {{.*}}* %[[PTR:.*]], null // CHECK: %[[I8PTR:.*]] = bitcast i32* %[[PTR]] to i8* - // CHECK-NEXT: %[[SIZE:.*]] = call i64 @llvm.objectsize.i64(i8* %[[I8PTR]], i1 false) + // CHECK-NEXT: %[[SIZE:.*]] = call i64 @llvm.objectsize.i64.p0i8(i8* %[[I8PTR]], i1 false) // CHECK-NEXT: %[[CHECK1:.*]] = icmp uge i64 %[[SIZE]], 4 // CHECK-NEXT: 
%[[CHECK01:.*]] = and i1 %[[CHECK0]], %[[CHECK1]] // CHECK-TRAP: %[[I8PTR:.*]] = bitcast i32* %[[PTR]] to i8* - // CHECK-TRAP-NEXT: %[[SIZE:.*]] = call i64 @llvm.objectsize.i64(i8* %[[I8PTR]], i1 false) + // CHECK-TRAP-NEXT: %[[SIZE:.*]] = call i64 @llvm.objectsize.i64.p0i8(i8* %[[I8PTR]], i1 false) // CHECK-TRAP-NEXT: %[[CHECK1:.*]] = icmp uge i64 %[[SIZE]], 4 // CHECK-TRAP-NEXT: %[[CHECK01:.*]] = and i1 %[[CHECK0]], %[[CHECK1]] diff --git a/test/CodeGen/char-literal.c b/test/CodeGen/char-literal.c index 237d4b2..6fdf8b7 100644 --- a/test/CodeGen/char-literal.c +++ b/test/CodeGen/char-literal.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s -// RUN: %clang_cc1 -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s -// RUN: %clang_cc1 -x c++ -std=c++11 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CPP0X %s +// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-C %s +// RUN: %clang_cc1 -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-C %s +// RUN: %clang_cc1 -x c++ -std=c++11 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-CPP0X %s #include <stddef.h> diff --git a/test/CodeGen/complex-convert.c b/test/CodeGen/complex-convert.c index aaa57a0..e35be9c 100644 --- a/test/CodeGen/complex-convert.c +++ b/test/CodeGen/complex-convert.c @@ -21,7 +21,7 @@ void foo(signed char sc, unsigned char uc, signed long long sll, _Complex unsigned char cuc1; _Complex signed long long csll1; _Complex unsigned long long cull1; - // CHECK: define void @foo( + // CHECK-LABEL: define void @foo( // CHECK: alloca i[[CHSIZE:[0-9]+]], align [[CHALIGN:[0-9]+]] // CHECK-NEXT: alloca i[[CHSIZE]], align [[CHALIGN]] // CHECK-NEXT: alloca i[[LLSIZE:[0-9]+]], align [[LLALIGN:[0-9]+]] diff --git a/test/CodeGen/complex-indirect.c b/test/CodeGen/complex-indirect.c index 0daa970..cb84f7f 100644 --- a/test/CodeGen/complex-indirect.c +++ b/test/CodeGen/complex-indirect.c @@ -7,6 +7,6 @@ void a(int,int,int,int,int,int,__complex__ char); void b(__complex__ char *y) { a(0,0,0,0,0,0,*y); } -// CHECK: define void @b +// CHECK-LABEL: define void @b // CHECK: alloca { i8, i8 }*, align 8 // CHECK: call void @a(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i16 {{.*}}) diff --git a/test/CodeGen/complex-init-list.c b/test/CodeGen/complex-init-list.c index 99c1c62..bc38e2c 100644 --- a/test/CodeGen/complex-init-list.c +++ b/test/CodeGen/complex-init-list.c @@ -8,11 +8,11 @@ _Complex float x = { 1.0f, 1.0f/0.0f }; // CHECK: @x = global { float, float } { float 1.000000e+00, float 0x7FF0000000000000 }, align 4 _Complex float f(float x, float y) { _Complex float z = { x, y }; return z; } -// CHECK: define <2 x float> @f +// CHECK-LABEL: define <2 x float> @f // CHECK: alloca { float, float } // CHECK: alloca { float, float } _Complex float f2(float x, float y) { return (_Complex float){ x, y }; } -// CHECK: define <2 x float> @f2 +// CHECK-LABEL: define <2 x float> @f2 // CHECK: alloca { float, float } // CHECK: alloca { float, float } diff --git a/test/CodeGen/complex.c b/test/CodeGen/complex.c index 1212660..206db25 100644 --- a/test/CodeGen/complex.c +++ b/test/CodeGen/complex.c @@ -32,8 +32,7 @@ void test3() { double Gr = __real g1; cf += D; - // FIXME: Currently unsupported! 
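The complex.c hunks above delete two FIXME markers: compound assignment of a complex value into a real lvalue (D += cf) is now supported instead of being commented out. A tiny sketch of the semantics being tested (values are illustrative):

#include <complex.h>
#include <stdio.h>

int main(void) {
  double D = 1.0;
  double _Complex cf = 2.0 + 3.0 * I;
  D += cf;                /* D is promoted to complex, added, then converted
                             back: the imaginary part is discarded */
  printf("%f\n", D);      /* prints 3.000000 */
  return 0;
}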
- //D += cf; + D += cf; cf /= g1; g1 = g1 + D; g1 = D + g1; @@ -51,8 +50,7 @@ void test3int() { i = __real ci1; cs += i; - // FIXME: Currently unsupported! - //D += cf; + D += cf; cs /= ci1; ci1 = ci1 + i; ci1 = i + ci1; @@ -97,3 +95,6 @@ double t7(double _Complex c) { void t8() { __complex__ int *x = &(__complex__ int){1}; } + +const _Complex double test9const = 0; +_Complex double test9func() { return test9const; } diff --git a/test/CodeGen/compound-assign-overflow.c b/test/CodeGen/compound-assign-overflow.c index e82061b..1533429 100644 --- a/test/CodeGen/compound-assign-overflow.c +++ b/test/CodeGen/compound-assign-overflow.c @@ -7,8 +7,7 @@ // CHECK: @[[LINE_100:.*]] = private unnamed_addr global {{.*}}, i32 100, i32 5 {{.*}} @[[INT]] // CHECK: @[[UINT:.*]] = private unnamed_addr constant { i16, i16, [15 x i8] } { i16 0, i16 10, [15 x i8] c"'unsigned int'\00" } // CHECK: @[[LINE_200:.*]] = private unnamed_addr global {{.*}}, i32 200, i32 5 {{.*}} @[[UINT]] -// CHECK: @[[DIVINT:.*]] = private unnamed_addr constant { i16, i16, [6 x i8] } { i16 0, i16 11, [6 x i8] c"'int'\00" } -// CHECK: @[[LINE_300:.*]] = private unnamed_addr global {{.*}}, i32 300, i32 5 {{.*}} @[[DIVINT]] +// CHECK: @[[LINE_300:.*]] = private unnamed_addr global {{.*}}, i32 300, i32 5 {{.*}} @[[INT]] int32_t x; diff --git a/test/CodeGen/compound-literal.c b/test/CodeGen/compound-literal.c index e4bf962..458a78e 100644 --- a/test/CodeGen/compound-literal.c +++ b/test/CodeGen/compound-literal.c @@ -12,7 +12,7 @@ struct s {int a, b, c;} * b = &(struct s) {1, 2, 3}; _Complex double * x = &(_Complex double){1.0f}; } -// CHECK: define void @f() +// CHECK-LABEL: define void @f() void f() { typedef struct S { int x,y; } S; // CHECK: [[S:%[a-zA-Z0-9.]+]] = alloca [[STRUCT:%[a-zA-Z0-9.]+]], @@ -33,7 +33,7 @@ void f() { // CHECK-NEXT: ret void } -// CHECK: define i48 @g( +// CHECK-LABEL: define i48 @g( struct G { short x, y, z; }; struct G g(int x, int y, int z) { // CHECK: [[RESULT:%.*]] = alloca [[G:%.*]], align 2 diff --git a/test/CodeGen/const-init.c b/test/CodeGen/const-init.c index 5f729b8..7d7ccae 100644 --- a/test/CodeGen/const-init.c +++ b/test/CodeGen/const-init.c @@ -156,6 +156,6 @@ void g29() { // CHECK: @g29.b = internal global [1 x i32] [i32 ptrtoint ([5 x i8]* @.str to i32)], align 4 // CHECK: @g29.c = internal global [1 x i32] [i32 97], align 4 static DCC_SRVR_NM a = { {"@"} }; - static int b[1] = { "asdf" }; + static int b[1] = { "asdf" }; // expected-warning {{incompatible pointer to integer conversion initializing 'int' with an expression of type 'char [5]'}} static int c[1] = { L"a" }; } diff --git a/test/CodeGen/convertvector.c b/test/CodeGen/convertvector.c new file mode 100644 index 0000000..2b23dd9 --- /dev/null +++ b/test/CodeGen/convertvector.c @@ -0,0 +1,114 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 -target-cpu corei7-avx -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 -target-cpu corei7-avx -emit-llvm -x c++ %s -o - | FileCheck %s + +typedef double vector8double __attribute__((__vector_size__(64))); +typedef float vector8float __attribute__((__vector_size__(32))); +typedef long vector8long __attribute__((__vector_size__(64))); +typedef short vector8short __attribute__((__vector_size__(16))); +typedef unsigned long vector8ulong __attribute__((__vector_size__(64))); +typedef unsigned short vector8ushort __attribute__((__vector_size__(16))); + +#ifdef __cplusplus +#define BOOL bool +#else +#define BOOL _Bool +#endif + +typedef BOOL vector8bool 
__attribute__((__ext_vector_type__(8))); + +#ifdef __cplusplus +extern "C" { +#endif + +vector8float flt_trunc(vector8double x) { + return __builtin_convertvector(x, vector8float); + // CHECK-LABEL: @flt_trunc + // CHECK: fptrunc <8 x double> %{{[^ ]}} to <8 x float> +} + +vector8double flt_ext(vector8float x) { + return __builtin_convertvector(x, vector8double); + // CHECK-LABEL: @flt_ext + // CHECK: fpext <8 x float> %{{[^ ]}} to <8 x double> +} + +vector8bool flt_tobool(vector8float x) { + return __builtin_convertvector(x, vector8bool); + // CHECK-LABEL: @flt_tobool + // CHECK-NOT: fptoui <8 x float> %{{[^ ]}} to <8 x i1> + // CHECK: fcmp une <8 x float> %{{[^ ]}}, zeroinitializer +} + +vector8long flt_tosi(vector8float x) { + return __builtin_convertvector(x, vector8long); + // CHECK-LABEL: @flt_tosi + // CHECK: fptosi <8 x float> %{{[^ ]}} to <8 x i64> +} + +vector8ulong flt_toui(vector8float x) { + return __builtin_convertvector(x, vector8ulong); + // CHECK-LABEL: @flt_toui + // CHECK: fptoui <8 x float> %{{[^ ]}} to <8 x i64> +} + +vector8ulong fltd_toui(vector8double x) { + return __builtin_convertvector(x, vector8ulong); + // CHECK-LABEL: @fltd_toui + // CHECK: fptoui <8 x double> %{{[^ ]}} to <8 x i64> +} + +vector8ulong int_zext(vector8ushort x) { + return __builtin_convertvector(x, vector8ulong); + // CHECK-LABEL: @int_zext + // CHECK: zext <8 x i16> %{{[^ ]}} to <8 x i64> +} + +vector8long int_sext(vector8short x) { + return __builtin_convertvector(x, vector8long); + // CHECK-LABEL: @int_sext + // CHECK: sext <8 x i16> %{{[^ ]}} to <8 x i64> +} + +vector8bool int_tobool(vector8short x) { + return __builtin_convertvector(x, vector8bool); + // CHECK-LABEL: @int_tobool + // CHECK-NOT: trunc <8 x i16> %{{[^ ]}} to <8 x i1> + // CHECK: icmp ne <8 x i16> %{{[^ ]}}, zeroinitializer +} + +vector8float int_tofp(vector8short x) { + return __builtin_convertvector(x, vector8float); + // CHECK-LABEL: @int_tofp + // CHECK: sitofp <8 x i16> %{{[^ ]}} to <8 x float> +} + +vector8float uint_tofp(vector8ushort x) { + return __builtin_convertvector(x, vector8float); + // CHECK-LABEL: @uint_tofp + // CHECK: uitofp <8 x i16> %{{[^ ]}} to <8 x float> +} + +#ifdef __cplusplus +} +#endif + + +#ifdef __cplusplus +template<typename T> +T int_toT(vector8long x) { + return __builtin_convertvector(x, T); +} + +extern "C" { + vector8double int_toT_fp(vector8long x) { + // CHECK-LABEL: @int_toT_fp + // CHECK: sitofp <8 x i64> %{{[^ ]}} to <8 x double> + return int_toT<vector8double>(x); + } +} +#else +vector8double int_toT_fp(vector8long x) { + return __builtin_convertvector(x, vector8double); +} +#endif + diff --git a/test/CodeGen/cxx-default-arg.cpp b/test/CodeGen/cxx-default-arg.cpp index 25b7c10..12e2666 100644 --- a/test/CodeGen/cxx-default-arg.cpp +++ b/test/CodeGen/cxx-default-arg.cpp @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -emit-llvm %s -o %t -// Note: define CLANG_GENERATE_KNOWN_GOOD and compile to generate code +// Note-LABEL: define CLANG_GENERATE_KNOWN_GOOD and compile to generate code // that makes all of the defaulted arguments explicit. The resulting // byte code should be identical to the compilation without // CLANG_GENERATE_KNOWN_GOOD. 
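convertvector.c, added above, covers __builtin_convertvector, which converts a vector value element-wise to another vector type with the same number of lanes; the CHECK lines pin down the cast instruction expected for each combination (fpext, fptosi, sitofp, zext, ...). A small standalone sketch of how it is called (the typedef names are illustrative):

typedef float  float4  __attribute__((__vector_size__(16)));
typedef int    int4    __attribute__((__vector_size__(16)));
typedef double double4 __attribute__((__vector_size__(32)));

double4 widen(float4 v) {
  return __builtin_convertvector(v, double4);   /* one fpext per lane */
}

int4 to_int(float4 v) {
  return __builtin_convertvector(v, int4);      /* one fptosi per lane */
}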
diff --git a/test/CodeGen/darwin-string-literals.c b/test/CodeGen/darwin-string-literals.c index 968386a..c7d9ff9 100644 --- a/test/CodeGen/darwin-string-literals.c +++ b/test/CodeGen/darwin-string-literals.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple i386-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix LSB %s +// RUN: %clang_cc1 -triple i386-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix CHECK-LSB %s // CHECK-LSB: @.str = private unnamed_addr constant [8 x i8] c"string0\00" // CHECK-LSB: @.str1 = linker_private unnamed_addr constant [8 x i8] c"string1\00" @@ -6,7 +6,7 @@ // CHECK-LSB: @.str4 = internal unnamed_addr constant [6 x i16] [i16 116, i16 101, i16 115, i16 116, i16 8482, i16 0], align 2 -// RUN: %clang_cc1 -triple powerpc-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix MSB %s +// RUN: %clang_cc1 -triple powerpc-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix CHECK-MSB %s // CHECK-MSB: @.str = private unnamed_addr constant [8 x i8] c"string0\00" // CHECK-MSB: @.str1 = linker_private unnamed_addr constant [8 x i8] c"string1\00" diff --git a/test/CodeGen/debug-info-args.c b/test/CodeGen/debug-info-args.c index 3312952..50b8541 100644 --- a/test/CodeGen/debug-info-args.c +++ b/test/CodeGen/debug-info-args.c @@ -2,7 +2,7 @@ int somefunc(char *x, int y, double z) { - // CHECK: metadata ![[NUM:[^,]*]], i32 0, i32 0} ; [ DW_TAG_subroutine_type + // CHECK: metadata ![[NUM:[^,]*]], i32 0, null, null, null} ; [ DW_TAG_subroutine_type // CHECK: ![[NUM]] = {{metadata !{metadata ![^,]*, metadata ![^,]*, metadata ![^,]*, metadata ![^,]*}}} return y; diff --git a/test/CodeGen/debug-info-block-decl.c b/test/CodeGen/debug-info-block-decl.c new file mode 100644 index 0000000..06c0e1a --- /dev/null +++ b/test/CodeGen/debug-info-block-decl.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -g -fblocks -emit-llvm -o - %s | FileCheck %s +// Assignment and block entry should point to the same line. +// rdar://problem/14039866 + +// CHECK: define{{.*}}@main() +// CHECK: store{{.*}}bitcast{{.*}}, !dbg ![[ASSIGNMENT:[0-9]+]] +// CHECK: define {{.*}} @__main_block_invoke +// CHECK: dbg ![[BLOCK_ENTRY:[0-9]+]] + +int main() +{ +// CHECK: [[ASSIGNMENT]] = metadata !{i32 [[@LINE+2]], +// CHECK: [[BLOCK_ENTRY]] = metadata !{i32 [[@LINE+1]], + int (^blockptr)(void) = ^(void) { + return 0; + }; + return blockptr(); +} + diff --git a/test/CodeGen/debug-info-enum.c b/test/CodeGen/debug-info-enum.c deleted file mode 100644 index b4a1ce0..0000000 --- a/test/CodeGen/debug-info-enum.c +++ /dev/null @@ -1,11 +0,0 @@ -// RUN: %clang_cc1 -emit-llvm -g %s -o %t -// RUN: grep DW_TAG_enumeration_type %t -// Radar 8195980 - -enum vtag { - VT_ONE -}; - -int foo(int i) { - return i == VT_ONE; -} diff --git a/test/CodeGen/debug-info-limited.c b/test/CodeGen/debug-info-limited.c new file mode 100644 index 0000000..7333452 --- /dev/null +++ b/test/CodeGen/debug-info-limited.c @@ -0,0 +1,11 @@ +// RUN: %clang -flimit-debug-info -emit-llvm -g -S %s -o - | FileCheck %s + +// Ensure we emit the full definition of 'foo' even though only its declaration +// is needed, since C has no ODR to ensure that the definition will be the same +// in whatever TU actually uses/requires the definition of 'foo'. 
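Two FileCheck idioms recur in the hunks above: the wholesale switch from "CHECK: define" to CHECK-LABEL, which makes FileCheck partition the output at each matched definition so a mismatch is reported against the right function instead of bleeding into the next one, and the [[@LINE+N]] expression used by the new debug-info tests, which expands to the current source line number so the checks keep passing when lines shift. A schematic test in the same style (contents are illustrative, not from the patch):

// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s

// CHECK-LABEL: define i32 @twice(
// CHECK: add nsw i32
int twice(int x) {
  return x + x;
}

// CHECK-LABEL: define i32 @thrice(
// CHECK: mul nsw i32
int thrice(int x) {
  return x * 3;
}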
+// CHECK: ; [ DW_TAG_structure_type ] [foo] {{.*}} [def] + +struct foo { +}; + +struct foo *f; diff --git a/test/CodeGen/debug-info-version.c b/test/CodeGen/debug-info-version.c new file mode 100644 index 0000000..3a74876 --- /dev/null +++ b/test/CodeGen/debug-info-version.c @@ -0,0 +1,8 @@ +// RUN: %clang -g -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang -S -emit-llvm -o - %s | FileCheck %s --check-prefix=NO_DEBUG +int main (void) { + return 0; +} + +// CHECK: metadata !{i32 1, metadata !"Debug Info Version", i32 1} +// NO_DEBUG-NOT: metadata !"Debug Info Version" diff --git a/test/CodeGen/debug-info-vla.c b/test/CodeGen/debug-info-vla.c index 20fb6aa..7a8da96 100644 --- a/test/CodeGen/debug-info-vla.c +++ b/test/CodeGen/debug-info-vla.c @@ -1,9 +1,8 @@ // RUN: %clang_cc1 -emit-llvm -g -triple x86_64-apple-darwin %s -o - | FileCheck %s -// CHECK: metadata !{i32 {{.*}}, metadata {{.*}}, metadata !"vla", metadata {{.*}}, i32 7, metadata {{.*}}, i32 0, i32 0, i64 2} ; [ DW_TAG_auto_variable ] - void testVLAwithSize(int s) { +// CHECK: metadata !{i32 {{.*}}, metadata {{.*}}, metadata !"vla", metadata {{.*}}, i32 [[@LINE+1]], metadata {{.*}}, i32 8192, i32 0} ; [ DW_TAG_auto_variable ] [vla] [line [[@LINE+1]]] int vla[s]; int i; for (i = 0; i < s; i++) { diff --git a/test/CodeGen/decl-in-prototype.c b/test/CodeGen/decl-in-prototype.c index 2c0fc4f..15efa65 100644 --- a/test/CodeGen/decl-in-prototype.c +++ b/test/CodeGen/decl-in-prototype.c @@ -2,13 +2,13 @@ const int AA = 5; -// CHECK: define i32 @f1 +// CHECK-LABEL: define i32 @f1 int f1(enum {AA,BB} E) { // CHECK: ret i32 1 return BB; } -// CHECK: define i32 @f2 +// CHECK-LABEL: define i32 @f2 int f2(enum {AA=7,BB} E) { // CHECK: ret i32 7 return AA; diff --git a/test/CodeGen/dependent-lib.c b/test/CodeGen/dependent-lib.c new file mode 100644 index 0000000..df4aaf0 --- /dev/null +++ b/test/CodeGen/dependent-lib.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 %s --dependent-lib=msvcrt -triple i686-pc-win32 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s --dependent-lib=msvcrt -triple x86_64-pc-win32 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s --dependent-lib=msvcrt -triple i686-pc-linux -emit-llvm -o - | FileCheck -check-prefix LINUX %s + +// CHECK: !llvm.module.flags = !{!0} +// CHECK: !0 = metadata !{i32 6, metadata !"Linker Options", metadata ![[link_opts:[0-9]+]]} +// CHECK: ![[link_opts]] = metadata !{metadata ![[msvcrt:[0-9]+]]} +// CHECK: ![[msvcrt]] = metadata !{metadata !"/DEFAULTLIB:msvcrt.lib"} + +// LINUX: !llvm.module.flags = !{!0} +// LINUX: !0 = metadata !{i32 6, metadata !"Linker Options", metadata ![[link_opts:[0-9]+]]} +// LINUX: ![[link_opts]] = metadata !{metadata ![[msvcrt:[0-9]+]]} +// LINUX: ![[msvcrt]] = metadata !{metadata !"-lmsvcrt"} + +int f(); diff --git a/test/CodeGen/designated-initializers.c b/test/CodeGen/designated-initializers.c index 6561ce5..b11c67a 100644 --- a/test/CodeGen/designated-initializers.c +++ b/test/CodeGen/designated-initializers.c @@ -52,6 +52,93 @@ struct ds ds7 = { .b = 3 }; + +// <rdar://problem/10465114> +struct overwrite_string_struct1 { + __typeof(L"foo"[0]) L[6]; + int M; +} overwrite_string1[] = { { { L"foo" }, 1 }, [0].L[2] = L'x'}; +// CHECK: [6 x i32] [i32 102, i32 111, i32 120, i32 0, i32 0, i32 0], i32 1 +struct overwrite_string_struct2 { + char L[6]; + int M; +} overwrite_string2[] = { { { "foo" }, 1 }, [0].L[2] = 'x'}; +// CHECK: [6 x i8] c"fox\00\00\00", i32 1 +struct overwrite_string_struct3 { + char L[3]; + int M; +} overwrite_string3[] = { { { "foo" }, 1 }, 
[0].L[2] = 'x'}; +// CHECK: [3 x i8] c"fox", i32 1 +struct overwrite_string_struct4 { + char L[3]; + int M; +} overwrite_string4[] = { { { "foobar" }, 1 }, [0].L[2] = 'x'}; +// CHECK: [3 x i8] c"fox", i32 1 +struct overwrite_string_struct5 { + char L[6]; + int M; +} overwrite_string5[] = { { { "foo" }, 1 }, [0].L[4] = 'y'}; +// CHECK: [6 x i8] c"foo\00y\00", i32 1 + + +// CHECK: @u1 = {{.*}} { i32 65535 } +union u_FFFF { char c; long l; } u1 = { .l = 0xFFFF }; + + +/// PR16644 +typedef union u_16644 { + struct s_16644 { + int zero; + int one; + int two; + int three; + } a; + int b[4]; +} union_16644_t; + +// CHECK: @union_16644_instance_0 = {{.*}} { i32 0, i32 0, i32 0, i32 3 } } +union_16644_t union_16644_instance_0 = +{ + .b[0] = 0, + .a.one = 1, + .b[2] = 2, + .a.three = 3, +}; + +// CHECK: @union_16644_instance_1 = {{.*}} [i32 10, i32 0, i32 0, i32 0] +union_16644_t union_16644_instance_1 = +{ + .a.three = 13, + .b[2] = 12, + .a.one = 11, + .b[0] = 10, +}; + +// CHECK: @union_16644_instance_2 = {{.*}} [i32 0, i32 20, i32 0, i32 0] +union_16644_t union_16644_instance_2 = +{ + .a.one = 21, + .b[1] = 20, +}; + +// CHECK: @union_16644_instance_3 = {{.*}} { i32 0, i32 31, i32 0, i32 0 } +union_16644_t union_16644_instance_3 = +{ + .b[1] = 30, + .a = { + .one = 31 + } +}; + +// CHECK: @union_16644_instance_4 = {{.*}} { i32 5, i32 2, i32 0, i32 0 } {{.*}} [i32 0, i32 4, i32 0, i32 0] +union_16644_t union_16644_instance_4[2] = +{ + [0].a.one = 2, + [1].a.zero = 3, + [0].a.zero = 5, + [1].b[1] = 4 +}; + void test1(int argc, char **argv) { // CHECK: internal global %struct.foo { i8* null, i32 1024 } diff --git a/test/CodeGen/dllimport-dllexport.c b/test/CodeGen/dllimport-dllexport.c index c187503..e70ac03 100644 --- a/test/CodeGen/dllimport-dllexport.c +++ b/test/CodeGen/dllimport-dllexport.c @@ -2,11 +2,11 @@ void __attribute__((dllimport)) foo1(); void __attribute__((dllexport)) foo1(){} -// CHECK: define dllexport void @foo1 +// CHECK-LABEL: define dllexport void @foo1 void __attribute__((dllexport)) foo2(); // PR6269 __declspec(dllimport) void foo3(); __declspec(dllexport) void foo3(){} -// CHECK: define dllexport void @foo3 +// CHECK-LABEL: define dllexport void @foo3 __declspec(dllexport) void foo4(); diff --git a/test/CodeGen/dwarf-version.c b/test/CodeGen/dwarf-version.c new file mode 100644 index 0000000..6c0f097 --- /dev/null +++ b/test/CodeGen/dwarf-version.c @@ -0,0 +1,14 @@ +// RUN: %clang -target x86_64-linux-gnu -gdwarf-2 -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang -target x86_64-linux-gnu -gdwarf-3 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER3 +// RUN: %clang -target x86_64-linux-gnu -gdwarf-4 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER4 +// RUN: %clang -target x86_64-linux-gnu -g -S -emit-llvm -o - %s | FileCheck %s --check-prefix=LINUX +// RUN: %clang -target x86_64-apple-darwin -g -S -emit-llvm -o - %s | FileCheck %s --check-prefix=DARWIN +int main (void) { + return 0; +} + +// CHECK: metadata !{i32 2, metadata !"Dwarf Version", i32 2} +// VER3: metadata !{i32 2, metadata !"Dwarf Version", i32 3} +// VER4: metadata !{i32 2, metadata !"Dwarf Version", i32 4} +// LINUX: metadata !{i32 2, metadata !"Dwarf Version", i32 4} +// DARWIN: metadata !{i32 2, metadata !"Dwarf Version", i32 2} diff --git a/test/CodeGen/exceptions-seh.c b/test/CodeGen/exceptions-seh.c new file mode 100644 index 0000000..eadbe15 --- /dev/null +++ b/test/CodeGen/exceptions-seh.c @@ -0,0 +1,18 @@ +// RUN: not %clang_cc1 -triple i686-pc-win32 -fexceptions -fms-extensions 
-emit-llvm -o - %s 2>&1 | FileCheck %s + +// This is a codegen test because we only emit the diagnostic when we start +// generating code. + +int SaveDiv(int numerator, int denominator, int *res) { + int myres = 0; + __try { + myres = numerator / denominator; + } __except (1) { + return 0; + } + *res = myres; + return 1; +} +// CHECK-NOT error +// CHECK: error: cannot compile this SEH __try yet +// CHECK-NOT error diff --git a/test/CodeGen/exceptions.c b/test/CodeGen/exceptions.c index 311bc84..ae0af4d 100644 --- a/test/CodeGen/exceptions.c +++ b/test/CodeGen/exceptions.c @@ -5,8 +5,8 @@ void test1() { extern void test1_helper(void (^)(int)); - // CHECK: define void @test1() - // CHECK-ARM: define arm_aapcscc void @test1() + // CHECK-LABEL: define void @test1() + // CHECK-ARM-LABEL: define arm_aapcscc void @test1() __block int x = 10; diff --git a/test/CodeGen/exprs.c b/test/CodeGen/exprs.c index f8f2833..2a22169 100644 --- a/test/CodeGen/exprs.c +++ b/test/CodeGen/exprs.c @@ -121,7 +121,7 @@ void f10() { } // rdar://7530813 -// CHECK: define i32 @f11 +// CHECK-LABEL: define i32 @f11 int f11(long X) { int A[100]; return A[X]; @@ -134,14 +134,14 @@ int f11(long X) { int f12() { // PR3150 - // CHECK: define i32 @f12 + // CHECK-LABEL: define i32 @f12 // CHECK: ret i32 1 return 1||1; } // Make sure negate of fp uses -0.0 for proper -0 handling. double f13(double X) { - // CHECK: define double @f13 + // CHECK-LABEL: define double @f13 // CHECK: fsub double -0.0 return -X; } @@ -151,7 +151,7 @@ void f14(struct s14 *a) { (void) &*a; } -// CHECK: define void @f15 +// CHECK-LABEL: define void @f15 void f15() { extern void f15_start(void); f15_start(); @@ -168,7 +168,7 @@ void f15() { } // PR8967: this was crashing -// CHECK: define void @f16() +// CHECK-LABEL: define void @f16() void f16() { __extension__({ goto lbl; }); lbl: @@ -176,7 +176,7 @@ void f16() { } // PR13704: negative increment in i128 is not preserved. -// CHECK: define void @f17() +// CHECK-LABEL: define void @f17() void f17() { extern void extfunc(__int128); __int128 x = 2; diff --git a/test/CodeGen/ext-vector.c b/test/CodeGen/ext-vector.c index a9fa151..0b78e97 100644 --- a/test/CodeGen/ext-vector.c +++ b/test/CodeGen/ext-vector.c @@ -286,3 +286,18 @@ int4 test15(uint4 V0) { V = V || V; return V; } + +// CHECK: @test16 +void test16(float2 a, float2 b) { + float2 t0 = (a + b) / 2; +} + +typedef char char16 __attribute__((ext_vector_type(16))); + +// CHECK: @test17 +void test17(void) { + char16 valA; + char valB; + char valC; + char16 destVal = valC ? 
valA : valB; +} diff --git a/test/CodeGen/fast-math.c b/test/CodeGen/fast-math.c index 76cfbbd..4a51358 100644 --- a/test/CodeGen/fast-math.c +++ b/test/CodeGen/fast-math.c @@ -2,7 +2,7 @@ float f0, f1, f2; void foo(void) { - // CHECK: define void @foo() + // CHECK-LABEL: define void @foo() // CHECK: fadd fast f0 = f1 + f2; diff --git a/test/CodeGen/finite-math.c b/test/CodeGen/finite-math.c index bf39cea..b0ee157 100644 --- a/test/CodeGen/finite-math.c +++ b/test/CodeGen/finite-math.c @@ -2,7 +2,7 @@ float f0, f1, f2; void foo(void) { - // CHECK: define void @foo() + // CHECK-LABEL: define void @foo() // CHECK: fadd nnan ninf f0 = f1 + f2; diff --git a/test/CodeGen/fp16-ops.c b/test/CodeGen/fp16-ops.c index e506513..a848ed1 100644 --- a/test/CodeGen/fp16-ops.c +++ b/test/CodeGen/fp16-ops.c @@ -7,7 +7,7 @@ volatile __fp16 h0 = 0.0, h1 = 1.0, h2; volatile float f0, f1, f2; void foo(void) { - // CHECK: define void @foo() + // CHECK-LABEL: define void @foo() // Check unary ops diff --git a/test/CodeGen/func-return-member.c b/test/CodeGen/func-return-member.c index 14ecac5..efc3003 100644 --- a/test/CodeGen/func-return-member.c +++ b/test/CodeGen/func-return-member.c @@ -10,17 +10,17 @@ int X; struct frk F; float _Complex C; -// CHECK: define void @bar +// CHECK-LABEL: define void @bar void bar(void) { X = foo().f.f.x; } -// CHECK: define void @bun +// CHECK-LABEL: define void @bun void bun(void) { F = foo().f.f; } -// CHECK: define void @ban +// CHECK-LABEL: define void @ban void ban(void) { C = foo().f.f.c; } diff --git a/test/CodeGen/function-attributes.c b/test/CodeGen/function-attributes.c index 25ca916..47a0568 100644 --- a/test/CodeGen/function-attributes.c +++ b/test/CodeGen/function-attributes.c @@ -24,7 +24,7 @@ void f6(signed short x) { } void f7(unsigned short x) { } -// CHECK: define void @f8() +// CHECK-LABEL: define void @f8() // CHECK: [[AI:#[0-9]+]] // CHECK: { void __attribute__((always_inline)) f8(void) { } @@ -61,7 +61,7 @@ void f13(void){} // Ensure that these get inlined: rdar://6853279 -// CHECK: define void @f14 +// CHECK-LABEL: define void @f14 // CHECK-NOT: @ai_ // CHECK: call void @f14_end static __inline__ __attribute__((always_inline)) @@ -81,21 +81,21 @@ void f14(int a) { } // <rdar://problem/7102668> [irgen] clang isn't setting the optsize bit on functions -// CHECK: define void @f15 +// CHECK-LABEL: define void @f15 // CHECK: [[NUW]] // CHECK: { void f15(void) { } // PR5254 -// CHECK: define void @f16 +// CHECK-LABEL: define void @f16 // CHECK: [[ALIGN:#[0-9]+]] // CHECK: { void __attribute__((force_align_arg_pointer)) f16(void) { } // PR11038 -// CHECK: define void @f18() +// CHECK-LABEL: define void @f18() // CHECK: [[RT:#[0-9]+]] // CHECK: { // CHECK: call void @f17() @@ -106,7 +106,7 @@ __attribute__ ((returns_twice)) void f18(void) { f17(); } -// CHECK: define void @f19() +// CHECK-LABEL: define void @f19() // CHECK: { // CHECK: call i32 @setjmp(i32* null) // CHECK: [[RT_CALL]] diff --git a/test/CodeGen/functions.c b/test/CodeGen/functions.c index 8241a3d..55f2d5f 100644 --- a/test/CodeGen/functions.c +++ b/test/CodeGen/functions.c @@ -20,14 +20,14 @@ int a(int); int a() {return 1;} void f0() {} -// CHECK: define void @f0() +// CHECK-LABEL: define void @f0() void f1(); void f2(void) { // CHECK: call void @f1() f1(1, 2, 3); } -// CHECK: define void @f1() +// CHECK-LABEL: define void @f1() void f1() {} // CHECK: define {{.*}} @f3{{\(\)|\(.*sret.*\)}} @@ -54,7 +54,7 @@ void f8_callback(struct Incomplete); void f8_user(void (*callback)(struct Incomplete)); void 
f8_test() { f8_user(&f8_callback); -// CHECK: define void @f8_test() +// CHECK-LABEL: define void @f8_test() // CHECK: call void @f8_user({{.*}}* bitcast (void ()* @f8_callback to {{.*}}*)) // CHECK: declare void @f8_user({{.*}}*) // CHECK: declare void @f8_callback() diff --git a/test/CodeGen/implicit-arg.c b/test/CodeGen/implicit-arg.c index 52ab58e..c25f034 100644 --- a/test/CodeGen/implicit-arg.c +++ b/test/CodeGen/implicit-arg.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -emit-llvm -O0 -o - +// RUN: %clang_cc1 %s -emit-llvm -o - // RUN: %clang_cc1 %s -emit-llvm -O1 -o - // rdar://6518089 diff --git a/test/CodeGen/incomplete-function-type.c b/test/CodeGen/incomplete-function-type.c index b630947..fc216da 100644 --- a/test/CodeGen/incomplete-function-type.c +++ b/test/CodeGen/incomplete-function-type.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - %s | FileCheck %s // CHECK: ModuleID // CHECK-NOT: opaque -// CHECK: define void @f0 +// CHECK-LABEL: define void @f0 enum teste1 test1f(void), (*test1)(void) = test1f; struct tests2 test2f(), (*test2)() = test2f; diff --git a/test/CodeGen/inline.c b/test/CodeGen/inline.c index 442b380..70ab696 100644 --- a/test/CodeGen/inline.c +++ b/test/CodeGen/inline.c @@ -1,55 +1,64 @@ // RUN: echo "GNU89 tests:" -// RUN: %clang %s -target i386-unknown-unknown -O1 -emit-llvm -S -o - -std=gnu89 | FileCheck %s --check-prefix=CHECK1 -// CHECK1: define i32 @foo() -// CHECK1: define i32 @bar() -// CHECK1: define void @unreferenced1() +// RUN: %clang_cc1 %s -triple i386-unknown-unknown -O1 -disable-llvm-optzns -emit-llvm -o - -std=gnu89 | FileCheck %s --check-prefix=CHECK1 +// CHECK1-LABEL: define i32 @foo() +// CHECK1-LABEL: define i32 @bar() +// CHECK1-LABEL: define void @unreferenced1() // CHECK1-NOT: unreferenced2 -// CHECK1: define void @gnu_inline() -// CHECK1: define i32 @test1 -// CHECK1: define i32 @test2 -// CHECK1: define void @test3() -// CHECK1: define available_externally i32 @test4 -// CHECK1: define available_externally i32 @test5 -// CHECK1: define i32 @test6 -// CHECK1: define void @test7 +// CHECK1-LABEL: define void @gnu_inline() +// CHECK1-LABEL: define i32 @test1 +// CHECK1-LABEL: define i32 @test2 +// CHECK1-LABEL: define void @test3() +// CHECK1-LABEL: define available_externally i32 @test4 +// CHECK1-LABEL: define available_externally i32 @test5 +// CHECK1-LABEL: define i32 @test6 +// CHECK1-LABEL: define void @test7 // CHECK1: define i{{..}} @strlcpy // CHECK1-NOT: test9 -// CHECK1: define void @testA -// CHECK1: define void @testB -// CHECK1: define void @testC -// CHECK1: define available_externally void @gnu_ei_inline() -// CHECK1: define available_externally i32 @ei() +// CHECK1-LABEL: define void @testA +// CHECK1-LABEL: define void @testB +// CHECK1-LABEL: define void @testC +// CHECK1-LABEL: define available_externally void @gnu_ei_inline() +// CHECK1-LABEL: define available_externally i32 @ei() // RUN: echo "C99 tests:" -// RUN: %clang %s -target i386-unknown-unknown -O1 -emit-llvm -S -o - -std=gnu99 | FileCheck %s --check-prefix=CHECK2 -// CHECK2: define i32 @ei() -// CHECK2: define i32 @bar() +// RUN: %clang_cc1 %s -triple i386-unknown-unknown -O1 -disable-llvm-optzns -emit-llvm -o - -std=gnu99 | FileCheck %s --check-prefix=CHECK2 +// CHECK2-LABEL: define i32 @ei() +// CHECK2-LABEL: define i32 @bar() // CHECK2-NOT: unreferenced1 -// CHECK2: define void @unreferenced2() -// CHECK2: define void @gnu_inline() -// CHECK2: define i32 @test1 -// CHECK2: define i32 @test2 -// CHECK2: define void @test3 
-// CHECK2: define available_externally i32 @test4 -// CHECK2: define available_externally i32 @test5 -// CHECK2: define i32 @test6 -// CHECK2: define void @test7 +// CHECK2-LABEL: define void @unreferenced2() +// CHECK2-LABEL: define void @gnu_inline() +// CHECK2-LABEL: define i32 @test1 +// CHECK2-LABEL: define i32 @test2 +// CHECK2-LABEL: define void @test3 +// CHECK2-LABEL: define available_externally i32 @test4 +// CHECK2-LABEL: define available_externally i32 @test5 +// CHECK2-LABEL: define i32 @test6 +// CHECK2-LABEL: define void @test7 // CHECK2: define available_externally i{{..}} @strlcpy -// CHECK2: define void @test9 -// CHECK2: define void @testA -// CHECK2: define void @testB -// CHECK2: define void @testC -// CHECK2: define available_externally void @gnu_ei_inline() -// CHECK2: define available_externally i32 @foo() +// CHECK2-LABEL: define void @test9 +// CHECK2-LABEL: define void @testA +// CHECK2-LABEL: define void @testB +// CHECK2-LABEL: define void @testC +// CHECK2-LABEL: define available_externally void @gnu_ei_inline() +// CHECK2-LABEL: define available_externally i32 @foo() // RUN: echo "C++ tests:" -// RUN: %clang -x c++ %s -target i386-unknown-unknown -O1 -emit-llvm -S -o - -std=c++98 | FileCheck %s --check-prefix=CHECK3 -// CHECK3: define i32 @_Z3barv() -// CHECK3: define linkonce_odr i32 @_Z3foov() +// RUN: %clang_cc1 -x c++ %s -triple i386-unknown-unknown -O1 -disable-llvm-optzns -emit-llvm -o - -std=c++98 | FileCheck %s --check-prefix=CHECK3 +// CHECK3-LABEL: define i32 @_Z3barv() +// CHECK3-LABEL: define linkonce_odr i32 @_Z3foov() // CHECK3-NOT: unreferenced -// CHECK3: define void @_Z10gnu_inlinev() -// CHECK3: define available_externally void @_Z13gnu_ei_inlinev() -// CHECK3: define linkonce_odr i32 @_Z2eiv() +// CHECK3-LABEL: define void @_Z10gnu_inlinev() +// CHECK3-LABEL: define available_externally void @_Z13gnu_ei_inlinev() +// CHECK3-LABEL: define linkonce_odr i32 @_Z2eiv() + +// RUN: echo "MS C Mode tests:" +// RUN: %clang_cc1 %s -triple i386-unknown-unknown -O1 -disable-llvm-optzns -emit-llvm -o - -std=c99 -fms-compatibility | FileCheck %s --check-prefix=CHECK4 +// CHECK4-LABEL: define i32 @bar() +// CHECK4-LABEL: define void @gnu_inline() +// CHECK4-LABEL: define available_externally void @gnu_ei_inline() +// CHECK4-LABEL: define linkonce_odr i32 @foo() +// CHECK4-NOT: unreferenced +// CHECK4-LABEL: define linkonce_odr i32 @ei() extern __inline int ei() { return 123; } diff --git a/test/CodeGen/inline2.c b/test/CodeGen/inline2.c index fca4fff..670ae20 100644 --- a/test/CodeGen/inline2.c +++ b/test/CodeGen/inline2.c @@ -1,60 +1,60 @@ -// RUN: %clang_cc1 -O1 -std=gnu89 -triple i386-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix GNU89 %s -// RUN: %clang_cc1 -O1 -std=c99 -triple i386-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix C99 %s +// RUN: %clang_cc1 -O1 -std=gnu89 -triple i386-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix CHECK-GNU89 %s +// RUN: %clang_cc1 -O1 -std=c99 -triple i386-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix CHECK-C99 %s -// CHECK-GNU89: define i32 @f0() -// CHECK-C99: define i32 @f0() +// CHECK-GNU89-LABEL: define i32 @f0() +// CHECK-C99-LABEL: define i32 @f0() int f0(void); int f0(void) { return 0; } -// CHECK-GNU89: define i32 @f1() -// CHECK-C99: define i32 @f1() +// CHECK-GNU89-LABEL: define i32 @f1() +// CHECK-C99-LABEL: define i32 @f1() inline int f1(void); int f1(void) { return 0; } -// CHECK-GNU89: define i32 @f2() -// CHECK-C99: define i32 @f2() +// 
CHECK-GNU89-LABEL: define i32 @f2() +// CHECK-C99-LABEL: define i32 @f2() int f2(void); inline int f2(void) { return 0; } -// CHECK-GNU89: define i32 @f3() -// CHECK-C99: define i32 @f3() +// CHECK-GNU89-LABEL: define i32 @f3() +// CHECK-C99-LABEL: define i32 @f3() extern inline int f3(void); int f3(void) { return 0; } -// CHECK-GNU89: define i32 @f5() -// CHECK-C99: define i32 @f5() +// CHECK-GNU89-LABEL: define i32 @f5() +// CHECK-C99-LABEL: define i32 @f5() extern inline int f5(void); inline int f5(void) { return 0; } -// CHECK-GNU89: define i32 @f6() -// CHECK-C99: define i32 @f6() +// CHECK-GNU89-LABEL: define i32 @f6() +// CHECK-C99-LABEL: define i32 @f6() inline int f6(void); extern inline int f6(void) { return 0; } -// CHECK-GNU89: define i32 @f7() -// CHECK-C99: define i32 @f7() +// CHECK-GNU89-LABEL: define i32 @f7() +// CHECK-C99-LABEL: define i32 @f7() extern inline int f7(void); extern int f7(void) { return 0; } -// CHECK-GNU89: define i32 @fA() +// CHECK-GNU89-LABEL: define i32 @fA() inline int fA(void) { return 0; } -// CHECK-GNU89: define available_externally i32 @f4() -// CHECK-C99: define i32 @f4() +// CHECK-GNU89-LABEL: define available_externally i32 @f4() +// CHECK-C99-LABEL: define i32 @f4() int f4(void); extern inline int f4(void) { return 0; } -// CHECK-GNU89: define available_externally i32 @f8() -// CHECK-C99: define i32 @f8() +// CHECK-GNU89-LABEL: define available_externally i32 @f8() +// CHECK-C99-LABEL: define i32 @f8() extern int f8(void); extern inline int f8(void) { return 0; } -// CHECK-GNU89: define available_externally i32 @f9() -// CHECK-C99: define i32 @f9() +// CHECK-GNU89-LABEL: define available_externally i32 @f9() +// CHECK-C99-LABEL: define i32 @f9() extern inline int f9(void); extern inline int f9(void) { return 0; } -// CHECK-C99: define available_externally i32 @fA() +// CHECK-C99-LABEL: define available_externally i32 @fA() int test_all() { return f0() + f1() + f2() + f3() + f4() + f5() + f6() + f7() + f8() + f9() diff --git a/test/CodeGen/integer-overflow.c b/test/CodeGen/integer-overflow.c index ed2dede..a007960 100644 --- a/test/CodeGen/integer-overflow.c +++ b/test/CodeGen/integer-overflow.c @@ -8,9 +8,9 @@ // Tests for signed integer overflow stuff. 
// rdar://7432000 rdar://7221421 void test1() { - // DEFAULT: define void @test1 - // WRAPV: define void @test1 - // TRAPV: define void @test1 + // DEFAULT-LABEL: define void @test1 + // WRAPV-LABEL: define void @test1 + // TRAPV-LABEL: define void @test1 extern volatile int f11G, a, b; // DEFAULT: add nsw i32 diff --git a/test/CodeGen/le32-arguments.c b/test/CodeGen/le32-arguments.c index 2cbbc0f..d26640e 100644 --- a/test/CodeGen/le32-arguments.c +++ b/test/CodeGen/le32-arguments.c @@ -2,7 +2,7 @@ // Basic argument/attribute tests for le32/PNaCl -// CHECK: define void @f0(i32 %i, i32 %j, double %k) +// CHECK-LABEL: define void @f0(i32 %i, i32 %j, double %k) void f0(int i, long j, double k) {} typedef struct { @@ -10,27 +10,27 @@ typedef struct { int bb; } s1; // Structs should be passed byval and not split up -// CHECK: define void @f1(%struct.s1* byval %i) +// CHECK-LABEL: define void @f1(%struct.s1* byval %i) void f1(s1 i) {} typedef struct { int cc; } s2; // Structs should be returned sret and not simplified by the frontend -// CHECK: define void @f2(%struct.s2* noalias sret %agg.result) +// CHECK-LABEL: define void @f2(%struct.s2* noalias sret %agg.result) s2 f2() { s2 foo; return foo; } -// CHECK: define void @f3(i64 %i) +// CHECK-LABEL: define void @f3(i64 %i) void f3(long long i) {} // i8/i16 should be signext, i32 and higher should not -// CHECK: define void @f4(i8 signext %a, i16 signext %b) +// CHECK-LABEL: define void @f4(i8 signext %a, i16 signext %b) void f4(char a, short b) {} -// CHECK: define void @f5(i8 zeroext %a, i16 zeroext %b) +// CHECK-LABEL: define void @f5(i8 zeroext %a, i16 zeroext %b) void f5(unsigned char a, unsigned short b) {} @@ -40,7 +40,7 @@ enum my_enum { ENUM3, }; // Enums should be treated as the underlying i32 -// CHECK: define void @f6(i32 %a) +// CHECK-LABEL: define void @f6(i32 %a) void f6(enum my_enum a) {} union simple_union { @@ -48,7 +48,7 @@ union simple_union { char b; }; // Unions should be passed as byval structs -// CHECK: define void @f7(%union.simple_union* byval %s) +// CHECK-LABEL: define void @f7(%union.simple_union* byval %s) void f7(union simple_union s) {} typedef struct { @@ -57,5 +57,5 @@ typedef struct { int b8 : 8; } bitfield1; // Bitfields should be passed as byval structs -// CHECK: define void @f8(%struct.bitfield1* byval %bf1) +// CHECK-LABEL: define void @f8(%struct.bitfield1* byval %bf1) void f8(bitfield1 bf1) {} diff --git a/test/CodeGen/le32-libcall-pow.c b/test/CodeGen/le32-libcall-pow.c new file mode 100644 index 0000000..1b8a7a3 --- /dev/null +++ b/test/CodeGen/le32-libcall-pow.c @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -fno-math-builtin -fmath-errno -emit-llvm -o - %s -triple le32-unknown-nacl | FileCheck %s +// RUN: %clang_cc1 -fno-math-builtin -emit-llvm -o - %s -triple le32-unknown-nacl | FileCheck %s + +// le32 (PNaCl) never generates intrinsics for pow calls, with or without +// errno, when the -fno-math-builtin flag is passed to -cc1. A separate test +// makes sure this flag is indeed passed for le32. 
+ +float powf(float, float); +double pow(double, double); +long double powl(long double, long double); + +// CHECK-LABEL: define void @test_pow +void test_pow(float a0, double a1, long double a2) { + // CHECK: call float @powf + float l0 = powf(a0, a0); + + // CHECK: call double @pow + double l1 = pow(a1, a1); + + // CHECK: call double @powl + long double l2 = powl(a2, a2); +} + +// CHECK: declare float @powf(float, float) +// CHECK: declare double @pow(double, double) +// CHECK: declare double @powl(double, double) + diff --git a/test/CodeGen/libcall-declarations.c b/test/CodeGen/libcall-declarations.c index d07590f..6442e29 100644 --- a/test/CodeGen/libcall-declarations.c +++ b/test/CodeGen/libcall-declarations.c @@ -1,194 +1,596 @@ // RUN: %clang_cc1 -triple x86_64-apple-darwin12 -S -o - -emit-llvm %s | FileCheck %s -check-prefix=CHECK-NOERRNO // RUN: %clang_cc1 -triple x86_64-linux-gnu -S -o - -emit-llvm -fmath-errno %s | FileCheck %s -check-prefix=CHECK-ERRNO +// RUN: %clang_cc1 -triple x86_64-apple-darwin12 -S -o - -emit-llvm -x c++ %s | FileCheck %s -check-prefix=CHECK-NOERRNO +// RUN: %clang_cc1 -triple x86_64-linux-gnu -S -o - -emit-llvm -x c++ -fmath-errno %s | FileCheck %s -check-prefix=CHECK-ERRNO // Prototypes. +#ifdef __cplusplus +extern "C" { +#endif +double atan2(double, double); +float atan2f(float, float); +long double atan2l(long double, long double); +int abs(int); +long int labs(long int); +long long int llabs(long long int); +double copysign(double, double); +float copysignf(float, float); +long double copysignl(long double, long double); +double fabs(double); +float fabsf(float); +long double fabsl(long double); +double fmod(double, double); +float fmodf(float, float); +long double fmodl(long double, long double); +double frexp(double, int *); +float frexpf(float, int *); +long double frexpl(long double, int *); +double ldexp(double, int); +float ldexpf(float, int); +long double ldexpl(long double, int); +double modf(double, double *); +float modff(float, float *); +long double modfl(long double, long double *); +double nan(const char *); +float nanf(const char *); +long double nanl(const char *); +double pow(double, double); +float powf(float, float); +long double powl(long double, long double); double acos(double); -long double acosl(long double); float acosf(float); +long double acosl(long double); +double acosh(double); +float acoshf(float); +long double acoshl(long double); double asin(double); -long double asinl(long double); float asinf(float); +long double asinl(long double); +double asinh(double); +float asinhf(float); +long double asinhl(long double); double atan(double); -long double atanl(long double); float atanf(float); -double atan2(double, double); -long double atan2l(long double, long double); -float atan2f(float, float); +long double atanl( long double); +double atanh(double); +float atanhf(float); +long double atanhl(long double); +double cbrt(double); +float cbrtf(float); +long double cbrtl(long double); double ceil(double); -long double ceill(long double); float ceilf(float); -double copysign(double, double); -long double copysignl(long double, long double); -float copysignf(float, float); +long double ceill(long double); double cos(double); -long double cosl(long double); float cosf(float); +long double cosl(long double); +double cosh(double); +float coshf(float); +long double coshl(long double); +double erf(double); +float erff(float); +long double erfl(long double); +double erfc(double); +float erfcf(float); +long double erfcl(long double); 
double exp(double); -long double expl(long double); float expf(float); +long double expl(long double); double exp2(double); -long double exp2l(long double); float exp2f(float); -double fabs(double); -long double fabsl(long double); -float fabsf(float); +long double exp2l(long double); +double expm1(double); +float expm1f(float); +long double expm1l(long double); +double fdim(double, double); +float fdimf(float, float); +long double fdiml(long double, long double); double floor(double); -long double floorl(long double); float floorf(float); +long double floorl(long double); double fma(double, double, double); -long double fmal(long double, long double, long double); float fmaf(float, float, float); +long double fmal(long double, long double, long double); double fmax(double, double); -long double fmaxl(long double, long double); float fmaxf(float, float); +long double fmaxl(long double, long double); double fmin(double, double); -long double fminl(long double, long double); float fminf(float, float); +long double fminl(long double, long double); +double hypot(double, double); +float hypotf(float, float); +long double hypotl(long double, long double); +int ilogb(double); +int ilogbf(float); +int ilogbl(long double); +double lgamma(double); +float lgammaf(float); +long double lgammal(long double); +long long int llrint(double); +long long int llrintf(float); +long long int llrintl(long double); +long long int llround(double); +long long int llroundf(float); +long long int llroundl(long double); double log(double); -long double logl(long double); float logf(float); +long double logl(long double); +double log10(double); +float log10f(float); +long double log10l(long double); +double log1p(double); +float log1pf(float); +long double log1pl(long double); double log2(double); -long double log2l(long double); float log2f(float); +long double log2l(long double); +double logb(double); +float logbf(float); +long double logbl(long double); +long int lrint(double); +long int lrintf(float); +long int lrintl(long double); +long int lround(double); +long int lroundf(float); +long int lroundl(long double); double nearbyint(double); -long double nearbyintl(long double); float nearbyintf(float); -double pow(double, double); -long double powl(long double, long double); -float powf(float, float); +long double nearbyintl(long double); +double nextafter(double, double); +float nextafterf(float, float); +long double nextafterl(long double, long double); +double nexttoward(double, long double); +float nexttowardf(float, long double); +long double nexttowardl(long double, long double); +double remainder(double, double); +float remainderf(float, float); +long double remainderl(long double, long double); double rint(double); -long double rintl(long double); float rintf(float); +long double rintl(long double); double round(double); -long double roundl(long double); float roundf(float); +long double roundl(long double); +double scalbln(double, long int exp); +float scalblnf(float, long int exp); +long double scalblnl(long double, long int exp); +double scalbn(double, int exp); +float scalbnf(float, int exp); +long double scalbnl(long double, int exp); double sin(double); -long double sinl(long double); float sinf(float); +long double sinl(long double); +double sinh(double); +float sinhf(float); +long double sinhl(long double); double sqrt(double); -long double sqrtl(long double); float sqrtf(float); +long double sqrtl(long double); double tan(double); -long double tanl(long double); float tanf(float); +long double 
tanl(long double); +double tanh(double); +float tanhf(float); +long double tanhl(long double); +double tgamma(double); +float tgammaf(float); +long double tgammal(long double); double trunc(double); -long double truncl(long double); float truncf(float); +long double truncl(long double); +double cabs(double _Complex); +float cabsf(float _Complex); +long double cabsl(long double _Complex); +double _Complex cacos(double _Complex); +float _Complex cacosf(float _Complex); +long double _Complex cacosl(long double _Complex); +double _Complex cacosh(double _Complex); +float _Complex cacoshf(float _Complex); +long double _Complex cacoshl(long double _Complex); +double carg(double _Complex); +float cargf(float _Complex); +long double cargl(long double _Complex); +double _Complex casin(double _Complex); +float _Complex casinf(float _Complex); +long double _Complex casinl(long double _Complex); +double _Complex casinh(double _Complex); +float _Complex casinhf(float _Complex); +long double _Complex casinhl(long double _Complex); +double _Complex catan(double _Complex); +float _Complex catanf(float _Complex); +long double _Complex catanl(long double _Complex); +double _Complex catanh(double _Complex); +float _Complex catanhf(float _Complex); +long double _Complex catanhl(long double _Complex); +double _Complex ccos(double _Complex); +float _Complex ccosf(float _Complex); +long double _Complex ccosl(long double _Complex); +double _Complex ccosh(double _Complex); +float _Complex ccoshf(float _Complex); +long double _Complex ccoshl(long double _Complex); +double _Complex cexp(double _Complex); +float _Complex cexpf(float _Complex); +long double _Complex cexpl(long double _Complex); +double cimag(double _Complex); +float cimagf(float _Complex); +long double cimagl(long double _Complex); +double _Complex conj(double _Complex); +float _Complex conjf(float _Complex); +long double _Complex conjl(long double _Complex); +double _Complex clog(double _Complex); +float _Complex clogf(float _Complex); +long double _Complex clogl(long double _Complex); +double _Complex cproj(double _Complex); +float _Complex cprojf(float _Complex); +long double _Complex cprojl(long double _Complex); +double _Complex cpow(double _Complex, _Complex double); +float _Complex cpowf(float _Complex, _Complex float); +long double _Complex cpowl(long double _Complex, _Complex long double); +double creal(double _Complex); +float crealf(float _Complex); +long double creall(long double _Complex); +double _Complex csin(double _Complex); +float _Complex csinf(float _Complex); +long double _Complex csinl(long double _Complex); +double _Complex csinh(double _Complex); +float _Complex csinhf(float _Complex); +long double _Complex csinhl(long double _Complex); +double _Complex csqrt(double _Complex); +float _Complex csqrtf(float _Complex); +long double _Complex csqrtl(long double _Complex); +double _Complex ctan(double _Complex); +float _Complex ctanf(float _Complex); +long double _Complex ctanl(long double _Complex); +double _Complex ctanh(double _Complex); +float _Complex ctanhf(float _Complex); +long double _Complex ctanhl(long double _Complex); +#ifdef __cplusplus +} +#endif // Force emission of the declare statements. 
+#define F(x) ((void*)x) void *use[] = { - acos, acosl, acosf, asin, asinl, asinf, atan, atanl, atanf, atan2, atan2l, - atan2f, ceil, ceill, ceilf, copysign, copysignl, copysignf, cos, cosl, cosf, - exp, expl, expf, exp2, exp2l, exp2f, fabs, fabsl, fabsf, floor, floorl, - floorf, fma, fmal, fmaf, fmax, fmaxl, fmaxf, fmin, fminl, fminf, log, logl, - logf, log2, log2l, log2f, nearbyint, nearbyintl, nearbyintf, pow, powl, powf, - rint, rintl, rintf, round, roundl, roundf, sin, sinl, sinf, sqrt, sqrtl, - sqrtf, tan, tanl, tanf, trunc, truncl, truncf + F(atan2), F(atan2f), F(atan2l), F(abs), F(labs), + F(llabs), F(copysign), F(copysignf), F(copysignl), F(fabs), + F(fabsf), F(fabsl), F(fmod), F(fmodf), F(fmodl), + F(frexp), F(frexpf), F(frexpl), F(ldexp), F(ldexpf), + F(ldexpl), F(modf), F(modff), F(modfl), F(nan), + F(nanf), F(nanl), F(pow), F(powf), F(powl), + F(acos), F(acosf), F(acosl), F(acosh), F(acoshf), + F(acoshl), F(asin), F(asinf), F(asinl), F(asinh), + F(asinhf), F(asinhl), F(atan), F(atanf), F(atanl), + F(atanh), F(atanhf), F(atanhl), F(cbrt), F(cbrtf), + F(cbrtl), F(ceil), F(ceilf), F(ceill), F(cos), + F(cosf), F(cosl), F(cosh), F(coshf), F(coshl), + F(erf), F(erff), F(erfl), F(erfc), F(erfcf), + F(erfcl), F(exp), F(expf), F(expl), F(exp2), + F(exp2f), F(exp2l), F(expm1), F(expm1f), F(expm1l), + F(fdim), F(fdimf), F(fdiml), F(floor), F(floorf), + F(floorl), F(fma), F(fmaf), F(fmal), F(fmax), + F(fmaxf), F(fmaxl), F(fmin), F(fminf), F(fminl), + F(hypot), F(hypotf), F(hypotl), F(ilogb), F(ilogbf), + F(ilogbl), F(lgamma), F(lgammaf), F(lgammal), F(llrint), + F(llrintf), F(llrintl), F(llround), F(llroundf), F(llroundl), + F(log), F(logf), F(logl), F(log10), F(log10f), + F(log10l), F(log1p), F(log1pf), F(log1pl), F(log2), + F(log2f), F(log2l), F(logb), F(logbf), F(logbl), + F(lrint), F(lrintf), F(lrintl), F(lround), F(lroundf), + F(lroundl), F(nearbyint), F(nearbyintf), F(nearbyintl), F(nextafter), + F(nextafterf), F(nextafterl), F(nexttoward), F(nexttowardf), F(nexttowardl), + F(remainder), F(remainderf), F(remainderl), F(rint), F(rintf), + F(rintl), F(round), F(roundf), F(roundl), F(scalbln), + F(scalblnf), F(scalblnl), F(scalbn), F(scalbnf), F(scalbnl), + F(sin), F(sinf), F(sinl), F(sinh), F(sinhf), + F(sinhl), F(sqrt), F(sqrtf), F(sqrtl), F(tan), + F(tanf), F(tanl), F(tanh), F(tanhf), F(tanhl), + F(tgamma), F(tgammaf), F(tgammal), F(trunc), F(truncf), + F(truncl), F(cabs), F(cabsf), F(cabsl), F(cacos), + F(cacosf), F(cacosl), F(cacosh), F(cacoshf), F(cacoshl), + F(carg), F(cargf), F(cargl), F(casin), F(casinf), + F(casinl), F(casinh), F(casinhf), F(casinhl), F(catan), + F(catanf), F(catanl), F(catanh), F(catanhf), F(catanhl), + F(ccos), F(ccosf), F(ccosl), F(ccosh), F(ccoshf), + F(ccoshl), F(cexp), F(cexpf), F(cexpl), F(cimag), + F(cimagf), F(cimagl), F(conj), F(conjf), F(conjl), + F(clog), F(clogf), F(clogl), F(cproj), F(cprojf), + F(cprojl), F(cpow), F(cpowf), F(cpowl), F(creal), + F(crealf), F(creall), F(csin), F(csinf), F(csinl), + F(csinh), F(csinhf), F(csinhl), F(csqrt), F(csqrtf), + F(csqrtl), F(ctan), F(ctanf), F(ctanl), F(ctanh), + F(ctanhf), F(ctanhl) }; -// CHECK-NOERRNO: declare double @acos(double) [[NUW:#[0-9]+]] -// CHECK-NOERRNO: declare x86_fp80 @acosl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @atan2(double, double) [[NUW:#[0-9]+]] +// CHECK-NOERRNO: declare float @atan2f(float, float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @atan2l(x86_fp80, x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare i32 @abs(i32) [[NUW]] +// CHECK-NOERRNO: declare i64 @labs(i64) 
[[NUW]] +// CHECK-NOERRNO: declare i64 @llabs(i64) [[NUW]] +// CHECK-NOERRNO: declare double @copysign(double, double) [[NUW]] +// CHECK-NOERRNO: declare float @copysignf(float, float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @copysignl(x86_fp80, x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @fabs(double) [[NUW]] +// CHECK-NOERRNO: declare float @fabsf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @fabsl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @fmod(double, double) [[NUW]] +// CHECK-NOERRNO: declare float @fmodf(float, float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @fmodl(x86_fp80, x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @ldexp(double, i32) [[NUW]] +// CHECK-NOERRNO: declare float @ldexpf(float, i32) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @ldexpl(x86_fp80, i32) [[NUW]] +// CHECK-NOERRNO: declare double @nan(i8*) [[NUW]] +// CHECK-NOERRNO: declare float @nanf(i8*) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @nanl(i8*) [[NUW]] +// CHECK-NOERRNO: declare double @pow(double, double) [[NUW]] +// CHECK-NOERRNO: declare float @powf(float, float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @powl(x86_fp80, x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @acos(double) [[NUW]] // CHECK-NOERRNO: declare float @acosf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @acosl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @acosh(double) [[NUW]] +// CHECK-NOERRNO: declare float @acoshf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @acoshl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @asin(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @asinl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @asinf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @asinl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @asinh(double) [[NUW]] +// CHECK-NOERRNO: declare float @asinhf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @asinhl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @atan(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @atanl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @atanf(float) [[NUW]] -// CHECK-NOERRNO: declare double @atan2(double, double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @atan2l(x86_fp80, x86_fp80) [[NUW]] -// CHECK-NOERRNO: declare float @atan2f(float, float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @atanl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @atanh(double) [[NUW]] +// CHECK-NOERRNO: declare float @atanhf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @atanhl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @cbrt(double) [[NUW]] +// CHECK-NOERRNO: declare float @cbrtf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @cbrtl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @ceil(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @ceill(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @ceilf(float) [[NUW]] -// CHECK-NOERRNO: declare double @copysign(double, double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @copysignl(x86_fp80, x86_fp80) [[NUW]] -// CHECK-NOERRNO: declare float @copysignf(float, float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @ceill(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @cos(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @cosl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @cosf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @cosl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @cosh(double) [[NUW]] +// CHECK-NOERRNO: declare float @coshf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @coshl(x86_fp80) [[NUW]] +// 
CHECK-NOERRNO: declare double @erf(double) [[NUW]] +// CHECK-NOERRNO: declare float @erff(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @erfl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @erfc(double) [[NUW]] +// CHECK-NOERRNO: declare float @erfcf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @erfcl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @exp(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @expl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @expf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @expl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @exp2(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @exp2l(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @exp2f(float) [[NUW]] -// CHECK-NOERRNO: declare double @fabs(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @fabsl(x86_fp80) [[NUW]] -// CHECK-NOERRNO: declare float @fabsf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @exp2l(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @expm1(double) [[NUW]] +// CHECK-NOERRNO: declare float @expm1f(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @expm1l(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @fdim(double, double) [[NUW]] +// CHECK-NOERRNO: declare float @fdimf(float, float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @fdiml(x86_fp80, x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @floor(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @floorl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @floorf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @floorl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @fma(double, double, double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @fmal(x86_fp80, x86_fp80, x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @fmaf(float, float, float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @fmal(x86_fp80, x86_fp80, x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @fmax(double, double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @fmaxl(x86_fp80, x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @fmaxf(float, float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @fmaxl(x86_fp80, x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @fmin(double, double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @fminl(x86_fp80, x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @fminf(float, float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @fminl(x86_fp80, x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @hypot(double, double) [[NUW]] +// CHECK-NOERRNO: declare float @hypotf(float, float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @hypotl(x86_fp80, x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare i32 @ilogb(double) [[NUW]] +// CHECK-NOERRNO: declare i32 @ilogbf(float) [[NUW]] +// CHECK-NOERRNO: declare i32 @ilogbl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @lgamma(double) [[NUW]] +// CHECK-NOERRNO: declare float @lgammaf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @lgammal(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare i64 @llrint(double) [[NUW]] +// CHECK-NOERRNO: declare i64 @llrintf(float) [[NUW]] +// CHECK-NOERRNO: declare i64 @llrintl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare i64 @llround(double) [[NUW]] +// CHECK-NOERRNO: declare i64 @llroundf(float) [[NUW]] +// CHECK-NOERRNO: declare i64 @llroundl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @log(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @logl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @logf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @logl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double 
@log10(double) [[NUW]] +// CHECK-NOERRNO: declare float @log10f(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @log10l(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @log1p(double) [[NUW]] +// CHECK-NOERRNO: declare float @log1pf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @log1pl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @log2(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @log2l(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @log2f(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @log2l(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @logb(double) [[NUW]] +// CHECK-NOERRNO: declare float @logbf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @logbl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare i64 @lrint(double) [[NUW]] +// CHECK-NOERRNO: declare i64 @lrintf(float) [[NUW]] +// CHECK-NOERRNO: declare i64 @lrintl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare i64 @lround(double) [[NUW]] +// CHECK-NOERRNO: declare i64 @lroundf(float) [[NUW]] +// CHECK-NOERRNO: declare i64 @lroundl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @nearbyint(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @nearbyintl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @nearbyintf(float) [[NUW]] -// CHECK-NOERRNO: declare double @pow(double, double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @powl(x86_fp80, x86_fp80) [[NUW]] -// CHECK-NOERRNO: declare float @powf(float, float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @nearbyintl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @nextafter(double, double) [[NUW]] +// CHECK-NOERRNO: declare float @nextafterf(float, float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @nextafterl(x86_fp80, x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @nexttoward(double, x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare float @nexttowardf(float, x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @nexttowardl(x86_fp80, x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @remainder(double, double) [[NUW]] +// CHECK-NOERRNO: declare float @remainderf(float, float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @remainderl(x86_fp80, x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @rint(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @rintl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @rintf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @rintl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @round(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @roundl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @roundf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @roundl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @scalbln(double, i64) [[NUW]] +// CHECK-NOERRNO: declare float @scalblnf(float, i64) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @scalblnl(x86_fp80, i64) [[NUW]] +// CHECK-NOERRNO: declare double @scalbn(double, i32) [[NUW]] +// CHECK-NOERRNO: declare float @scalbnf(float, i32) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @scalbnl(x86_fp80, i32) [[NUW]] // CHECK-NOERRNO: declare double @sin(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @sinl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @sinf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @sinl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @sinh(double) [[NUW]] +// CHECK-NOERRNO: declare float @sinhf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @sinhl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @sqrt(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @sqrtl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float 
@sqrtf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @sqrtl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @tan(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @tanl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @tanf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @tanl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @tanh(double) [[NUW]] +// CHECK-NOERRNO: declare float @tanhf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @tanhl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @tgamma(double) [[NUW]] +// CHECK-NOERRNO: declare float @tgammaf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @tgammal(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare double @trunc(double) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @truncl(x86_fp80) [[NUW]] // CHECK-NOERRNO: declare float @truncf(float) [[NUW]] +// CHECK-NOERRNO: declare x86_fp80 @truncl(x86_fp80) [[NUW]] +// CHECK-NOERRNO: declare double @cabs(double, double) [[NUW]] +// CHECK-NOERRNO: declare float @cabsf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @cacos(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @cacosf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @cacosh(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @cacoshf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare double @carg(double, double) [[NUW]] +// CHECK-NOERRNO: declare float @cargf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @casin(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @casinf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @casinh(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @casinhf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @catan(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @catanf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @catanh(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @catanhf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @ccos(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @ccosf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @ccosh(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @ccoshf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @cexp(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @cexpf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare double @cimag(double, double) [[NUW]] +// CHECK-NOERRNO: declare float @cimagf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @conj(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @conjf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @clog(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @clogf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @cproj(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @cprojf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @cpow(double, double, double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @cpowf(<2 x float>, <2 x float>) [[NUW]] +// CHECK-NOERRNO: declare double @creal(double, double) [[NUW]] +// CHECK-NOERRNO: declare float @crealf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @csin(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @csinf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @csinh(double, double) [[NUW]] +// CHECK-NOERRNO: 
declare <2 x float> @csinhf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @csqrt(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @csqrtf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @ctan(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @ctanf(<2 x float>) [[NUW]] +// CHECK-NOERRNO: declare { double, double } @ctanh(double, double) [[NUW]] +// CHECK-NOERRNO: declare <2 x float> @ctanhf(<2 x float>) [[NUW]] -// CHECK-ERRNO: declare double @ceil(double) [[NUW:#[0-9]+]] -// CHECK-ERRNO: declare x86_fp80 @ceill(x86_fp80) [[NUW]] -// CHECK-ERRNO: declare float @ceilf(float) [[NUW]] +// CHECK-ERRNO: declare i32 @abs(i32) [[NUW:#[0-9]+]] +// CHECK-ERRNO: declare i64 @labs(i64) [[NUW]] +// CHECK-ERRNO: declare i64 @llabs(i64) [[NUW]] // CHECK-ERRNO: declare double @copysign(double, double) [[NUW]] -// CHECK-ERRNO: declare x86_fp80 @copysignl(x86_fp80, x86_fp80) [[NUW]] // CHECK-ERRNO: declare float @copysignf(float, float) [[NUW]] +// CHECK-ERRNO: declare x86_fp80 @copysignl(x86_fp80, x86_fp80) [[NUW]] // CHECK-ERRNO: declare double @fabs(double) [[NUW]] -// CHECK-ERRNO: declare x86_fp80 @fabsl(x86_fp80) [[NUW]] // CHECK-ERRNO: declare float @fabsf(float) [[NUW]] +// CHECK-ERRNO: declare x86_fp80 @fabsl(x86_fp80) [[NUW]] +// CHECK-ERRNO: declare double @nan(i8*) [[NUW]] +// CHECK-ERRNO: declare float @nanf(i8*) [[NUW]] +// CHECK-ERRNO: declare x86_fp80 @nanl(i8*) [[NUW]] +// CHECK-ERRNO: declare double @ceil(double) [[NUW]] +// CHECK-ERRNO: declare float @ceilf(float) [[NUW]] +// CHECK-ERRNO: declare x86_fp80 @ceill(x86_fp80) [[NUW]] // CHECK-ERRNO: declare double @floor(double) [[NUW]] -// CHECK-ERRNO: declare x86_fp80 @floorl(x86_fp80) [[NUW]] // CHECK-ERRNO: declare float @floorf(float) [[NUW]] +// CHECK-ERRNO: declare x86_fp80 @floorl(x86_fp80) [[NUW]] // CHECK-ERRNO: declare double @fmax(double, double) [[NUW]] -// CHECK-ERRNO: declare x86_fp80 @fmaxl(x86_fp80, x86_fp80) [[NUW]] // CHECK-ERRNO: declare float @fmaxf(float, float) [[NUW]] +// CHECK-ERRNO: declare x86_fp80 @fmaxl(x86_fp80, x86_fp80) [[NUW]] // CHECK-ERRNO: declare double @fmin(double, double) [[NUW]] -// CHECK-ERRNO: declare x86_fp80 @fminl(x86_fp80, x86_fp80) [[NUW]] // CHECK-ERRNO: declare float @fminf(float, float) [[NUW]] +// CHECK-ERRNO: declare x86_fp80 @fminl(x86_fp80, x86_fp80) [[NUW]] // CHECK-ERRNO: declare double @nearbyint(double) [[NUW]] -// CHECK-ERRNO: declare x86_fp80 @nearbyintl(x86_fp80) [[NUW]] // CHECK-ERRNO: declare float @nearbyintf(float) [[NUW]] +// CHECK-ERRNO: declare x86_fp80 @nearbyintl(x86_fp80) [[NUW]] // CHECK-ERRNO: declare double @rint(double) [[NUW]] -// CHECK-ERRNO: declare x86_fp80 @rintl(x86_fp80) [[NUW]] // CHECK-ERRNO: declare float @rintf(float) [[NUW]] +// CHECK-ERRNO: declare x86_fp80 @rintl(x86_fp80) [[NUW]] // CHECK-ERRNO: declare double @round(double) [[NUW]] -// CHECK-ERRNO: declare x86_fp80 @roundl(x86_fp80) [[NUW]] // CHECK-ERRNO: declare float @roundf(float) [[NUW]] +// CHECK-ERRNO: declare x86_fp80 @roundl(x86_fp80) [[NUW]] // CHECK-ERRNO: declare double @trunc(double) [[NUW]] -// CHECK-ERRNO: declare x86_fp80 @truncl(x86_fp80) [[NUW]] // CHECK-ERRNO: declare float @truncf(float) [[NUW]] +// CHECK-ERRNO: declare x86_fp80 @truncl(x86_fp80) [[NUW]] +// CHECK-ERRNO: declare double @cabs(double, double) [[NUW]] +// CHECK-ERRNO: declare float @cabsf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @cacos(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @cacosf(<2 x float>) [[NUW]] +// 
CHECK-ERRNO: declare { double, double } @cacosh(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @cacoshf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare double @carg(double, double) [[NUW]] +// CHECK-ERRNO: declare float @cargf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @casin(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @casinf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @casinh(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @casinhf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @catan(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @catanf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @catanh(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @catanhf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @ccos(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @ccosf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @ccosh(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @ccoshf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @cexp(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @cexpf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare double @cimag(double, double) [[NUW]] +// CHECK-ERRNO: declare float @cimagf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @conj(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @conjf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @clog(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @clogf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @cproj(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @cprojf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @cpow(double, double, double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @cpowf(<2 x float>, <2 x float>) [[NUW]] +// CHECK-ERRNO: declare double @creal(double, double) [[NUW]] +// CHECK-ERRNO: declare float @crealf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @csin(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @csinf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @csinh(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @csinhf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @csqrt(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @csqrtf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @ctan(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @ctanf(<2 x float>) [[NUW]] +// CHECK-ERRNO: declare { double, double } @ctanh(double, double) [[NUW]] +// CHECK-ERRNO: declare <2 x float> @ctanhf(<2 x float>) [[NUW]] // CHECK-NOERRNO: attributes [[NUW]] = { nounwind readnone{{.*}} } diff --git a/test/CodeGen/libcalls-complex.c b/test/CodeGen/libcalls-complex.c index 7bcfa60..22c97b6 100644 --- a/test/CodeGen/libcalls-complex.c +++ b/test/CodeGen/libcalls-complex.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -fno-builtin -emit-llvm -o - %s -triple i386-unknown-unknown | FileCheck -check-prefix YES %s -// RUN: %clang_cc1 -emit-llvm -o - %s -triple i386-unknown-unknown | FileCheck -check-prefix NO %s +// RUN: %clang_cc1 -fno-builtin -emit-llvm -o - %s -triple i386-unknown-unknown | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -emit-llvm -o - %s -triple i386-unknown-unknown | FileCheck -check-prefix CHECK-NO %s extern float crealf(float _Complex); extern double creal(double 
_Complex); diff --git a/test/CodeGen/libcalls-d.c b/test/CodeGen/libcalls-d.c index b375f2b..3d88eb1 100644 --- a/test/CodeGen/libcalls-d.c +++ b/test/CodeGen/libcalls-d.c @@ -1,8 +1,8 @@ // llvm-gcc -O1+ should run simplify libcalls, O0 shouldn't // and -fno-builtins shouldn't. // -fno-math-errno should emit an llvm intrinsic, -fmath-errno should not. -// RUN: %clang_cc1 %s -emit-llvm -fno-math-errno -O0 -o - | grep {call.*exp2\\.f64} -// RUN: %clang_cc1 %s -emit-llvm -fmath-errno -O0 -o - | grep {call.*exp2} +// RUN: %clang_cc1 %s -emit-llvm -fno-math-errno -o - | grep {call.*exp2\\.f64} +// RUN: %clang_cc1 %s -emit-llvm -fmath-errno -o - | grep {call.*exp2} // RUN: %clang_cc1 %s -emit-llvm -O1 -o - | grep {call.*ldexp} // RUN: %clang_cc1 %s -emit-llvm -O3 -fno-builtin -o - | grep {call.*exp2} diff --git a/test/CodeGen/libcalls-ld.c b/test/CodeGen/libcalls-ld.c index 2758761..dfa7835 100644 --- a/test/CodeGen/libcalls-ld.c +++ b/test/CodeGen/libcalls-ld.c @@ -1,8 +1,8 @@ // llvm-gcc -O1+ should run simplify libcalls, O0 shouldn't // and -fno-builtins shouldn't. // -fno-math-errno should emit an llvm intrinsic, -fmath-errno should not. -// RUN: %clang_cc1 %s -emit-llvm -fno-math-errno -O0 -o - | grep {call.*exp2\\..*f} -// RUN: %clang_cc1 %s -emit-llvm -fmath-errno -O0 -o - | grep {call.*exp2l} +// RUN: %clang_cc1 %s -emit-llvm -fno-math-errno -o - | grep {call.*exp2\\..*f} +// RUN: %clang_cc1 %s -emit-llvm -fmath-errno -o - | grep {call.*exp2l} // RUN: %clang_cc1 %s -emit-llvm -O1 -o - | grep {call.*ldexp} // RUN: %clang_cc1 %s -emit-llvm -O3 -fno-builtin -o - | grep {call.*exp2l} diff --git a/test/CodeGen/libcalls.c b/test/CodeGen/libcalls.c index 8f8e182..3112c87 100644 --- a/test/CodeGen/libcalls.c +++ b/test/CodeGen/libcalls.c @@ -1,8 +1,10 @@ -// RUN: %clang_cc1 -fmath-errno -emit-llvm -o - %s -triple i386-unknown-unknown | FileCheck -check-prefix YES %s -// RUN: %clang_cc1 -emit-llvm -o - %s -triple i386-unknown-unknown | FileCheck -check-prefix NO %s +// RUN: %clang_cc1 -fmath-errno -emit-llvm -o - %s -triple i386-unknown-unknown | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -emit-llvm -o - %s -triple i386-unknown-unknown | FileCheck -check-prefix CHECK-NO %s +// RUN: %clang_cc1 -menable-unsafe-fp-math -emit-llvm -o - %s -triple i386-unknown-unknown | FileCheck -check-prefix CHECK-FAST %s -// CHECK-YES: define void @test_sqrt -// CHECK-NO: define void @test_sqrt +// CHECK-YES-LABEL: define void @test_sqrt +// CHECK-NO-LABEL: define void @test_sqrt +// CHECK-FAST-LABEL: define void @test_sqrt void test_sqrt(float a0, double a1, long double a2) { // Following llvm-gcc's lead, we never emit these as intrinsics; // no-math-errno isn't good enough. 
We could probably use intrinsics @@ -27,9 +29,12 @@ void test_sqrt(float a0, double a1, long double a2) { // CHECK-NO: declare float @sqrtf(float) [[NUW_RN:#[0-9]+]] // CHECK-NO: declare double @sqrt(double) [[NUW_RN]] // CHECK-NO: declare x86_fp80 @sqrtl(x86_fp80) [[NUW_RN]] +// CHECK-FAST: declare float @llvm.sqrt.f32(float) +// CHECK-FAST: declare double @llvm.sqrt.f64(double) +// CHECK-FAST: declare x86_fp80 @llvm.sqrt.f80(x86_fp80) -// CHECK-YES: define void @test_pow -// CHECK-NO: define void @test_pow +// CHECK-YES-LABEL: define void @test_pow +// CHECK-NO-LABEL: define void @test_pow void test_pow(float a0, double a1, long double a2) { // CHECK-YES: call float @powf // CHECK-NO: call float @llvm.pow.f32 @@ -51,8 +56,8 @@ void test_pow(float a0, double a1, long double a2) { // CHECK-NO: declare double @llvm.pow.f64(double, double) [[NUW_RO]] // CHECK-NO: declare x86_fp80 @llvm.pow.f80(x86_fp80, x86_fp80) [[NUW_RO]] -// CHECK-YES: define void @test_fma -// CHECK-NO: define void @test_fma +// CHECK-YES-LABEL: define void @test_fma +// CHECK-NO-LABEL: define void @test_fma void test_fma(float a0, double a1, long double a2) { // CHECK-YES: call float @llvm.fma.f32 // CHECK-NO: call float @llvm.fma.f32 diff --git a/test/CodeGen/link-bitcode-file.c b/test/CodeGen/link-bitcode-file.c index 7740406..cf9493f 100644 --- a/test/CodeGen/link-bitcode-file.c +++ b/test/CodeGen/link-bitcode-file.c @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -triple i386-pc-linux-gnu -DBITCODE -emit-llvm-bc -o %t.bc %s // RUN: %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file %t.bc -O3 -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-NO-BC %s -// RUN: %clang_cc1 -triple i386-pc-linux-gnu -DBITCODE -mlink-bitcode-file %t.bc -O3 -emit-llvm -o - %s 2>&1 | FileCheck -check-prefix=CHECK-BC %s +// RUN: not %clang_cc1 -triple i386-pc-linux-gnu -DBITCODE -mlink-bitcode-file %t.bc -O3 -emit-llvm -o - %s 2>&1 | FileCheck -check-prefix=CHECK-BC %s int f(void); @@ -13,12 +13,12 @@ int f(void) { #else -// CHECK-NO-BC: define i32 @g +// CHECK-NO-BC-LABEL: define i32 @g // CHECK-NO-BC: ret i32 42 int g(void) { return f(); } -// CHECK-NO-BC: define i32 @f +// CHECK-NO-BC-LABEL: define i32 @f #endif diff --git a/test/CodeGen/linkage-redecl.c b/test/CodeGen/linkage-redecl.c index 14112fe..58993f3 100644 --- a/test/CodeGen/linkage-redecl.c +++ b/test/CodeGen/linkage-redecl.c @@ -16,4 +16,4 @@ void g0() { } extern void f(int x) { } // still has internal linkage -// CHECK: define internal void @f +// CHECK-LABEL: define internal void @f diff --git a/test/CodeGen/linux-arm-atomic.c b/test/CodeGen/linux-arm-atomic.c index c7ce1d2..116925a5 100644 --- a/test/CodeGen/linux-arm-atomic.c +++ b/test/CodeGen/linux-arm-atomic.c @@ -1,6 +1,8 @@ // RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-unknown-linux | FileCheck %s // RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv6-unknown-linux | FileCheck %s // RUN: %clang_cc1 %s -emit-llvm -o - -triple=thumbv7-unknown-linux | FileCheck %s +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv6-unknown-freebsd | FileCheck %s +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv6-unknown-bitrig | FileCheck %s typedef int _Atomic_word; _Atomic_word exchange_and_add(volatile _Atomic_word *__mem, int __val) { diff --git a/test/CodeGen/long-double-x86-nacl.c b/test/CodeGen/long-double-x86-nacl.c index 175129c..cec381a 100644 --- a/test/CodeGen/long-double-x86-nacl.c +++ b/test/CodeGen/long-double-x86-nacl.c @@ -3,5 +3,5 @@ long double x = 0; int checksize[sizeof(x) == 8 ? 
1 : -1]; -// CHECK: define void @s1(double %a) +// CHECK-LABEL: define void @s1(double %a) void s1(long double a) {} diff --git a/test/CodeGen/mangle-windows-rtd.c b/test/CodeGen/mangle-windows-rtd.c new file mode 100644 index 0000000..fc6f309 --- /dev/null +++ b/test/CodeGen/mangle-windows-rtd.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -emit-llvm -mrtd %s -o - -triple=i386-mingw32 | FileCheck %s + +void f1(void) {} +// CHECK: define x86_stdcallcc void @"\01_f1@0" + +void __stdcall f2(void) {} +// CHECK: define x86_stdcallcc void @"\01_f2@0" + +void __fastcall f3(void) {} +// CHECK: define x86_fastcallcc void @"\01@f3@0" diff --git a/test/CodeGen/mangle-windows.c b/test/CodeGen/mangle-windows.c new file mode 100644 index 0000000..6706492 --- /dev/null +++ b/test/CodeGen/mangle-windows.c @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -emit-llvm %s -o - -cxx-abi microsoft \ +// RUN: -triple=i386-pc-win32 | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-mingw32 | FileCheck %s + +void __stdcall f1(void) {} +// CHECK: define x86_stdcallcc void @"\01_f1@0" + +void __fastcall f2(void) {} +// CHECK: define x86_fastcallcc void @"\01@f2@0" + +void __stdcall f3() {} +// CHECK: define x86_stdcallcc void @"\01_f3@0" + +void __fastcall f4(char a) {} +// CHECK: define x86_fastcallcc void @"\01@f4@4" + +void __fastcall f5(short a) {} +// CHECK: define x86_fastcallcc void @"\01@f5@4" + +void __fastcall f6(int a) {} +// CHECK: define x86_fastcallcc void @"\01@f6@4" + +void __fastcall f7(long a) {} +// CHECK: define x86_fastcallcc void @"\01@f7@4" + +void __fastcall f8(long long a) {} +// CHECK: define x86_fastcallcc void @"\01@f8@8" + +void __fastcall f9(long long a, char b, char c, short d) {} +// CHECK: define x86_fastcallcc void @"\01@f9@20"(i64 %a, i8 signext %b, i8 +// signext %c, i16 signext %d) + +void f12(void) {} +// CHECK: define void @f12( diff --git a/test/CodeGen/may-alias.c b/test/CodeGen/may-alias.c index c767244..4d6f721 100644 --- a/test/CodeGen/may-alias.c +++ b/test/CodeGen/may-alias.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -Werror -triple i386-unknown-unknown -emit-llvm -O1 -disable-llvm-optzns -o - %s | FileCheck %s -// RUN: %clang_cc1 -Werror -triple i386-unknown-unknown -emit-llvm -O1 -struct-path-tbaa -disable-llvm-optzns -o - %s | FileCheck %s -check-prefix=PATH +// RUN: %clang_cc1 -Werror -triple i386-unknown-unknown -emit-llvm -O1 -no-struct-path-tbaa -disable-llvm-optzns -o - %s | FileCheck %s +// RUN: %clang_cc1 -Werror -triple i386-unknown-unknown -emit-llvm -O1 -disable-llvm-optzns -o - %s | FileCheck %s -check-prefix=PATH // Types with the may_alias attribute should be considered equivalent // to char for aliasing. 
@@ -8,10 +8,10 @@ typedef int __attribute__((may_alias)) aliasing_int; void test0(aliasing_int *ai, int *i) { -// CHECK: store i32 0, i32* %{{.*}}, !tbaa !1 +// CHECK: store i32 0, i32* %{{.*}}, !tbaa [[TAG_CHAR:!.*]] // PATH: store i32 0, i32* %{{.*}}, !tbaa [[TAG_CHAR:!.*]] *ai = 0; -// CHECK: store i32 1, i32* %{{.*}}, !tbaa !3 +// CHECK: store i32 1, i32* %{{.*}}, !tbaa [[TAG_INT:!.*]] // PATH: store i32 1, i32* %{{.*}}, !tbaa [[TAG_INT:!.*]] *i = 1; } @@ -20,22 +20,23 @@ void test0(aliasing_int *ai, int *i) struct Test1 { int x; }; struct Test1MA { int x; } __attribute__((may_alias)); void test1(struct Test1MA *p1, struct Test1 *p2) { - // CHECK: store i32 2, i32* {{%.*}}, !tbaa !1 + // CHECK: store i32 2, i32* {{%.*}}, !tbaa [[TAG_CHAR]] // PATH: store i32 2, i32* {{%.*}}, !tbaa [[TAG_CHAR]] p1->x = 2; - // CHECK: store i32 3, i32* {{%.*}}, !tbaa !3 + // CHECK: store i32 3, i32* {{%.*}}, !tbaa [[TAG_INT]] // PATH: store i32 3, i32* {{%.*}}, !tbaa [[TAG_test1_x:!.*]] p2->x = 3; } - -// CHECK: !0 = metadata !{metadata !"any pointer", metadata !1} -// CHECK: !1 = metadata !{metadata !"omnipotent char", metadata !2} -// CHECK: !2 = metadata !{metadata !"Simple C/C++ TBAA"} -// CHECK: !3 = metadata !{metadata !"int", metadata !1} +// CHECK: metadata !{metadata !"any pointer", metadata [[TYPE_CHAR:!.*]], +// CHECK: [[TYPE_CHAR]] = metadata !{metadata !"omnipotent char", metadata [[TAG_CXX_TBAA:!.*]], +// CHECK: [[TAG_CXX_TBAA]] = metadata !{metadata !"Simple C/C++ TBAA"} +// CHECK: [[TAG_CHAR]] = metadata !{metadata [[TYPE_CHAR]], metadata [[TYPE_CHAR]], i64 0} +// CHECK: [[TAG_INT]] = metadata !{metadata [[TYPE_INT:!.*]], metadata [[TYPE_INT]], i64 0} +// CHECK: [[TYPE_INT]] = metadata !{metadata !"int", metadata [[TYPE_CHAR]] // PATH: [[TYPE_CHAR:!.*]] = metadata !{metadata !"omnipotent char", metadata !{{.*}} // PATH: [[TAG_CHAR]] = metadata !{metadata [[TYPE_CHAR]], metadata [[TYPE_CHAR]], i64 0} // PATH: [[TAG_INT]] = metadata !{metadata [[TYPE_INT:!.*]], metadata [[TYPE_INT]], i64 0} // PATH: [[TYPE_INT]] = metadata !{metadata !"int", metadata [[TYPE_CHAR]] // PATH: [[TAG_test1_x]] = metadata !{metadata [[TYPE_test1:!.*]], metadata [[TYPE_INT]], i64 0} -// PATH: [[TYPE_test1]] = metadata !{metadata !"_ZTS5Test1", metadata [[TYPE_INT]], i64 0} +// PATH: [[TYPE_test1]] = metadata !{metadata !"Test1", metadata [[TYPE_INT]], i64 0} diff --git a/test/CodeGen/microsoft-call-conv-x64.c b/test/CodeGen/microsoft-call-conv-x64.c index 97a1d99..6475dfa 100644 --- a/test/CodeGen/microsoft-call-conv-x64.c +++ b/test/CodeGen/microsoft-call-conv-x64.c @@ -3,12 +3,12 @@ void __fastcall f1(void); void __stdcall f2(void); void __fastcall f4(void) { -// CHECK: define void @f4() +// CHECK-LABEL: define void @f4() f1(); // CHECK: call void @f1() } void __stdcall f5(void) { -// CHECK: define void @f5() +// CHECK-LABEL: define void @f5() f2(); // CHECK: call void @f2() } diff --git a/test/CodeGen/microsoft-call-conv.c b/test/CodeGen/microsoft-call-conv.c index 64d10fb..b80c58d 100644 --- a/test/CodeGen/microsoft-call-conv.c +++ b/test/CodeGen/microsoft-call-conv.c @@ -1,20 +1,22 @@ // RUN: %clang_cc1 -triple i386-pc-linux -emit-llvm < %s | FileCheck %s +// RUN: %clang_cc1 -triple i386-pc-linux -emit-llvm -mrtd < %s | FileCheck %s +// RUN: %clang_cc1 -triple i386-pc-linux -emit-llvm -fms-compatibility < %s void __fastcall f1(void); void __stdcall f2(void); void __thiscall f3(void); void __fastcall f4(void) { -// CHECK: define x86_fastcallcc void @f4() +// CHECK-LABEL: define x86_fastcallcc void @f4() f1(); 
// CHECK: call x86_fastcallcc void @f1() } void __stdcall f5(void) { -// CHECK: define x86_stdcallcc void @f5() +// CHECK-LABEL: define x86_stdcallcc void @f5() f2(); // CHECK: call x86_stdcallcc void @f2() } void __thiscall f6(void) { -// CHECK: define x86_thiscallcc void @f6() +// CHECK-LABEL: define x86_thiscallcc void @f6() f3(); // CHECK: call x86_thiscallcc void @f3() } @@ -48,3 +50,11 @@ void f8(void) { f7(0); // CHECK: call x86_stdcallcc void @f7(i32 0) } + +// PR12535 +void __fastcall f9(int x, int y) {}; +// WIN: define x86_fastcallcc void @f9({{.*}}) +void __fastcall f10(int x, ...) {}; +// WIN: define void @f10({{.*}}) +void __stdcall f11(int x, ...) {}; +// WIN: define void @f11({{.*}}) diff --git a/test/CodeGen/mips-byval-arg.c b/test/CodeGen/mips-byval-arg.c index 41ccd60..589e85e 100644 --- a/test/CodeGen/mips-byval-arg.c +++ b/test/CodeGen/mips-byval-arg.c @@ -7,8 +7,8 @@ typedef struct { extern void foo2(S0); -// O32: define void @foo1(i32 %a0.coerce0, i32 %a0.coerce1, i32 %a0.coerce2) -// N64: define void @foo1(i64 %a0.coerce0, i32 %a0.coerce1) +// O32-LABEL: define void @foo1(i32 %a0.coerce0, i32 %a0.coerce1, i32 %a0.coerce2) +// N64-LABEL: define void @foo1(i64 %a0.coerce0, i32 %a0.coerce1) void foo1(S0 a0) { foo2(a0); diff --git a/test/CodeGen/mips-clobber-reg.c b/test/CodeGen/mips-clobber-reg.c index be18353..a87a3e7 100644 --- a/test/CodeGen/mips-clobber-reg.c +++ b/test/CodeGen/mips-clobber-reg.c @@ -1,80 +1,150 @@ -// RUN: %clang -target mipsel-unknown-linux -S -o - -emit-llvm %s +// RUN: %clang -target mipsel-unknown-linux -S -o - -emit-llvm %s /* This checks that the frontend will accept both - enumerated and symbolic Mips GPR register names. - + enumerated and symbolic Mips register names. + + Includes: + - GPR + - FPU + - MSA + Any bad names will make the frontend choke. 
*/ main() { - __asm__ __volatile__ (".set noat \n\t addi $7,$at,77":::"at"); - __asm__ __volatile__ ("addi $7,$v0,77":::"v0"); - __asm__ __volatile__ ("addi $7,$v1,77":::"v1"); - __asm__ __volatile__ ("addi $7,$a0,77":::"a0"); - __asm__ __volatile__ ("addi $7,$a1,77":::"a1"); - __asm__ __volatile__ ("addi $7,$a2,77":::"a2"); - __asm__ __volatile__ ("addi $7,$a3,77":::"a3"); - __asm__ __volatile__ ("addi $7,$t0,77":::"t0"); - __asm__ __volatile__ ("addi $7,$t1,77":::"t1"); - __asm__ __volatile__ ("addi $7,$t2,77":::"t2"); - __asm__ __volatile__ ("addi $7,$t3,77":::"t3"); - __asm__ __volatile__ ("addi $7,$t4,77":::"t4"); - __asm__ __volatile__ ("addi $7,$t5,77":::"t5"); - __asm__ __volatile__ ("addi $7,$t6,77":::"t6"); - __asm__ __volatile__ ("addi $7,$t7,77":::"t7"); - __asm__ __volatile__ ("addi $7,$s0,77":::"s0"); - __asm__ __volatile__ ("addi $7,$s1,77":::"s1"); - __asm__ __volatile__ ("addi $7,$s2,77":::"s2"); - __asm__ __volatile__ ("addi $7,$s3,77":::"s3"); - __asm__ __volatile__ ("addi $7,$s4,77":::"s4"); - __asm__ __volatile__ ("addi $7,$s5,77":::"s5"); - __asm__ __volatile__ ("addi $7,$s6,77":::"s6"); - __asm__ __volatile__ ("addi $7,$s7,77":::"s7"); - __asm__ __volatile__ ("addi $7,$t8,77":::"t8"); - __asm__ __volatile__ ("addi $7,$t9,77":::"t9"); - __asm__ __volatile__ ("addi $7,$k0,77":::"k0"); - __asm__ __volatile__ ("addi $7,$k1,77":::"k1"); - __asm__ __volatile__ ("addi $7,$gp,77":::"gp"); - __asm__ __volatile__ ("addi $7,$sp,77":::"sp"); - __asm__ __volatile__ ("addi $7,$fp,77":::"fp"); - __asm__ __volatile__ ("addi $7,$sp,77":::"$sp"); - __asm__ __volatile__ ("addi $7,$fp,77":::"$fp"); - __asm__ __volatile__ ("addi $7,$ra,77":::"ra"); + __asm__ __volatile__ (".set noat \n\t addi $7,$at,77":::"at"); + __asm__ __volatile__ ("addi $7,$v0,77":::"v0"); + __asm__ __volatile__ ("addi $7,$v1,77":::"v1"); + __asm__ __volatile__ ("addi $7,$a0,77":::"a0"); + __asm__ __volatile__ ("addi $7,$a1,77":::"a1"); + __asm__ __volatile__ ("addi $7,$a2,77":::"a2"); + __asm__ __volatile__ ("addi $7,$a3,77":::"a3"); + __asm__ __volatile__ ("addi $7,$t0,77":::"t0"); + __asm__ __volatile__ ("addi $7,$t1,77":::"t1"); + __asm__ __volatile__ ("addi $7,$t2,77":::"t2"); + __asm__ __volatile__ ("addi $7,$t3,77":::"t3"); + __asm__ __volatile__ ("addi $7,$t4,77":::"t4"); + __asm__ __volatile__ ("addi $7,$t5,77":::"t5"); + __asm__ __volatile__ ("addi $7,$t6,77":::"t6"); + __asm__ __volatile__ ("addi $7,$t7,77":::"t7"); + __asm__ __volatile__ ("addi $7,$s0,77":::"s0"); + __asm__ __volatile__ ("addi $7,$s1,77":::"s1"); + __asm__ __volatile__ ("addi $7,$s2,77":::"s2"); + __asm__ __volatile__ ("addi $7,$s3,77":::"s3"); + __asm__ __volatile__ ("addi $7,$s4,77":::"s4"); + __asm__ __volatile__ ("addi $7,$s5,77":::"s5"); + __asm__ __volatile__ ("addi $7,$s6,77":::"s6"); + __asm__ __volatile__ ("addi $7,$s7,77":::"s7"); + __asm__ __volatile__ ("addi $7,$t8,77":::"t8"); + __asm__ __volatile__ ("addi $7,$t9,77":::"t9"); + __asm__ __volatile__ ("addi $7,$k0,77":::"k0"); + __asm__ __volatile__ ("addi $7,$k1,77":::"k1"); + __asm__ __volatile__ ("addi $7,$gp,77":::"gp"); + __asm__ __volatile__ ("addi $7,$sp,77":::"sp"); + __asm__ __volatile__ ("addi $7,$fp,77":::"fp"); + __asm__ __volatile__ ("addi $7,$sp,77":::"$sp"); + __asm__ __volatile__ ("addi $7,$fp,77":::"$fp"); + __asm__ __volatile__ ("addi $7,$ra,77":::"ra"); + + __asm__ __volatile__ ("addi $7,$0,77":::"$0"); + __asm__ __volatile__ (".set noat \n\t addi $7,$1,77":::"$1"); + __asm__ __volatile__ ("addi $7,$2,77":::"$2"); + __asm__ __volatile__ ("addi 
$7,$3,77":::"$3"); + __asm__ __volatile__ ("addi $7,$4,77":::"$4"); + __asm__ __volatile__ ("addi $7,$5,77":::"$5"); + __asm__ __volatile__ ("addi $7,$6,77":::"$6"); + __asm__ __volatile__ ("addi $7,$7,77":::"$7"); + __asm__ __volatile__ ("addi $7,$8,77":::"$8"); + __asm__ __volatile__ ("addi $7,$9,77":::"$9"); + __asm__ __volatile__ ("addi $7,$10,77":::"$10"); + __asm__ __volatile__ ("addi $7,$11,77":::"$11"); + __asm__ __volatile__ ("addi $7,$12,77":::"$12"); + __asm__ __volatile__ ("addi $7,$13,77":::"$13"); + __asm__ __volatile__ ("addi $7,$14,77":::"$14"); + __asm__ __volatile__ ("addi $7,$15,77":::"$15"); + __asm__ __volatile__ ("addi $7,$16,77":::"$16"); + __asm__ __volatile__ ("addi $7,$17,77":::"$17"); + __asm__ __volatile__ ("addi $7,$18,77":::"$18"); + __asm__ __volatile__ ("addi $7,$19,77":::"$19"); + __asm__ __volatile__ ("addi $7,$20,77":::"$20"); + __asm__ __volatile__ ("addi $7,$21,77":::"$21"); + __asm__ __volatile__ ("addi $7,$22,77":::"$22"); + __asm__ __volatile__ ("addi $7,$23,77":::"$23"); + __asm__ __volatile__ ("addi $7,$24,77":::"$24"); + __asm__ __volatile__ ("addi $7,$25,77":::"$25"); + __asm__ __volatile__ ("addi $7,$26,77":::"$26"); + __asm__ __volatile__ ("addi $7,$27,77":::"$27"); + __asm__ __volatile__ ("addi $7,$28,77":::"$28"); + __asm__ __volatile__ ("addi $7,$29,77":::"$29"); + __asm__ __volatile__ ("addi $7,$30,77":::"$30"); + __asm__ __volatile__ ("addi $7,$31,77":::"$31"); - __asm__ __volatile__ ("addi $7,$0,77":::"$0"); - __asm__ __volatile__ (".set noat \n\t addi $7,$1,77":::"$1"); - __asm__ __volatile__ ("addi $7,$2,77":::"$2"); - __asm__ __volatile__ ("addi $7,$3,77":::"$3"); - __asm__ __volatile__ ("addi $7,$4,77":::"$4"); - __asm__ __volatile__ ("addi $7,$5,77":::"$5"); - __asm__ __volatile__ ("addi $7,$6,77":::"$6"); - __asm__ __volatile__ ("addi $7,$7,77":::"$7"); - __asm__ __volatile__ ("addi $7,$8,77":::"$8"); - __asm__ __volatile__ ("addi $7,$9,77":::"$9"); - __asm__ __volatile__ ("addi $7,$10,77":::"$10"); - __asm__ __volatile__ ("addi $7,$11,77":::"$10"); - __asm__ __volatile__ ("addi $7,$12,77":::"$12"); - __asm__ __volatile__ ("addi $7,$13,77":::"$13"); - __asm__ __volatile__ ("addi $7,$14,77":::"$14"); - __asm__ __volatile__ ("addi $7,$15,77":::"$15"); - __asm__ __volatile__ ("addi $7,$16,77":::"$16"); - __asm__ __volatile__ ("addi $7,$17,77":::"$17"); - __asm__ __volatile__ ("addi $7,$18,77":::"$18"); - __asm__ __volatile__ ("addi $7,$19,77":::"$19"); - __asm__ __volatile__ ("addi $7,$20,77":::"$20"); - __asm__ __volatile__ ("addi $7,$21,77":::"$21"); - __asm__ __volatile__ ("addi $7,$22,77":::"$22"); - __asm__ __volatile__ ("addi $7,$23,77":::"$23"); - __asm__ __volatile__ ("addi $7,$24,77":::"$24"); - __asm__ __volatile__ ("addi $7,$25,77":::"$25"); - __asm__ __volatile__ ("addi $7,$26,77":::"$26"); - __asm__ __volatile__ ("addi $7,$27,77":::"$27"); - __asm__ __volatile__ ("addi $7,$28,77":::"$28"); - __asm__ __volatile__ ("addi $7,$29,77":::"$29"); - __asm__ __volatile__ ("addi $7,$30,77":::"$30"); - __asm__ __volatile__ ("addi $7,$31,77":::"$31"); + __asm__ __volatile__ ("fadd.s $f0,77":::"$f0"); + __asm__ __volatile__ ("fadd.s $f1,77":::"$f1"); + __asm__ __volatile__ ("fadd.s $f2,77":::"$f2"); + __asm__ __volatile__ ("fadd.s $f3,77":::"$f3"); + __asm__ __volatile__ ("fadd.s $f4,77":::"$f4"); + __asm__ __volatile__ ("fadd.s $f5,77":::"$f5"); + __asm__ __volatile__ ("fadd.s $f6,77":::"$f6"); + __asm__ __volatile__ ("fadd.s $f7,77":::"$f7"); + __asm__ __volatile__ ("fadd.s $f8,77":::"$f8"); + __asm__ __volatile__ ("fadd.s 
$f9,77":::"$f9"); + __asm__ __volatile__ ("fadd.s $f10,77":::"$f10"); + __asm__ __volatile__ ("fadd.s $f11,77":::"$f11"); + __asm__ __volatile__ ("fadd.s $f12,77":::"$f12"); + __asm__ __volatile__ ("fadd.s $f13,77":::"$f13"); + __asm__ __volatile__ ("fadd.s $f14,77":::"$f14"); + __asm__ __volatile__ ("fadd.s $f15,77":::"$f15"); + __asm__ __volatile__ ("fadd.s $f16,77":::"$f16"); + __asm__ __volatile__ ("fadd.s $f17,77":::"$f17"); + __asm__ __volatile__ ("fadd.s $f18,77":::"$f18"); + __asm__ __volatile__ ("fadd.s $f19,77":::"$f19"); + __asm__ __volatile__ ("fadd.s $f20,77":::"$f20"); + __asm__ __volatile__ ("fadd.s $f21,77":::"$f21"); + __asm__ __volatile__ ("fadd.s $f22,77":::"$f22"); + __asm__ __volatile__ ("fadd.s $f23,77":::"$f23"); + __asm__ __volatile__ ("fadd.s $f24,77":::"$f24"); + __asm__ __volatile__ ("fadd.s $f25,77":::"$f25"); + __asm__ __volatile__ ("fadd.s $f26,77":::"$f26"); + __asm__ __volatile__ ("fadd.s $f27,77":::"$f27"); + __asm__ __volatile__ ("fadd.s $f28,77":::"$f28"); + __asm__ __volatile__ ("fadd.s $f29,77":::"$f29"); + __asm__ __volatile__ ("fadd.s $f30,77":::"$f30"); + __asm__ __volatile__ ("fadd.s $f31,77":::"$f31"); + __asm__ __volatile__ ("ldi.w $w0,77":::"$w0"); + __asm__ __volatile__ ("ldi.w $w1,77":::"$w1"); + __asm__ __volatile__ ("ldi.w $w2,77":::"$w2"); + __asm__ __volatile__ ("ldi.w $w3,77":::"$w3"); + __asm__ __volatile__ ("ldi.w $w4,77":::"$w4"); + __asm__ __volatile__ ("ldi.w $w5,77":::"$w5"); + __asm__ __volatile__ ("ldi.w $w6,77":::"$w6"); + __asm__ __volatile__ ("ldi.w $w7,77":::"$w7"); + __asm__ __volatile__ ("ldi.w $w8,77":::"$w8"); + __asm__ __volatile__ ("ldi.w $w9,77":::"$w9"); + __asm__ __volatile__ ("ldi.w $w10,77":::"$w10"); + __asm__ __volatile__ ("ldi.w $w11,77":::"$w10"); + __asm__ __volatile__ ("ldi.w $w12,77":::"$w12"); + __asm__ __volatile__ ("ldi.w $w13,77":::"$w13"); + __asm__ __volatile__ ("ldi.w $w14,77":::"$w14"); + __asm__ __volatile__ ("ldi.w $w15,77":::"$w15"); + __asm__ __volatile__ ("ldi.w $w16,77":::"$w16"); + __asm__ __volatile__ ("ldi.w $w17,77":::"$w17"); + __asm__ __volatile__ ("ldi.w $w18,77":::"$w18"); + __asm__ __volatile__ ("ldi.w $w19,77":::"$w19"); + __asm__ __volatile__ ("ldi.w $w20,77":::"$w20"); + __asm__ __volatile__ ("ldi.w $w21,77":::"$w21"); + __asm__ __volatile__ ("ldi.w $w22,77":::"$w22"); + __asm__ __volatile__ ("ldi.w $w23,77":::"$w23"); + __asm__ __volatile__ ("ldi.w $w24,77":::"$w24"); + __asm__ __volatile__ ("ldi.w $w25,77":::"$w25"); + __asm__ __volatile__ ("ldi.w $w26,77":::"$w26"); + __asm__ __volatile__ ("ldi.w $w27,77":::"$w27"); + __asm__ __volatile__ ("ldi.w $w28,77":::"$w28"); + __asm__ __volatile__ ("ldi.w $w29,77":::"$w29"); + __asm__ __volatile__ ("ldi.w $w30,77":::"$w30"); + __asm__ __volatile__ ("ldi.w $w31,77":::"$w31"); } diff --git a/test/CodeGen/mips-constraints-mem.c b/test/CodeGen/mips-constraints-mem.c index ea6bcaf..2c3c01a 100644 --- a/test/CodeGen/mips-constraints-mem.c +++ b/test/CodeGen/mips-constraints-mem.c @@ -9,7 +9,7 @@ int foo() // 'R': An address that can be used in a non-macro load or stor' // This test will result in the higher and lower nibbles being // switched due to the lwl/lwr instruction pairs. 
- // CHECK: %{{[0-9]+}} = call i32 asm sideeffect "lwl $0, 1 + $1\0A\09lwr $0, 2 + $1\0A\09", "=r,*R"(i32* %{{[0-9,a-f]+}}) #1, !srcloc !0 + // CHECK: %{{[0-9]+}} = call i32 asm sideeffect "lwl $0, 1 + $1\0A\09lwr $0, 2 + $1\0A\09", "=r,*R"(i32* %{{[0-9,a-f]+}}) #1, int c = 0xffbbccdd; diff --git a/test/CodeGen/mips-inline-asm-modifiers.c b/test/CodeGen/mips-inline-asm-modifiers.c index 7c4ca2c..9d697e8 100644 --- a/test/CodeGen/mips-inline-asm-modifiers.c +++ b/test/CodeGen/mips-inline-asm-modifiers.c @@ -5,12 +5,16 @@ int printf(const char*, ...); - // CHECK: %{{[0-9]+}} = call i32 asm ".set noreorder;\0Alw $0,$1;\0A.set reorder;\0A", "=r,*m"(i32* getelementptr inbounds ([8 x i32]* @b, i32 {{[0-9]+}}, i32 {{[0-9]+}})) #2, !srcloc !0 - // CHECK: %{{[0-9]+}} = call i32 asm "lw $0,${1:D};\0A", "=r,*m"(i32* getelementptr inbounds ([8 x i32]* @b, i32 {{[0-9]+}}, i32 {{[0-9]+}})) #2, !srcloc !1 +typedef int v4i32 __attribute__((vector_size(16))); + + // CHECK: %{{[0-9]+}} = call i32 asm ".set noreorder;\0Alw $0,$1;\0A.set reorder;\0A", "=r,*m"(i32* getelementptr inbounds ([8 x i32]* @b, i32 {{[0-9]+}}, i32 {{[0-9]+}})) #2, + // CHECK: %{{[0-9]+}} = call i32 asm "lw $0,${1:D};\0A", "=r,*m"(i32* getelementptr inbounds ([8 x i32]* @b, i32 {{[0-9]+}}, i32 {{[0-9]+}})) #2, + // CHECK: %{{[0-9]+}} = call <4 x i32> asm "ldi.w ${0:w},1", "=f" int b[8] = {0,1,2,3,4,5,6,7}; int main() { int i; + v4i32 v4i32_r; // The first word. Notice, no 'D' {asm ( @@ -29,6 +33,9 @@ int main() : "m" (*(b+4)) );} + // MSA registers + {asm ("ldi.w %w0,1" : "=f" (v4i32_r));} + printf("%d\n",i); return 1; diff --git a/test/CodeGen/mips-target-data.c b/test/CodeGen/mips-target-data.c index 88eadcb..6475ccb 100644 --- a/test/CodeGen/mips-target-data.c +++ b/test/CodeGen/mips-target-data.c @@ -6,6 +6,8 @@ // RUN: FileCheck %s -check-prefix=64EL // RUN: %clang -target mips64-linux-gnu -o - -emit-llvm -S %s |\ // RUN: FileCheck %s -check-prefix=64EB +// RUN: %clang -target mipsel-linux-gnu -o - -emit-llvm -S -mfp64 %s |\ +// RUN: FileCheck %s -check-prefix=32EL // 32EL: e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64 // 32EB: E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64 diff --git a/test/CodeGen/mips-vector-return.c b/test/CodeGen/mips-vector-return.c index 0bff969..a7c8ce1 100644 --- a/test/CodeGen/mips-vector-return.c +++ b/test/CodeGen/mips-vector-return.c @@ -8,14 +8,14 @@ typedef float v4sf __attribute__ ((__vector_size__ (16))); typedef double v4df __attribute__ ((__vector_size__ (32))); typedef int v4i32 __attribute__ ((__vector_size__ (16))); -// O32: define void @test_v4sf(<4 x float>* noalias nocapture sret +// O32-LABEL: define void @test_v4sf(<4 x float>* noalias nocapture sret // N64: define { i64, i64 } @test_v4sf v4sf test_v4sf(float a) { return (v4sf){0.0f, a, 0.0f, 0.0f}; } -// O32: define void @test_v4df(<4 x double>* noalias nocapture sret -// N64: define void @test_v4df(<4 x double>* noalias nocapture sret +// O32-LABEL: define void @test_v4df(<4 x double>* noalias nocapture sret +// N64-LABEL: define void @test_v4df(<4 x double>* noalias nocapture sret v4df test_v4df(double a) { return (v4df){0.0, a, 0.0, 0.0}; } diff --git a/test/CodeGen/mips64-class-return.cpp b/test/CodeGen/mips64-class-return.cpp index 2a786df..a473c13 100644 --- a/test/CodeGen/mips64-class-return.cpp +++ b/test/CodeGen/mips64-class-return.cpp @@ -34,12 +34,12 @@ D1 foo2(void) { return gd1; } -// CHECK: define void @_Z4foo32D2(i64 
%a0.coerce0, double %a0.coerce1) +// CHECK-LABEL: define void @_Z4foo32D2(i64 %a0.coerce0, double %a0.coerce1) void foo3(D2 a0) { gd2 = a0; } -// CHECK: define void @_Z4foo42D0(i64 %a0.coerce0, i64 %a0.coerce1) +// CHECK-LABEL: define void @_Z4foo42D0(i64 %a0.coerce0, i64 %a0.coerce1) void foo4(D0 a0) { gd0 = a0; } diff --git a/test/CodeGen/mips64-padding-arg.c b/test/CodeGen/mips64-padding-arg.c index 85dc00c..49a29c1 100644 --- a/test/CodeGen/mips64-padding-arg.c +++ b/test/CodeGen/mips64-padding-arg.c @@ -1,5 +1,6 @@ // RUN: %clang -target mipsel-unknown-linux -O3 -S -o - -emit-llvm %s | FileCheck %s -check-prefix=O32 // RUN: %clang -target mips64el-unknown-linux -O3 -S -mabi=n64 -o - -emit-llvm %s | FileCheck %s -check-prefix=N64 +// RUN: %clang -target mipsel-unknown-linux -mfp64 -O3 -S -o - -emit-llvm %s | FileCheck %s -check-prefix=O32 typedef struct { double d; @@ -8,7 +9,7 @@ typedef struct { // Insert padding to ensure arguments of type S0 are aligned to 16-byte boundaries. -// N64: define void @foo1(i32 %a0, i64, double %a1.coerce0, i64 %a1.coerce1, i64 %a1.coerce2, i64 %a1.coerce3, double %a2.coerce0, i64 %a2.coerce1, i64 %a2.coerce2, i64 %a2.coerce3, i32 %b, i64, double %a3.coerce0, i64 %a3.coerce1, i64 %a3.coerce2, i64 %a3.coerce3) +// N64-LABEL: define void @foo1(i32 %a0, i64, double %a1.coerce0, i64 %a1.coerce1, i64 %a1.coerce2, i64 %a1.coerce3, double %a2.coerce0, i64 %a2.coerce1, i64 %a2.coerce2, i64 %a2.coerce3, i32 %b, i64, double %a3.coerce0, i64 %a3.coerce1, i64 %a3.coerce2, i64 %a3.coerce3) // N64: tail call void @foo2(i32 1, i32 2, i32 %a0, i64 undef, double %a1.coerce0, i64 %a1.coerce1, i64 %a1.coerce2, i64 %a1.coerce3, double %a2.coerce0, i64 %a2.coerce1, i64 %a2.coerce2, i64 %a2.coerce3, i32 3, i64 undef, double %a3.coerce0, i64 %a3.coerce1, i64 %a3.coerce2, i64 %a3.coerce3) // N64: declare void @foo2(i32, i32, i32, i64, double, i64, i64, i64, double, i64, i64, i64, i32, i64, double, i64, i64, i64) @@ -20,7 +21,7 @@ void foo1(int a0, S0 a1, S0 a2, int b, S0 a3) { // Insert padding before long double argument. // -// N64: define void @foo3(i32 %a0, i64, fp128 %a1) +// N64-LABEL: define void @foo3(i32 %a0, i64, fp128 %a1) // N64: tail call void @foo4(i32 1, i32 2, i32 %a0, i64 undef, fp128 %a1) // N64: declare void @foo4(i32, i32, i32, i64, fp128) @@ -32,7 +33,7 @@ void foo3(int a0, long double a1) { // Insert padding after hidden argument. // -// N64: define void @foo5(%struct.S0* noalias sret %agg.result, i64, fp128 %a0) +// N64-LABEL: define void @foo5(%struct.S0* noalias sret %agg.result, i64, fp128 %a0) // N64: call void @foo6(%struct.S0* sret %agg.result, i32 1, i32 2, i64 undef, fp128 %a0) // N64: declare void @foo6(%struct.S0* sret, i32, i32, i64, fp128) @@ -44,7 +45,7 @@ S0 foo5(long double a0) { // Do not insert padding if ABI is O32. 
// -// O32: define void @foo7(float %a0, double %a1) +// O32-LABEL: define void @foo7(float %a0, double %a1) // O32: declare void @foo8(float, double) extern void foo8(float, double); @@ -53,3 +54,18 @@ void foo7(float a0, double a1) { foo8(a0 + 1.0f, a1 + 2.0); } +// O32-LABEL: define void @foo9() +// O32: declare void @foo10(i32, i32 + +typedef struct __attribute__((aligned(16))) { + int a; +} S16; + +S16 s16; + +void foo10(int, S16); + +void foo9(void) { + foo10(1, s16); +} + diff --git a/test/CodeGen/mmx-inline-asm-error.c b/test/CodeGen/mmx-inline-asm-error.c new file mode 100644 index 0000000..a639368 --- /dev/null +++ b/test/CodeGen/mmx-inline-asm-error.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -verify -triple x86_64-unknown-unknown -emit-llvm-only %s +typedef int vec256 __attribute__((ext_vector_type(8))); + +vec256 foo(vec256 in) { + vec256 out; + + asm("something %0" : : "y"(in)); // expected-error {{invalid type 'vec256' in asm input for constraint 'y'}} + asm("something %0" : "=y"(out)); // expected-error {{invalid type 'vec256' in asm input for constraint 'y'}} + asm("something %0, %0" : "+y"(out)); // expected-error {{invalid type 'vec256' in asm input for constraint 'y'}} + + return out; +} + diff --git a/test/CodeGen/mrtd.c b/test/CodeGen/mrtd.c index a40a59a..8fa7cf0 100644 --- a/test/CodeGen/mrtd.c +++ b/test/CodeGen/mrtd.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -mrtd -triple i386-unknown-freebsd9.0 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -mrtd -triple i386-unknown-unknown -std=c89 -emit-llvm -o - %s | FileCheck %s void baz(int arg); @@ -14,4 +14,13 @@ void foo(int arg) { // CHECK: declare x86_stdcallcc void @baz(i32) +void qux(int arg, ...) { } +// CHECK: define void @qux(i32 %arg, ...) + +void quux(int a1, int a2, int a3) { + qux(a1, a2, a3); +} +// CHECK-LABEL: define x86_stdcallcc void @quux +// CHECK: call void (i32, ...)* @qux + // CHECK: attributes [[NUW]] = { nounwind{{.*}} } diff --git a/test/CodeGen/ms-declspecs.c b/test/CodeGen/ms-declspecs.c index 26bdc58..5dc7787 100644 --- a/test/CodeGen/ms-declspecs.c +++ b/test/CodeGen/ms-declspecs.c @@ -1,5 +1,10 @@ // RUN: %clang_cc1 -triple i386-pc-win32 %s -emit-llvm -fms-compatibility -o - | FileCheck %s +__declspec(selectany) int x1 = 1; +const __declspec(selectany) int x2 = 2; +// CHECK: @x1 = weak_odr global i32 1, align 4 +// CHECK: @x2 = weak_odr constant i32 2, align 4 + struct __declspec(align(16)) S { char x; }; diff --git a/test/CodeGen/ms-inline-asm-64.c b/test/CodeGen/ms-inline-asm-64.c index dd7b9b3..f667708 100644 --- a/test/CodeGen/ms-inline-asm-64.c +++ b/test/CodeGen/ms-inline-asm-64.c @@ -1,5 +1,5 @@ // REQUIRES: x86-64-registered-target -// RUN: %clang_cc1 %s -triple x86_64-apple-darwin10 -O0 -fasm-blocks -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple x86_64-apple-darwin10 -fasm-blocks -emit-llvm -o - | FileCheck %s void t1() { int var = 10; diff --git a/test/CodeGen/ms-inline-asm.c b/test/CodeGen/ms-inline-asm.c index c71a8df..c4486f6 100644 --- a/test/CodeGen/ms-inline-asm.c +++ b/test/CodeGen/ms-inline-asm.c @@ -1,5 +1,5 @@ // REQUIRES: x86-64-registered-target -// RUN: %clang_cc1 %s -triple i386-apple-darwin10 -O0 -fasm-blocks -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple i386-apple-darwin10 -fasm-blocks -emit-llvm -o - | FileCheck %s void t1() { // CHECK: @t1 diff --git a/test/CodeGen/ms-inline-asm.cpp b/test/CodeGen/ms-inline-asm.cpp index 8f824f9..64b8558 100644 --- a/test/CodeGen/ms-inline-asm.cpp +++ b/test/CodeGen/ms-inline-asm.cpp @@ -1,5 +1,5 @@ // 
REQUIRES: x86-64-registered-target -// RUN: %clang_cc1 -x c++ %s -triple i386-apple-darwin10 -O0 -fasm-blocks -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ %s -triple i386-apple-darwin10 -fasm-blocks -emit-llvm -o - | FileCheck %s // rdar://13645930 @@ -39,7 +39,7 @@ void t2() { // CHECK: call void asm sideeffect inteldialect "mov eax, $0", "r,~{eax},~{dirflag},~{fpsr},~{flags}"(i32** @_ZN3Foo3Bar3ptrE) } -// CHECK: define void @_Z2t3v() +// CHECK-LABEL: define void @_Z2t3v() void t3() { __asm mov eax, LENGTH Foo::ptr // CHECK: call void asm sideeffect inteldialect "mov eax, $$1", "~{eax},~{dirflag},~{fpsr},~{flags}"() @@ -76,7 +76,7 @@ struct T4 { void test(); }; -// CHECK: define void @_ZN2T44testEv( +// CHECK-LABEL: define void @_ZN2T44testEv( void T4::test() { // CHECK: [[T0:%.*]] = alloca [[T4:%.*]]*, // CHECK: [[THIS:%.*]] = load [[T4]]** [[T0]] @@ -91,7 +91,7 @@ template <class T> struct T5 { template <class U> static T create(U); void run(); }; -// CHECK: define void @_Z5test5v() +// CHECK-LABEL: define void @_Z5test5v() void test5() { // CHECK: [[X:%.*]] = alloca i32 // CHECK: [[Y:%.*]] = alloca i32 @@ -103,3 +103,11 @@ void test5() { __asm mov x, eax // CHECK: call void asm sideeffect inteldialect "mov dword ptr $0, eax", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* [[X]]) } + +// Just verify this doesn't emit an error. +void test6() { + __asm { + a: + jmp a + } +} diff --git a/test/CodeGen/ms_abi.c b/test/CodeGen/ms_abi.c new file mode 100644 index 0000000..7c5c26f --- /dev/null +++ b/test/CodeGen/ms_abi.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 -emit-llvm < %s | FileCheck -check-prefix=FREEBSD %s +// RUN: %clang_cc1 -triple x86_64-pc-win32 -emit-llvm < %s | FileCheck -check-prefix=WIN64 %s + +void __attribute__((ms_abi)) f1(void); +void __attribute__((sysv_abi)) f2(void); +void f3(void) { +// FREEBSD: define void @f3() +// WIN64: define void @f3() + f1(); +// FREEBSD: call x86_64_win64cc void @f1() +// WIN64: call void @f1() + f2(); +// FREEBSD: call void @f2() +// WIN64: call x86_64_sysvcc void @f2() +} +// FREEBSD: declare x86_64_win64cc void @f1() +// FREEBSD: declare void @f2() +// WIN64: declare void @f1() +// WIN64: declare x86_64_sysvcc void @f2() + diff --git a/test/CodeGen/mult-alt-generic.c b/test/CodeGen/mult-alt-generic.c index 111679e..6c9d789 100644 --- a/test/CodeGen/mult-alt-generic.c +++ b/test/CodeGen/mult-alt-generic.c @@ -1,7 +1,6 @@ // RUN: %clang_cc1 -triple i686 %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -triple x86_64 %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -triple arm %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -triple mblaze %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -triple mips %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -triple mipsel %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -triple powerpc %s -emit-llvm -o - | FileCheck %s diff --git a/test/CodeGen/no-opt-volatile-memcpy.c b/test/CodeGen/no-opt-volatile-memcpy.c index 0fab363..718d704 100644 --- a/test/CodeGen/no-opt-volatile-memcpy.c +++ b/test/CodeGen/no-opt-volatile-memcpy.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -O0 -triple=x86_64-apple-darwin -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple=x86_64-apple-darwin -emit-llvm -o - %s | FileCheck %s // rdar://11861085 struct s { @@ -14,7 +14,7 @@ void foo (void) { gs = gs; ls = gs; } -// CHECK: define void @foo() +// CHECK-LABEL: define void @foo() // CHECK: %[[LS:.*]] = alloca %struct.s, align 4 // CHECK-NEXT: %[[ZERO:.*]] = bitcast 
%struct.s* %[[LS]] to i8* // CHECK-NEXT: %[[ONE:.*]] = bitcast %struct.s* %[[LS]] to i8* @@ -34,7 +34,7 @@ void fee (void) { s = s; s.y = gs; } -// CHECK: define void @fee() +// CHECK-LABEL: define void @fee() // CHECK: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true) // CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true) diff --git a/test/CodeGen/nomathbuiltin.c b/test/CodeGen/nomathbuiltin.c new file mode 100644 index 0000000..f80cd91 --- /dev/null +++ b/test/CodeGen/nomathbuiltin.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -fno-math-builtin -emit-llvm -o - %s | FileCheck %s + +// Check that the -fno-math-builtin option for -cc1 is working properly. + + +double pow(double, double); + +double foo(double a, double b) { + return pow(a, b); +// CHECK: call double @pow +} + diff --git a/test/CodeGen/nvptx-abi.c b/test/CodeGen/nvptx-abi.c new file mode 100644 index 0000000..f846def --- /dev/null +++ b/test/CodeGen/nvptx-abi.c @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -o - %s -emit-llvm | FileCheck %s +// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -o - %s -emit-llvm | FileCheck %s + +typedef struct float4_s { + float x, y, z, w; +} float4_t; + +float4_t my_function(void); + +// CHECK-DAG: declare %struct.float4_s @my_function + +float bar(void) { + float4_t ret; +// CHECK-DAG: call %struct.float4_s @my_function + ret = my_function(); + return ret.x; +} diff --git a/test/CodeGen/nvptx-inlineasm-ptx.c b/test/CodeGen/nvptx-inlineasm-ptx.c new file mode 100644 index 0000000..0a19123 --- /dev/null +++ b/test/CodeGen/nvptx-inlineasm-ptx.c @@ -0,0 +1,40 @@ +// RUN: %clang_cc1 -triple nvptx-unknown-unknown -O3 -S -o - %s -emit-llvm | FileCheck %s +// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -O3 -S -o - %s -emit-llvm | FileCheck %s + +void constraints() { + char c; + unsigned char uc; + short s; + unsigned short us; + int i; + unsigned int ui; + long l; + unsigned long ul; + float f; + double d; + + // CHECK: i8 asm sideeffect "mov.b8 $0, $1;", "=c,c" + asm volatile ("mov.b8 %0, %1;" : "=c"(c) : "c"(c)); + // CHECK: i8 asm sideeffect "mov.b8 $0, $1;", "=c,c" + asm volatile ("mov.b8 %0, %1;" : "=c"(uc) : "c"(uc)); + + // CHECK: i16 asm sideeffect "mov.b16 $0, $1;", "=h,h" + asm volatile ("mov.b16 %0, %1;" : "=h"(s) : "h"(s)); + // CHECK: i16 asm sideeffect "mov.b16 $0, $1;", "=h,h" + asm volatile ("mov.b16 %0, %1;" : "=h"(us) : "h"(us)); + + // CHECK: i32 asm sideeffect "mov.b32 $0, $1;", "=r,r" + asm volatile ("mov.b32 %0, %1;" : "=r"(i) : "r"(i)); + // CHECK: i32 asm sideeffect "mov.b32 $0, $1;", "=r,r" + asm volatile ("mov.b32 %0, %1;" : "=r"(ui) : "r"(ui)); + + // CHECK: i64 asm sideeffect "mov.b64 $0, $1;", "=l,l" + asm volatile ("mov.b64 %0, %1;" : "=l"(l) : "l"(l)); + // CHECK: i64 asm sideeffect "mov.b64 $0, $1;", "=l,l" + asm volatile ("mov.b64 %0, %1;" : "=l"(ul) : "l"(ul)); + + // CHECK: float asm sideeffect "mov.b32 $0, $1;", "=f,f" + asm volatile ("mov.b32 %0, %1;" : "=f"(f) : "f"(f)); + // CHECK: double asm sideeffect "mov.b64 $0, $1;", "=d,d" + asm volatile ("mov.b64 %0, %1;" : "=d"(d) : "d"(d)); +} diff --git a/test/CodeGen/object-size.c b/test/CodeGen/object-size.c index f6c7db8..5a4dc99 100644 --- a/test/CodeGen/object-size.c +++ 
b/test/CodeGen/object-size.c @@ -13,38 +13,38 @@ char gbuf[63]; char *gp; int gi, gj; -// CHECK: define void @test1 +// CHECK-LABEL: define void @test1 void test1() { // CHECK: = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8]* @gbuf, i32 0, i64 4), i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i64 59) strcpy(&gbuf[4], "Hi there"); } -// CHECK: define void @test2 +// CHECK-LABEL: define void @test2 void test2() { // CHECK: = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8]* @gbuf, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i64 63) strcpy(gbuf, "Hi there"); } -// CHECK: define void @test3 +// CHECK-LABEL: define void @test3 void test3() { // CHECK: = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8]* @gbuf, i64 1, i64 37), i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i64 0) strcpy(&gbuf[100], "Hi there"); } -// CHECK: define void @test4 +// CHECK-LABEL: define void @test4 void test4() { // CHECK: = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8]* @gbuf, i32 0, i64 -1), i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i64 0) strcpy((char*)(void*)&gbuf[-1], "Hi there"); } -// CHECK: define void @test5 +// CHECK-LABEL: define void @test5 void test5() { // CHECK: = load i8** @gp - // CHECK-NEXT:= call i64 @llvm.objectsize.i64(i8* %{{.*}}, i1 false) + // CHECK-NEXT:= call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) strcpy(gp, "Hi there"); } -// CHECK: define void @test6 +// CHECK-LABEL: define void @test6 void test6() { char buf[57]; @@ -52,7 +52,7 @@ void test6() { strcpy(&buf[4], "Hi there"); } -// CHECK: define void @test7 +// CHECK-LABEL: define void @test7 void test7() { int i; // Ensure we only evaluate the side-effect once. 
@@ -62,7 +62,7 @@ void test7() { strcpy((++i, gbuf), "Hi there"); } -// CHECK: define void @test8 +// CHECK-LABEL: define void @test8 void test8() { char *buf[50]; // CHECK-NOT: __strcpy_chk @@ -70,14 +70,14 @@ void test8() { strcpy(buf[++gi], "Hi there"); } -// CHECK: define void @test9 +// CHECK-LABEL: define void @test9 void test9() { // CHECK-NOT: __strcpy_chk // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0)) strcpy((char *)((++gi) + gj), "Hi there"); } -// CHECK: define void @test10 +// CHECK-LABEL: define void @test10 char **p; void test10() { // CHECK-NOT: __strcpy_chk @@ -85,42 +85,42 @@ void test10() { strcpy(*(++p), "Hi there"); } -// CHECK: define void @test11 +// CHECK-LABEL: define void @test11 void test11() { // CHECK-NOT: __strcpy_chk // CHECK: = call i8* @__inline_strcpy_chk(i8* getelementptr inbounds ([63 x i8]* @gbuf, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0)) strcpy(gp = gbuf, "Hi there"); } -// CHECK: define void @test12 +// CHECK-LABEL: define void @test12 void test12() { // CHECK-NOT: __strcpy_chk // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0)) strcpy(++gp, "Hi there"); } -// CHECK: define void @test13 +// CHECK-LABEL: define void @test13 void test13() { // CHECK-NOT: __strcpy_chk // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0)) strcpy(gp++, "Hi there"); } -// CHECK: define void @test14 +// CHECK-LABEL: define void @test14 void test14() { // CHECK-NOT: __strcpy_chk // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0)) strcpy(--gp, "Hi there"); } -// CHECK: define void @test15 +// CHECK-LABEL: define void @test15 void test15() { // CHECK-NOT: __strcpy_chk // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{..*}}, i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0)) strcpy(gp--, "Hi there"); } -// CHECK: define void @test16 +// CHECK-LABEL: define void @test16 void test16() { // CHECK-NOT: __strcpy_chk // CHECK: = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0)) diff --git a/test/CodeGen/override-layout.c b/test/CodeGen/override-layout.c index 99c2cd6..57de8b5 100644 --- a/test/CodeGen/override-layout.c +++ b/test/CodeGen/override-layout.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -w -fdump-record-layouts %s 2> %t.layouts -// RUN: %clang_cc1 -w -fdump-record-layouts-simple %s > %t.before 2>&1 -// RUN: %clang_cc1 -w -DPACKED= -DALIGNED16= -fdump-record-layouts-simple -foverride-record-layout=%t.layouts %s > %t.after 2>&1 +// RUN: %clang_cc1 -w -fdump-record-layouts %s > %t.layouts +// RUN: %clang_cc1 -w -fdump-record-layouts-simple %s > %t.before +// RUN: %clang_cc1 -w -DPACKED= -DALIGNED16= -fdump-record-layouts-simple -foverride-record-layout=%t.layouts %s > %t.after // RUN: diff %t.before %t.after // RUN: FileCheck %s < %t.after @@ -19,24 +19,32 @@ struct X0 { int x[6] PACKED; }; +void use_X0() { struct X0 x0; x0.x[5] = sizeof(struct X0); }; + // CHECK: Type: struct X1 struct X1 { char x[13]; struct X0 y; } PACKED; +void use_X1() { struct X1 x1; x1.x[5] = sizeof(struct X1); }; + // CHECK: Type: struct X2 struct PACKED X2 { short x; int y; }; +void use_X2() { struct X2 x2; x2.y = sizeof(struct X2); }; + // CHECK: Type: struct X3 struct X3 { short x PACKED; int y; }; +void use_X3() { struct X3 x3; x3.y = sizeof(struct X3); }; + 
#pragma pack(push,2) // CHECK: Type: struct X4 struct X4 { @@ -45,18 +53,26 @@ struct X4 { }; #pragma pack(pop) +void use_X4() { struct X4 x4; x4.y = sizeof(struct X4); }; + // CHECK: Type: struct X5 struct PACKED X5 { double a[19]; signed char b; }; +void use_X5() { struct X5 x5; x5.b = sizeof(struct X5); }; + // CHECK: Type: struct X6 struct PACKED X6 { long double a; char b; }; +void use_X6() { struct X6 x6; x6.b = sizeof(struct X6); }; + // CHECK: Type: struct X7 struct X7 { unsigned x; unsigned char y; } PACKED; +void use_X7() { struct X7 x7; x7.y = x7.x = sizeof(struct X7); } + // CHECK: Type: union X8 union X8 { struct X7 x; @@ -114,32 +130,6 @@ struct ALIGNED16 X16 { }; void use_structs() { - struct X0 x0; - x0.x[5] = sizeof(struct X0); - - struct X1 x1; - x1.x[5] = sizeof(struct X1); - - struct X2 x2; - x2.y = sizeof(struct X2); - - struct X3 x3; - x3.y = sizeof(struct X3); - - struct X4 x4; - x4.y = sizeof(struct X4); - - struct X5 x5; - x5.b = sizeof(struct X5); - - struct X6 x6; - x6.b = sizeof(struct X6); - - struct X7 x7; - typedef int X7array[sizeof(struct X7)]; - x7.x = sizeof(struct X7); - x7.y = x7.x; - union X8 x8; typedef int X8array[sizeof(union X8)]; x8.y = sizeof(union X8); diff --git a/test/CodeGen/packed-arrays.c b/test/CodeGen/packed-arrays.c index 0c8bb6c..8e748df 100644 --- a/test/CodeGen/packed-arrays.c +++ b/test/CodeGen/packed-arrays.c @@ -51,10 +51,10 @@ int align2_x0 = __alignof(((struct s2*) 0)->x[0]); // CHECK: @align3_x0 = global i32 4 int align3_x0 = __alignof(((struct s3*) 0)->x[0]); -// CHECK: define i32 @f0_a +// CHECK-LABEL: define i32 @f0_a // CHECK: load i32* %{{.*}}, align 1 // CHECK: } -// CHECK: define i32 @f0_b +// CHECK-LABEL: define i32 @f0_b // CHECK: load i32* %{{.*}}, align 4 // CHECK: } int f0_a(struct s0 *a) { @@ -66,19 +66,19 @@ int f0_b(struct s0 *a) { // Note that we are incompatible with GCC on this example. // -// CHECK: define i32 @f1_a +// CHECK-LABEL: define i32 @f1_a // CHECK: load i32* %{{.*}}, align 1 // CHECK: } -// CHECK: define i32 @f1_b +// CHECK-LABEL: define i32 @f1_b // CHECK: load i32* %{{.*}}, align 4 // CHECK: } // Note that we are incompatible with GCC on this example. 
// -// CHECK: define i32 @f1_c +// CHECK-LABEL: define i32 @f1_c // CHECK: load i32* %{{.*}}, align 4 // CHECK: } -// CHECK: define i32 @f1_d +// CHECK-LABEL: define i32 @f1_d // CHECK: load i32* %{{.*}}, align 1 // CHECK: } int f1_a(struct s1 *a) { @@ -94,16 +94,16 @@ int f1_d(struct s1 *a) { return a->z; } -// CHECK: define i32 @f2_a +// CHECK-LABEL: define i32 @f2_a // CHECK: load i32* %{{.*}}, align 1 // CHECK: } -// CHECK: define i32 @f2_b +// CHECK-LABEL: define i32 @f2_b // CHECK: load i32* %{{.*}}, align 4 // CHECK: } -// CHECK: define i32 @f2_c +// CHECK-LABEL: define i32 @f2_c // CHECK: load i32* %{{.*}}, align 1 // CHECK: } -// CHECK: define i32 @f2_d +// CHECK-LABEL: define i32 @f2_d // CHECK: load i32* %{{.*}}, align 1 // CHECK: } int f2_a(struct s2 *a) { @@ -119,16 +119,16 @@ int f2_d(struct s2 *a) { return a->z; } -// CHECK: define i32 @f3_a +// CHECK-LABEL: define i32 @f3_a // CHECK: load i32* %{{.*}}, align 1 // CHECK: } -// CHECK: define i32 @f3_b +// CHECK-LABEL: define i32 @f3_b // CHECK: load i32* %{{.*}}, align 4 // CHECK: } -// CHECK: define i32 @f3_c +// CHECK-LABEL: define i32 @f3_c // CHECK: load i32* %{{.*}}, align 1 // CHECK: } -// CHECK: define i32 @f3_d +// CHECK-LABEL: define i32 @f3_d // CHECK: load i32* %{{.*}}, align 1 // CHECK: } int f3_a(struct s3 *a) { @@ -146,7 +146,7 @@ int f3_d(struct s3 *a) { // Verify we don't claim things are overaligned. // -// CHECK: define double @f4 +// CHECK-LABEL: define double @f4 // CHECK: load double* {{.*}}, align 8 // CHECK: } extern double g4[5] __attribute__((aligned(16))); diff --git a/test/CodeGen/packed-nest-unpacked.c b/test/CodeGen/packed-nest-unpacked.c index 7f486c9..3931741 100644 --- a/test/CodeGen/packed-nest-unpacked.c +++ b/test/CodeGen/packed-nest-unpacked.c @@ -28,7 +28,7 @@ void test3(struct X a) { // <rdar://problem/10530444> void test4() { // CHECK: @test4 - // FIXME: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i32 1, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i32 1, i1 false) f(g.y); } diff --git a/test/CodeGen/packed-structure.c b/test/CodeGen/packed-structure.c index ffd98db..a915a54 100644 --- a/test/CodeGen/packed-structure.c +++ b/test/CodeGen/packed-structure.c @@ -16,7 +16,7 @@ int s0_align_x = __alignof(((struct s0*)0)->x); int s0_align_y = __alignof(((struct s0*)0)->y); int s0_align = __alignof(struct s0); -// CHECK-FUNCTIONS: define i32 @s0_load_x +// CHECK-FUNCTIONS-LABEL: define i32 @s0_load_x // CHECK-FUNCTIONS: [[s0_load_x:%.*]] = load i32* {{.*}}, align 4 // CHECK-FUNCTIONS: ret i32 [[s0_load_x]] int s0_load_x(struct s0 *a) { return a->x; } @@ -24,11 +24,11 @@ int s0_load_x(struct s0 *a) { return a->x; } // has changed in llvm-gcc recently, previously both x and y would be loaded // with align 1 (in 2363.1 at least). 
// -// CHECK-FUNCTIONS: define i32 @s0_load_y +// CHECK-FUNCTIONS-LABEL: define i32 @s0_load_y // CHECK-FUNCTIONS: [[s0_load_y:%.*]] = load i32* {{.*}}, align 1 // CHECK-FUNCTIONS: ret i32 [[s0_load_y]] int s0_load_y(struct s0 *a) { return a->y; } -// CHECK-FUNCTIONS: define void @s0_copy +// CHECK-FUNCTIONS-LABEL: define void @s0_copy // CHECK-FUNCTIONS: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i32 4, i1 false) void s0_copy(struct s0 *a, struct s0 *b) { *b = *a; } @@ -46,15 +46,15 @@ int s1_align_x = __alignof(((struct s1*)0)->x); int s1_align_y = __alignof(((struct s1*)0)->y); int s1_align = __alignof(struct s1); -// CHECK-FUNCTIONS: define i32 @s1_load_x +// CHECK-FUNCTIONS-LABEL: define i32 @s1_load_x // CHECK-FUNCTIONS: [[s1_load_x:%.*]] = load i32* {{.*}}, align 1 // CHECK-FUNCTIONS: ret i32 [[s1_load_x]] int s1_load_x(struct s1 *a) { return a->x; } -// CHECK-FUNCTIONS: define i32 @s1_load_y +// CHECK-FUNCTIONS-LABEL: define i32 @s1_load_y // CHECK-FUNCTIONS: [[s1_load_y:%.*]] = load i32* {{.*}}, align 1 // CHECK-FUNCTIONS: ret i32 [[s1_load_y]] int s1_load_y(struct s1 *a) { return a->y; } -// CHECK-FUNCTIONS: define void @s1_copy +// CHECK-FUNCTIONS-LABEL: define void @s1_copy // CHECK-FUNCTIONS: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i32 1, i1 false) void s1_copy(struct s1 *a, struct s1 *b) { *b = *a; } @@ -74,15 +74,15 @@ int s2_align_x = __alignof(((struct s2*)0)->x); int s2_align_y = __alignof(((struct s2*)0)->y); int s2_align = __alignof(struct s2); -// CHECK-FUNCTIONS: define i32 @s2_load_x +// CHECK-FUNCTIONS-LABEL: define i32 @s2_load_x // CHECK-FUNCTIONS: [[s2_load_y:%.*]] = load i32* {{.*}}, align 2 // CHECK-FUNCTIONS: ret i32 [[s2_load_y]] int s2_load_x(struct s2 *a) { return a->x; } -// CHECK-FUNCTIONS: define i32 @s2_load_y +// CHECK-FUNCTIONS-LABEL: define i32 @s2_load_y // CHECK-FUNCTIONS: [[s2_load_y:%.*]] = load i32* {{.*}}, align 2 // CHECK-FUNCTIONS: ret i32 [[s2_load_y]] int s2_load_y(struct s2 *a) { return a->y; } -// CHECK-FUNCTIONS: define void @s2_copy +// CHECK-FUNCTIONS-LABEL: define void @s2_copy // CHECK-FUNCTIONS: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i32 2, i1 false) void s2_copy(struct s2 *a, struct s2 *b) { *b = *a; } @@ -92,7 +92,7 @@ struct __attribute__((packed, aligned)) s3 { }; // CHECK-GLOBAL: @s3_1 = global i32 1 int s3_1 = __alignof(((struct s3*) 0)->anInt); -// CHECK-FUNCTIONS: define i32 @test3( +// CHECK-FUNCTIONS-LABEL: define i32 @test3( int test3(struct s3 *ptr) { // CHECK-FUNCTIONS: [[PTR:%.*]] = getelementptr inbounds {{%.*}}* {{%.*}}, i32 0, i32 1 // CHECK-FUNCTIONS-NEXT: load i32* [[PTR]], align 1 diff --git a/test/CodeGen/ppc64-extend.c b/test/CodeGen/ppc64-extend.c index 68d28c7..d46b651 100644 --- a/test/CodeGen/ppc64-extend.c +++ b/test/CodeGen/ppc64-extend.c @@ -1,5 +1,5 @@ // REQUIRES: ppc64-registered-target -// RUN: %clang_cc1 -O0 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s void f1(int x) { return; } // CHECK: define void @f1(i32 signext %x) [[NUW:#[0-9]+]] diff --git a/test/CodeGen/ppc64-struct-onefloat.c b/test/CodeGen/ppc64-struct-onefloat.c index 4f9e194..e26987f 100644 --- a/test/CodeGen/ppc64-struct-onefloat.c +++ b/test/CodeGen/ppc64-struct-onefloat.c @@ -1,5 +1,5 @@ // REQUIRES: ppc64-registered-target -// RUN: %clang_cc1 -O0 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 
-triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s typedef struct s1 { float f; } Sf; typedef struct s2 { double d; } Sd; @@ -8,7 +8,7 @@ typedef struct s5 { Sd ds; } SSd; void bar(Sf a, Sd b, SSf d, SSd e) {} -// CHECK: define void @bar +// CHECK-LABEL: define void @bar // CHECK: %a = alloca %struct.s1, align 4 // CHECK: %b = alloca %struct.s2, align 8 // CHECK: %d = alloca %struct.s4, align 4 @@ -34,7 +34,7 @@ void foo(void) bar(p1, p2, p4, p5); } -// CHECK: define void @foo +// CHECK-LABEL: define void @foo // CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1* %p1, i32 0, i32 0 // CHECK: %{{[0-9]+}} = load float* %{{[a-zA-Z0-9.]+}}, align 1 // CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2* %p2, i32 0, i32 0 diff --git a/test/CodeGen/ppc64-struct-onevect.c b/test/CodeGen/ppc64-struct-onevect.c new file mode 100644 index 0000000..a5a1232 --- /dev/null +++ b/test/CodeGen/ppc64-struct-onevect.c @@ -0,0 +1,13 @@ +// REQUIRES: ppc64-registered-target +// RUN: %clang_cc1 -O2 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s + +typedef float v4sf __attribute__ ((vector_size (16))); + +struct s { v4sf v; }; + +v4sf foo (struct s a) { + return a.v; +} + +// CHECK-LABEL: define <4 x float> @foo(<4 x float> inreg %a.coerce) +// CHECK: ret <4 x float> %a.coerce diff --git a/test/CodeGen/pr12251.c b/test/CodeGen/pr12251.c index b017131..ea74cc6 100644 --- a/test/CodeGen/pr12251.c +++ b/test/CodeGen/pr12251.c @@ -5,7 +5,7 @@ enum e1 g1(enum e1 *x) { return *x; } -// CHECK: define i32 @g1 +// CHECK-LABEL: define i32 @g1 // CHECK: load i32* %x, align 4 // CHECK-NOT: range // CHECK: ret diff --git a/test/CodeGen/pr2394.c b/test/CodeGen/pr2394.c index f1091ec..c92e364 100644 --- a/test/CodeGen/pr2394.c +++ b/test/CodeGen/pr2394.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple i386-apple-darwin10 -emit-llvm -o - | FileCheck %s struct __attribute((packed)) x {int a : 24;}; int a(struct x* g) { // CHECK: load i24 diff --git a/test/CodeGen/pr3518.c b/test/CodeGen/pr3518.c index f96a5aa..ff8d75e 100644 --- a/test/CodeGen/pr3518.c +++ b/test/CodeGen/pr3518.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -emit-llvm -O0 -o - | FileCheck %s +// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s // PR 3518 // Some of the objects were coming out as unintialized (external) before 3518 // was fixed. 
Internal names are different between llvm-gcc and clang so they diff --git a/test/CodeGen/pr4349.c b/test/CodeGen/pr4349.c index 94b4fbd..0169958 100644 --- a/test/CodeGen/pr4349.c +++ b/test/CodeGen/pr4349.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -emit-llvm -O0 -o - | FileCheck %s +// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s // PR 4349 union reg diff --git a/test/CodeGen/pr9614.c b/test/CodeGen/pr9614.c index 8fdb2f2..53abef1 100644 --- a/test/CodeGen/pr9614.c +++ b/test/CodeGen/pr9614.c @@ -18,7 +18,7 @@ void f(void) { strrchr_foo("", '.'); } -// CHECK: define void @f() +// CHECK-LABEL: define void @f() // CHECK: call void @foo() // CHECK-NEXT: call void @bar() // CHECK-NEXT: call i8* @strrchr( diff --git a/test/CodeGen/pragma-comment.c b/test/CodeGen/pragma-comment.c new file mode 100644 index 0000000..30bf7b7 --- /dev/null +++ b/test/CodeGen/pragma-comment.c @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 %s -triple i686-pc-win32 -fms-extensions -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple x86_64-pc-win32 -fms-extensions -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple i686-pc-linux -fms-extensions -emit-llvm -o - | FileCheck -check-prefix LINUX %s + +#pragma comment(lib, "msvcrt.lib") +#pragma comment(lib, "kernel32") +#pragma comment(lib, "USER32.LIB") + +#define BAR "2" +#pragma comment(linker," /bar=" BAR) + +// CHECK: !llvm.module.flags = !{!0} +// CHECK: !0 = metadata !{i32 6, metadata !"Linker Options", metadata ![[link_opts:[0-9]+]]} +// CHECK: ![[link_opts]] = metadata !{metadata ![[msvcrt:[0-9]+]], metadata ![[kernel32:[0-9]+]], metadata ![[USER32:[0-9]+]], metadata ![[bar:[0-9]+]]} +// CHECK: ![[msvcrt]] = metadata !{metadata !"/DEFAULTLIB:msvcrt.lib"} +// CHECK: ![[kernel32]] = metadata !{metadata !"/DEFAULTLIB:kernel32.lib"} +// CHECK: ![[USER32]] = metadata !{metadata !"/DEFAULTLIB:USER32.LIB"} +// CHECK: ![[bar]] = metadata !{metadata !" /bar=2"} + +// LINUX: metadata !{metadata !"-lmsvcrt.lib"} +// LINUX: metadata !{metadata !"-lkernel32"} +// LINUX: metadata !{metadata !"-lUSER32.LIB"} +// LINUX: metadata !{metadata !" /bar=2"} diff --git a/test/CodeGen/pragma-detect_mismatch.c b/test/CodeGen/pragma-detect_mismatch.c new file mode 100644 index 0000000..86cc6d8 --- /dev/null +++ b/test/CodeGen/pragma-detect_mismatch.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 %s -triple i686-pc-win32 -fms-extensions -emit-llvm -o - | FileCheck %s
+
+#pragma detect_mismatch("test", "1")
+
+#define BAR "2"
+#pragma detect_mismatch("test2", BAR)
+
+// CHECK: !llvm.module.flags = !{!0}
+// CHECK: !0 = metadata !{i32 6, metadata !"Linker Options", metadata ![[link_opts:[0-9]+]]}
+// CHECK: ![[link_opts]] = metadata !{metadata ![[test:[0-9]+]], metadata ![[test2:[0-9]+]]}
+// CHECK: ![[test]] = metadata !{metadata !"/FAILIFMISMATCH:\22test=1\22"}
+// CHECK: ![[test2]] = metadata !{metadata !"/FAILIFMISMATCH:\22test2=2\22"}
diff --git a/test/CodeGen/pragma-pack-2.c b/test/CodeGen/pragma-pack-2.c index bfb34d7..1ca3bdf 100644 --- a/test/CodeGen/pragma-pack-2.c +++ b/test/CodeGen/pragma-pack-2.c @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -triple i386-apple-darwin9 %s -emit-llvm -o - | FileCheck -check-prefix X32 %s +// RUN: %clang_cc1 -triple i386-apple-darwin9 %s -emit-llvm -o - | FileCheck -check-prefix CHECK-X32 %s // CHECK-X32: %struct.s0 = type { i64, i64, i32, [12 x i32] } // CHECK-X32: %struct.s1 = type { [15 x i32], %struct.s0 } -// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -emit-llvm -o - | FileCheck -check-prefix X64 %s +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -emit-llvm -o - | FileCheck -check-prefix CHECK-X64 %s // CHECK-X64: %struct.s0 = type <{ i64, i64, i32, [12 x i32] }> // CHECK-X64: %struct.s1 = type <{ [15 x i32], %struct.s0 }> diff --git a/test/CodeGen/pragma-pack-3.c b/test/CodeGen/pragma-pack-3.c index 04b636e..5863174 100644 --- a/test/CodeGen/pragma-pack-3.c +++ b/test/CodeGen/pragma-pack-3.c @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -triple i386-apple-darwin9 %s -emit-llvm -o - | FileCheck -check-prefix X32 %s +// RUN: %clang_cc1 -triple i386-apple-darwin9 %s -emit-llvm -o - | FileCheck -check-prefix CHECK-X32 %s // CHECK-X32: %union.command = type <{ i8*, [2 x i8] }> -// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -emit-llvm -o - | FileCheck -check-prefix X64 %s +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -emit-llvm -o - | FileCheck -check-prefix CHECK-X64 %s // CHECK-X64: %union.command = type <{ i8*, [2 x i8] }> // <rdar://problem/7184250> diff --git a/test/CodeGen/pragma-visibility.c b/test/CodeGen/pragma-visibility.c index a7fceb3..56e73f3 100644 --- a/test/CodeGen/pragma-visibility.c +++ b/test/CodeGen/pragma-visibility.c @@ -17,8 +17,8 @@ int z = 0; #pragma GCC visibility push(hidden) void f() {} -// CHECK: define hidden void @f +// CHECK-LABEL: define hidden void @f __attribute((visibility("default"))) void g(); void g() {} -// CHECK: define void @g +// CHECK-LABEL: define void @g diff --git a/test/CodeGen/pragma-weak.c b/test/CodeGen/pragma-weak.c index d4b1b9f..b5d1863 100644 --- a/test/CodeGen/pragma-weak.c +++ b/test/CodeGen/pragma-weak.c @@ -5,24 +5,22 @@ // CHECK: @correct_linkage = weak global -// CHECK: @both = alias void ()* @__both -// CHECK: @both2 = alias void ()* @__both2 -// CHECK: @both3 = alias weak void ()* @__both3 -// CHECK: @a3 = alias weak void ()* @__a3 -// CHECK: @weakvar_alias = alias weak i32* @__weakvar_alias -// CHECK: @foo = alias weak void ()* @__foo -// CHECK: @foo2 = alias weak void ()* @__foo2 -// CHECK: @stutter = alias weak void ()* @__stutter -// CHECK: @stutter2 = alias weak void ()* @__stutter2 -// CHECK: @declfirst = alias weak void ()* @__declfirst -// CHECK: @declfirstattr = alias weak void ()* @__declfirstattr -// CHECK: @mix2 = alias weak void ()* @__mix2 -// CHECK: @a1 = alias weak void ()* @__a1 -// CHECK: @xxx = alias weak void ()* @__xxx +// CHECK-DAG: @both = alias void ()* @__both +// CHECK-DAG: @both2 = alias void ()* @__both2 +// CHECK-DAG: @weakvar_alias = alias weak i32* @__weakvar_alias +// CHECK-DAG: @foo = alias weak void ()* @__foo +// CHECK-DAG: @foo2 = alias weak void ()* @__foo2 +// CHECK-DAG: @stutter = alias weak void ()* @__stutter +// CHECK-DAG: @stutter2 = alias weak void ()* @__stutter2 +// CHECK-DAG: @declfirst = alias weak void ()* @__declfirst +// CHECK-DAG: @declfirstattr = alias weak void ()* @__declfirstattr +// CHECK-DAG: @mix2 = alias weak void ()* @__mix2 +// CHECK-DAG: @a1 = alias weak 
void ()* @__a1 +// CHECK-DAG: @xxx = alias weak void ()* @__xxx -// CHECK: define weak void @weakdef() +// CHECK-LABEL: define weak void @weakdef() #pragma weak weakvar @@ -42,12 +40,12 @@ int __weakvar_alias; #pragma weak foo = __foo void __foo(void) {} -// CHECK: define void @__foo() +// CHECK-LABEL: define void @__foo() void __foo2(void) {} #pragma weak foo2 = __foo2 -// CHECK: define void @__foo2() +// CHECK-LABEL: define void @__foo2() ///// test errors @@ -69,12 +67,12 @@ typedef int __td2; #pragma weak stutter = __stutter #pragma weak stutter = __stutter void __stutter(void) {} -// CHECK: define void @__stutter() +// CHECK-LABEL: define void @__stutter() void __stutter2(void) {} #pragma weak stutter2 = __stutter2 #pragma weak stutter2 = __stutter2 -// CHECK: define void @__stutter2() +// CHECK-LABEL: define void @__stutter2() // test decl/pragma weak order @@ -82,12 +80,12 @@ void __stutter2(void) {} void __declfirst(void); #pragma weak declfirst = __declfirst void __declfirst(void) {} -// CHECK: define void @__declfirst() +// CHECK-LABEL: define void @__declfirst() void __declfirstattr(void) __attribute((noinline)); #pragma weak declfirstattr = __declfirstattr void __declfirstattr(void) {} -// CHECK: define void @__declfirstattr() +// CHECK-LABEL: define void @__declfirstattr() //// test that other attributes are preserved @@ -96,7 +94,7 @@ void __declfirstattr(void) {} void mix(void); #pragma weak mix __attribute((weak)) void mix(void) { } -// CHECK: define weak void @mix() +// CHECK-LABEL: define weak void @mix() // ensure following __attributes are preserved and that only a single // alias is generated @@ -104,7 +102,7 @@ __attribute((weak)) void mix(void) { } void __mix2(void) __attribute((noinline)); void __mix2(void) __attribute((noinline)); void __mix2(void) {} -// CHECK: define void @__mix2() +// CHECK-LABEL: define void @__mix2() ////////////// test #pragma weak/__attribute combinations @@ -113,7 +111,7 @@ void __mix2(void) {} void both(void) __attribute((alias("__both"))); #pragma weak both = __both void __both(void) {} -// CHECK: define void @__both() +// CHECK-LABEL: define void @__both() // if the TARGET is previously declared then whichever aliasing method // comes first applies and subsequent aliases are discarded. @@ -123,13 +121,7 @@ void __both2(void); void both2(void) __attribute((alias("__both2"))); // first, wins #pragma weak both2 = __both2 void __both2(void) {} -// CHECK: define void @__both2() - -void __both3(void); -#pragma weak both3 = __both3 // first, wins -void both3(void) __attribute((alias("__both3"))); -void __both3(void) {} -// CHECK: define void @__both3() +// CHECK-LABEL: define void @__both2() ///////////// ensure that #pragma weak does not alter existing __attributes() @@ -138,16 +130,8 @@ void __a1(void) __attribute((noinline)); void __a1(void) {} // CHECK: define void @__a1() [[NI:#[0-9]+]] -// attributes introduced BEFORE a combination of #pragma weak and alias() -// hold... 
-void __a3(void) __attribute((noinline)); -#pragma weak a3 = __a3 -void a3(void) __attribute((alias("__a3"))); -void __a3(void) {} -// CHECK: define void @__a3() [[NI]] - #pragma weak xxx = __xxx -__attribute((pure,noinline,const,fastcall)) void __xxx(void) { } +__attribute((pure,noinline,const)) void __xxx(void) { } // CHECK: void @__xxx() [[RN:#[0-9]+]] ///////////// PR10878: Make sure we can call a weak alias @@ -165,6 +149,28 @@ void PR14046f() { } // CHECK: declare extern_weak i32 @PR14046e() +// Parse #pragma weak after a label or case statement +extern int PR16705a(void); +extern int PR16705b(void); +extern int PR16705c(void); +void PR16705f(int a) { + switch(a) { + case 1: +#pragma weak PR16705a + PR16705a(); + default: +#pragma weak PR16705b + PR16705b(); + } +label: + #pragma weak PR16705c + PR16705c(); +} + +// CHECK: declare extern_weak i32 @PR16705a() +// CHECK: declare extern_weak i32 @PR16705b() +// CHECK: declare extern_weak i32 @PR16705c() + ///////////// TODO: stuff that still doesn't work @@ -176,7 +182,7 @@ void yyy(void){} void zzz(void){} #pragma weak yyy // NOTE: weak doesn't apply, not before or in same TopLevelDec(!) -// CHECK: define void @yyy() +// CHECK-LABEL: define void @yyy() int correct_linkage; diff --git a/test/CodeGen/predefined-expr.c b/test/CodeGen/predefined-expr.c index e2826b6..3471dcd 100644 --- a/test/CodeGen/predefined-expr.c +++ b/test/CodeGen/predefined-expr.c @@ -6,6 +6,8 @@ // CHECK: @__PRETTY_FUNCTION__.externFunction = private unnamed_addr constant [22 x i8] c"void externFunction()\00" // CHECK: @__func__.privateExternFunction = private unnamed_addr constant [22 x i8] c"privateExternFunction\00" // CHECK: @__PRETTY_FUNCTION__.privateExternFunction = private unnamed_addr constant [29 x i8] c"void privateExternFunction()\00" +// CHECK: @__func__.__captured_stmt = private unnamed_addr constant [25 x i8] c"functionWithCapturedStmt\00" +// CHECK: @__PRETTY_FUNCTION__.__captured_stmt = private unnamed_addr constant [32 x i8] c"void functionWithCapturedStmt()\00" // CHECK: @__func__.staticFunction = private unnamed_addr constant [15 x i8] c"staticFunction\00" // CHECK: @__PRETTY_FUNCTION__.staticFunction = private unnamed_addr constant [22 x i8] c"void staticFunction()\00" @@ -29,6 +31,15 @@ __private_extern__ void privateExternFunction() { printf("__PRETTY_FUNCTION__ %s\n\n", __PRETTY_FUNCTION__); } +void functionWithCapturedStmt() { + #pragma clang __debug captured + { + printf("__func__ %s\n", __func__); + printf("__FUNCTION__ %s\n", __FUNCTION__); + printf("__PRETTY_FUNCTION__ %s\n\n", __PRETTY_FUNCTION__); + } +} + static void staticFunction() { printf("__func__ %s\n", __func__); printf("__FUNCTION__ %s\n", __FUNCTION__); @@ -39,6 +50,7 @@ int main() { plainFunction(); externFunction(); privateExternFunction(); + functionWithCapturedStmt(); staticFunction(); return 0; diff --git a/test/CodeGen/regparm.c b/test/CodeGen/regparm.c index 4c3752c..151c58f 100644 --- a/test/CodeGen/regparm.c +++ b/test/CodeGen/regparm.c @@ -15,7 +15,7 @@ extern void FASTCALL reduced(char b, double c, foo* d, double e, int f); // PR7025 void FASTCALL f1(int i, int j, int k); -// CHECK: define void @f1(i32 inreg %i, i32 inreg %j, i32 %k) +// CHECK-LABEL: define void @f1(i32 inreg %i, i32 inreg %j, i32 %k) void f1(int i, int j, int k) { } int diff --git a/test/CodeGen/sections.c b/test/CodeGen/sections.c new file mode 100644 index 0000000..7994acf --- /dev/null +++ b/test/CodeGen/sections.c @@ -0,0 +1,28 @@ +// REQUIRES: x86-registered-target + +// RUN: %clang_cc1 
-triple x86_64-pc-linux-gnu -S -o - < %s | FileCheck %s --check-prefix=PLAIN +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -ffunction-sections -fno-function-sections -o - < %s | FileCheck %s --check-prefix=PLAIN + +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -ffunction-sections -o - < %s | FileCheck %s --check-prefix=FUNC_SECT +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -fno-function-sections -ffunction-sections -o - < %s | FileCheck %s --check-prefix=FUNC_SECT + +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -fdata-sections -o - < %s | FileCheck %s --check-prefix=DATA_SECT +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -S -fno-data-sections -fdata-sections -o - < %s | FileCheck %s --check-prefix=DATA_SECT + +const int hello = 123; +void world() {} + +// PLAIN-NOT: section +// PLAIN: world: +// PLAIN: section .rodata, +// PLAIN: hello: + +// FUNC_SECT: section .text.world, +// FUNC_SECT: world: +// FUNC_SECT: section .rodata, +// FUNC_SECT: hello: + +// DATA_SECT-NOT: section +// DATA_SECT: world: +// DATA_SECT: .section .rodata.hello, +// DATA_SECT: hello: diff --git a/test/CodeGen/sha-builtins.c b/test/CodeGen/sha-builtins.c new file mode 100644 index 0000000..181dba1 --- /dev/null +++ b/test/CodeGen/sha-builtins.c @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 %s -O3 -triple=x86_64-unknown-unknown -target-feature +sha -emit-llvm -o - | FileCheck %s + +// Don't include mm_malloc.h, it's system specific. +#define __MM_MALLOC_H + +#include <immintrin.h> + +__m128i test_sha1rnds4(__m128i a, __m128i b) { + // CHECK: call <4 x i32> @llvm.x86.sha1rnds4 + return _mm_sha1rnds4_epu32(a, b, 8); +} +__m128i test_sha1nexte(__m128i a, __m128i b) { + // CHECK: call <4 x i32> @llvm.x86.sha1nexte + return _mm_sha1nexte_epu32(a, b); +} +__m128i test_sha1msg1(__m128i a, __m128i b) { + // CHECK: call <4 x i32> @llvm.x86.sha1msg1 + return _mm_sha1msg1_epu32(a, b); +} +__m128i test_sha1msg2(__m128i a, __m128i b) { + // CHECK: call <4 x i32> @llvm.x86.sha1msg2 + return _mm_sha1msg2_epu32(a, b); +} +__m128i test_sha256rnds2(__m128i a, __m128i b, __m128i c) { + // CHECK: call <4 x i32> @llvm.x86.sha256rnds2 + return _mm_sha256rnds2_epu32(a, b, c); +} +__m128i test_sha256msg1(__m128i a, __m128i b) { + // CHECK: call <4 x i32> @llvm.x86.sha256msg1 + return _mm_sha256msg1_epu32(a, b); +} +__m128i test_sha256msg2(__m128i a, __m128i b) { + // CHECK: call <4 x i32> @llvm.x86.sha256msg2 + return _mm_sha256msg2_epu32(a, b); +} diff --git a/test/CodeGen/sparcv9-abi.c b/test/CodeGen/sparcv9-abi.c new file mode 100644 index 0000000..4ba4be8 --- /dev/null +++ b/test/CodeGen/sparcv9-abi.c @@ -0,0 +1,181 @@ +// RUN: %clang_cc1 -triple sparcv9-unknown-unknown -emit-llvm %s -o - | FileCheck %s +#include <stdarg.h> + +// CHECK-LABEL: define void @f_void() +void f_void(void) {} + +// Arguments and return values smaller than the word size are extended. + +// CHECK-LABEL: define signext i32 @f_int_1(i32 signext %x) +int f_int_1(int x) { return x; } + +// CHECK-LABEL: define zeroext i32 @f_int_2(i32 zeroext %x) +unsigned f_int_2(unsigned x) { return x; } + +// CHECK-LABEL: define i64 @f_int_3(i64 %x) +long long f_int_3(long long x) { return x; } + +// CHECK-LABEL: define signext i8 @f_int_4(i8 signext %x) +char f_int_4(char x) { return x; } + +// Small structs are passed in registers. 
+struct small { + int *a, *b; +}; + +// CHECK-LABEL: define %struct.small @f_small(i32* %x.coerce0, i32* %x.coerce1) +struct small f_small(struct small x) { + x.a += *x.b; + x.b = 0; + return x; +} + +// Medium-sized structs are passed indirectly, but can be returned in registers. +struct medium { + int *a, *b; + int *c, *d; +}; + +// CHECK-LABEL: define %struct.medium @f_medium(%struct.medium* %x) +struct medium f_medium(struct medium x) { + x.a += *x.b; + x.b = 0; + return x; +} + +// Large structs are also returned indirectly. +struct large { + int *a, *b; + int *c, *d; + int x; +}; + +// CHECK-LABEL: define void @f_large(%struct.large* noalias sret %agg.result, %struct.large* %x) +struct large f_large(struct large x) { + x.a += *x.b; + x.b = 0; + return x; +} + +// A 64-bit struct fits in a register. +struct reg { + int a, b; +}; + +// CHECK-LABEL: define i64 @f_reg(i64 %x.coerce) +struct reg f_reg(struct reg x) { + x.a += x.b; + return x; +} + +// Structs with mixed int and float parts require the inreg attribute. +struct mixed { + int a; + float b; +}; + +// CHECK-LABEL: define inreg %struct.mixed @f_mixed(i32 inreg %x.coerce0, float inreg %x.coerce1) +struct mixed f_mixed(struct mixed x) { + x.a += 1; + return x; +} + +// Struct with padding. +struct mixed2 { + int a; + double b; +}; + +// CHECK: define { i64, double } @f_mixed2(i64 %x.coerce0, double %x.coerce1) +// CHECK: store i64 %x.coerce0 +// CHECK: store double %x.coerce1 +struct mixed2 f_mixed2(struct mixed2 x) { + x.a += 1; + return x; +} + +// Struct with single element and padding in passed in the high bits of a +// register. +struct tiny { + char a; +}; + +// CHECK-LABEL: define i64 @f_tiny(i64 %x.coerce) +// CHECK: %[[HB:[^ ]+]] = lshr i64 %x.coerce, 56 +// CHECK: = trunc i64 %[[HB]] to i8 +struct tiny f_tiny(struct tiny x) { + x.a += 1; + return x; +} + +// CHECK-LABEL: define void @call_tiny() +// CHECK: %[[XV:[^ ]+]] = zext i8 %{{[^ ]+}} to i64 +// CHECK: %[[HB:[^ ]+]] = shl i64 %[[XV]], 56 +// CHECK: = call i64 @f_tiny(i64 %[[HB]]) +void call_tiny() { + struct tiny x = { 1 }; + f_tiny(x); +} + +// CHECK-LABEL: define signext i32 @f_variable(i8* %f, ...) +// CHECK: %ap = alloca i8* +// CHECK: call void @llvm.va_start +// +int f_variable(char *f, ...) 
{ + int s = 0; + char c; + va_list ap; + va_start(ap, f); + while ((c = *f++)) switch (c) { + +// CHECK: %[[CUR:[^ ]+]] = load i8** %ap +// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8* %[[CUR]], i32 8 +// CHECK-DAG: store i8* %[[NXT]], i8** %ap +// CHECK-DAG: %[[EXT:[^ ]+]] = getelementptr i8* %[[CUR]], i32 4 +// CHECK-DAG: %[[ADR:[^ ]+]] = bitcast i8* %[[EXT]] to i32* +// CHECK-DAG: load i32* %[[ADR]] +// CHECK: br + case 'i': + s += va_arg(ap, int); + break; + +// CHECK: %[[CUR:[^ ]+]] = load i8** %ap +// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8* %[[CUR]], i32 8 +// CHECK-DAG: store i8* %[[NXT]], i8** %ap +// CHECK-DAG: %[[ADR:[^ ]+]] = bitcast i8* %[[CUR]] to i64* +// CHECK-DAG: load i64* %[[ADR]] +// CHECK: br + case 'l': + s += va_arg(ap, long); + break; + +// CHECK: %[[CUR:[^ ]+]] = load i8** %ap +// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8* %[[CUR]], i32 8 +// CHECK-DAG: store i8* %[[NXT]], i8** %ap +// CHECK-DAG: %[[ADR:[^ ]+]] = bitcast i8* %[[CUR]] to %struct.tiny* +// CHECK: br + case 't': + s += va_arg(ap, struct tiny).a; + break; + +// CHECK: %[[CUR:[^ ]+]] = load i8** %ap +// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8* %[[CUR]], i32 16 +// CHECK-DAG: store i8* %[[NXT]], i8** %ap +// CHECK-DAG: %[[ADR:[^ ]+]] = bitcast i8* %[[CUR]] to %struct.small* +// CHECK: br + case 's': + s += *va_arg(ap, struct small).a; + break; + +// CHECK: %[[CUR:[^ ]+]] = load i8** %ap +// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8* %[[CUR]], i32 8 +// CHECK-DAG: store i8* %[[NXT]], i8** %ap +// CHECK-DAG: %[[IND:[^ ]+]] = bitcast i8* %[[CUR]] to %struct.medium** +// CHECK-DAG: %[[ADR:[^ ]+]] = load %struct.medium** %[[IND]] +// CHECK: br + case 'm': + s += *va_arg(ap, struct medium).a; + break; + } + return s; +} diff --git a/test/CodeGen/sret.c b/test/CodeGen/sret.c index ed1f9a4..828bf9b 100644 --- a/test/CodeGen/sret.c +++ b/test/CodeGen/sret.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -emit-llvm -O0 -o - | grep sret | count 5 +// RUN: %clang_cc1 %s -emit-llvm -o - | grep sret | count 5 struct abc { long a; diff --git a/test/CodeGen/sret2.c b/test/CodeGen/sret2.c index c96ce4d..3757462 100644 --- a/test/CodeGen/sret2.c +++ b/test/CodeGen/sret2.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -emit-llvm -O0 -o - | grep sret | count 2 +// RUN: %clang_cc1 %s -emit-llvm -o - | grep sret | count 2 struct abc { long a; diff --git a/test/CodeGen/sse-builtins.c b/test/CodeGen/sse-builtins.c index 400209f..1f5cb8e 100644 --- a/test/CodeGen/sse-builtins.c +++ b/test/CodeGen/sse-builtins.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -ffreestanding -triple i386-apple-darwin9 -target-cpu pentium4 -target-feature +sse4.1 -g -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -ffreestanding -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -g -emit-llvm %s -o - | FileCheck %s #include <xmmintrin.h> #include <emmintrin.h> @@ -63,7 +63,7 @@ __m128 test_load1_ps(void* y) { } void test_store_ss(__m128 x, void* y) { - // CHECK: define void @test_store_ss + // CHECK-LABEL: define void @test_store_ss // CHECK: store {{.*}} float* {{.*}}, align 1, _mm_store_ss(y, x); } @@ -99,32 +99,32 @@ __m128d test_loadl_pd(__m128d x, void* y) { } void test_store_sd(__m128d x, void* y) { - // CHECK: define void @test_store_sd + // CHECK-LABEL: define void @test_store_sd // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} _mm_store_sd(y, x); } void test_store1_pd(__m128d x, void* y) { - // CHECK: define void @test_store1_pd + // CHECK-LABEL: define void @test_store1_pd // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} // CHECK: 
store {{.*}} double* {{.*}}, align 1{{$}} _mm_store1_pd(y, x); } void test_storer_pd(__m128d x, void* y) { - // CHECK: define void @test_storer_pd + // CHECK-LABEL: define void @test_storer_pd // CHECK: store {{.*}} <2 x double>* {{.*}}, align 16{{$}} _mm_storer_pd(y, x); } void test_storeh_pd(__m128d x, void* y) { - // CHECK: define void @test_storeh_pd + // CHECK-LABEL: define void @test_storeh_pd // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} _mm_storeh_pd(y, x); } void test_storel_pd(__m128d x, void* y) { - // CHECK: define void @test_storel_pd + // CHECK-LABEL: define void @test_storel_pd // CHECK: store {{.*}} double* {{.*}}, align 1{{$}} _mm_storel_pd(y, x); } @@ -184,7 +184,56 @@ __m128d test_mm_round_sd(__m128d x, __m128d y) { } void test_storel_epi64(__m128i x, void* y) { - // CHECK: define void @test_storel_epi64 + // CHECK-LABEL: define void @test_storel_epi64 // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}} _mm_storel_epi64(y, x); } + +void test_stream_si32(int x, void *y) { + // CHECK-LABEL: define void @test_stream_si32 + // CHECK: store {{.*}} i32* {{.*}}, align 1, !nontemporal + _mm_stream_si32(y, x); +} + +void test_stream_si64(long long x, void *y) { + // CHECK-LABEL: define void @test_stream_si64 + // CHECK: store {{.*}} i64* {{.*}}, align 1, !nontemporal + _mm_stream_si64(y, x); +} + +void test_stream_si128(__m128i x, void *y) { + // CHECK-LABEL: define void @test_stream_si128 + // CHECK: store {{.*}} <2 x i64>* {{.*}}, align 16, !nontemporal + _mm_stream_si128(y, x); +} + +void test_extract_epi16(__m128i __a) { + // CHECK-LABEL: define void @test_extract_epi16 + // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7 + // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]] + _mm_extract_epi16(__a, 8); +} + +int test_extract_ps(__m128i __a) { + // CHECK-LABEL: @test_extract_ps + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + return _mm_extract_ps(__a, 4); +} + +int test_extract_epi8(__m128i __a) { + // CHECK-LABEL: @test_extract_epi8 + // CHECK: extractelement <16 x i8> %{{.*}}, i32 0 + return _mm_extract_epi8(__a, 16); +} + +int test_extract_epi32(__m128i __a) { + // CHECK-LABEL: @test_extract_epi32 + // CHECK: extractelement <4 x i32> %{{.*}}, i32 0 + return _mm_extract_epi32(__a, 4); +} + +void test_insert_epi32(__m128i __a, int b) { + // CHECK-LABEL: @test_insert_epi32 + // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0 + _mm_insert_epi32(__a, b, 4); +} diff --git a/test/CodeGen/statements.c b/test/CodeGen/statements.c index 5affb9a..ad5cb62 100644 --- a/test/CodeGen/statements.c +++ b/test/CodeGen/statements.c @@ -1,13 +1,6 @@ // RUN: %clang_cc1 -Wno-error=return-type %s -emit-llvm-only // REQUIRES: LP64 -void test1(int x) { -switch (x) { -case 111111111111111111111111111111111111111: -bar(); -} -} - // Mismatched type between return and function result. 
int test2() { return; } void test3() { return 4; } diff --git a/test/CodeGen/static-order.c b/test/CodeGen/static-order.c index e7f9814..58aabbe 100644 --- a/test/CodeGen/static-order.c +++ b/test/CodeGen/static-order.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s // CHECK: ModuleID // CHECK-NOT: zeroinitializer -// CHECK: define i8* @f +// CHECK-LABEL: define i8* @f struct s { int a; diff --git a/test/CodeGen/stdcall-fastcall.c b/test/CodeGen/stdcall-fastcall.c index d518178..b6217ac 100644 --- a/test/CodeGen/stdcall-fastcall.c +++ b/test/CodeGen/stdcall-fastcall.c @@ -4,17 +4,17 @@ void __attribute__((fastcall)) f1(void); void __attribute__((stdcall)) f2(void); void __attribute__((thiscall)) f3(void); void __attribute__((fastcall)) f4(void) { -// CHECK: define x86_fastcallcc void @f4() +// CHECK-LABEL: define x86_fastcallcc void @f4() f1(); // CHECK: call x86_fastcallcc void @f1() } void __attribute__((stdcall)) f5(void) { -// CHECK: define x86_stdcallcc void @f5() +// CHECK-LABEL: define x86_stdcallcc void @f5() f2(); // CHECK: call x86_stdcallcc void @f2() } void __attribute__((thiscall)) f6(void) { -// CHECK: define x86_thiscallcc void @f6() +// CHECK-LABEL: define x86_thiscallcc void @f6() f3(); // CHECK: call x86_thiscallcc void @f3() } @@ -51,7 +51,7 @@ void f8(void) { void __attribute__((fastcall)) foo1(int y); void bar1(int y) { - // CHECK: define void @bar1 + // CHECK-LABEL: define void @bar1 // CHECK: call x86_fastcallcc void @foo1(i32 inreg % foo1(y); } @@ -61,14 +61,14 @@ struct S1 { }; void __attribute__((fastcall)) foo2(struct S1 y); void bar2(struct S1 y) { - // CHECK: define void @bar2 + // CHECK-LABEL: define void @bar2 // CHECK: call x86_fastcallcc void @foo2(i32 inreg undef, i32 % foo2(y); } void __attribute__((fastcall)) foo3(int *y); void bar3(int *y) { - // CHECK: define void @bar3 + // CHECK-LABEL: define void @bar3 // CHECK: call x86_fastcallcc void @foo3(i32* inreg % foo3(y); } @@ -76,7 +76,7 @@ void bar3(int *y) { enum Enum {Eval}; void __attribute__((fastcall)) foo4(enum Enum y); void bar4(enum Enum y) { - // CHECK: define void @bar4 + // CHECK-LABEL: define void @bar4 // CHECK: call x86_fastcallcc void @foo4(i32 inreg % foo4(y); } @@ -88,49 +88,49 @@ struct S2 { }; void __attribute__((fastcall)) foo5(struct S2 y); void bar5(struct S2 y) { - // CHECK: define void @bar5 + // CHECK-LABEL: define void @bar5 // CHECK: call x86_fastcallcc void @foo5(%struct.S2* byval align 4 % foo5(y); } void __attribute__((fastcall)) foo6(long long y); void bar6(long long y) { - // CHECK: define void @bar6 + // CHECK-LABEL: define void @bar6 // CHECK: call x86_fastcallcc void @foo6(i64 % foo6(y); } void __attribute__((fastcall)) foo7(int a, struct S1 b, int c); void bar7(int a, struct S1 b, int c) { - // CHECK: define void @bar7 + // CHECK-LABEL: define void @bar7 // CHECK: call x86_fastcallcc void @foo7(i32 inreg %{{.*}}, i32 %{{.*}}, i32 %{{.*}} foo7(a, b, c); } void __attribute__((fastcall)) foo8(struct S1 a, int b); void bar8(struct S1 a, int b) { - // CHECK: define void @bar8 + // CHECK-LABEL: define void @bar8 // CHECK: call x86_fastcallcc void @foo8(i32 inreg undef, i32 %{{.*}}, i32 inreg % foo8(a, b); } void __attribute__((fastcall)) foo9(struct S2 a, int b); void bar9(struct S2 a, int b) { - // CHECK: define void @bar9 + // CHECK-LABEL: define void @bar9 // CHECK: call x86_fastcallcc void @foo9(%struct.S2* byval align 4 %{{.*}}, i32 % foo9(a, b); } void __attribute__((fastcall)) foo10(float y, int x); void bar10(float y, int x) { - // CHECK: define 
void @bar10 + // CHECK-LABEL: define void @bar10 // CHECK: call x86_fastcallcc void @foo10(float %{{.*}}, i32 inreg % foo10(y, x); } void __attribute__((fastcall)) foo11(double y, int x); void bar11(double y, int x) { - // CHECK: define void @bar11 + // CHECK-LABEL: define void @bar11 // CHECK: call x86_fastcallcc void @foo11(double %{{.*}}, i32 inreg % foo11(y, x); } @@ -140,7 +140,7 @@ struct S3 { }; void __attribute__((fastcall)) foo12(struct S3 y, int x); void bar12(struct S3 y, int x) { - // CHECK: define void @bar12 + // CHECK-LABEL: define void @bar12 // CHECK: call x86_fastcallcc void @foo12(float %{{.*}}, i32 inreg % foo12(y, x); } diff --git a/test/CodeGen/string-literal-unicode-conversion.c b/test/CodeGen/string-literal-unicode-conversion.c index 3e5b7fb..23205b8 100644 --- a/test/CodeGen/string-literal-unicode-conversion.c +++ b/test/CodeGen/string-literal-unicode-conversion.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s -// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CPP0X %s -// RUN: %clang_cc1 -x c++ -std=c++0x -fshort-wchar -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=SHORTWCHAR %s +// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-C %s +// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-CPP0X %s +// RUN: %clang_cc1 -x c++ -std=c++0x -fshort-wchar -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-SHORTWCHAR %s // This file contains a mix of ISO-8859-1 and UTF-8 encoded data. // the literal assigned to 'aa' should be the ISO-8859-1 encoding for the code @@ -31,10 +31,11 @@ void f() { wchar_t const *b = L"Кошка"; // CHECK-C: private unnamed_addr constant [4 x i32] [i32 20320, i32 22909, i32 66304, i32 0], align 4 - // CHECK-SHORTWCHAR: private unnamed_addr constant [4 x i16] [i16 20320, i16 22909, i16 768, i16 0], align 2 // CHECK-CPP0X: private unnamed_addr constant [4 x i32] [i32 20320, i32 22909, i32 66304, i32 0], align 4 +#if __WCHAR_MAX__ == 2147483647 wchar_t const *b2 = L"\x4f60\x597d\x10300"; - +#endif + #if __cplusplus >= 201103L // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"1\D0\9A\D0\BE\D1\88\D0\BA\D0\B0\00", align 1 diff --git a/test/CodeGen/struct-init.c b/test/CodeGen/struct-init.c index 5273138..30834ac 100644 --- a/test/CodeGen/struct-init.c +++ b/test/CodeGen/struct-init.c @@ -1,5 +1,5 @@ // REQUIRES: arm-registered-target -// RUN: %clang_cc1 -S -triple armv7-apple-darwin %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -S -triple armv7-apple-darwin -target-feature +neon %s -emit-llvm -o - | FileCheck %s typedef struct _zend_ini_entry zend_ini_entry; struct _zend_ini_entry { diff --git a/test/CodeGen/struct-matching-constraint.c b/test/CodeGen/struct-matching-constraint.c index bdd11c8..dfc3014 100644 --- a/test/CodeGen/struct-matching-constraint.c +++ b/test/CodeGen/struct-matching-constraint.c @@ -1,5 +1,5 @@ // REQUIRES: arm-registered-target -// RUN: %clang_cc1 -S -emit-llvm -triple armv7a-apple-darwin %s -o /dev/null +// RUN: %clang_cc1 -S -emit-llvm -triple armv7a-apple-darwin -target-feature +neon %s -o /dev/null typedef unsigned short uint16_t; typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t; diff --git a/test/CodeGen/switch-dce.c b/test/CodeGen/switch-dce.c index a18d3bc..5a68ff2 100644 --- 
a/test/CodeGen/switch-dce.c +++ b/test/CodeGen/switch-dce.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple i386-unknown-unknown -O0 %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-unknown %s -emit-llvm -o - | FileCheck %s // PR9322 and rdar://6970405 diff --git a/test/CodeGen/switch.c b/test/CodeGen/switch.c index 8b94a09..2417a87 100644 --- a/test/CodeGen/switch.c +++ b/test/CodeGen/switch.c @@ -63,7 +63,7 @@ static int foo4(int i) { return j; } -// CHECK: define i32 @foo4t() +// CHECK-LABEL: define i32 @foo4t() // CHECK: ret i32 376 // CHECK: } int foo4t() { @@ -71,7 +71,7 @@ int foo4t() { return foo4(111) + foo4(99) + foo4(222) + foo4(601); } -// CHECK: define void @foo5() +// CHECK-LABEL: define void @foo5() // CHECK-NOT: switch // CHECK: } void foo5(){ @@ -83,7 +83,7 @@ void foo5(){ } } -// CHECK: define void @foo6() +// CHECK-LABEL: define void @foo6() // CHECK-NOT: switch // CHECK: } void foo6(){ @@ -91,7 +91,7 @@ void foo6(){ } } -// CHECK: define void @foo7() +// CHECK-LABEL: define void @foo7() // CHECK-NOT: switch // CHECK: } void foo7(){ @@ -101,7 +101,7 @@ void foo7(){ } -// CHECK: define i32 @f8( +// CHECK-LABEL: define i32 @f8( // CHECK: ret i32 3 // CHECK: } int f8(unsigned x) { @@ -115,7 +115,7 @@ int f8(unsigned x) { // Ensure that default after a case range is not ignored. // -// CHECK: define i32 @f9() +// CHECK-LABEL: define i32 @f9() // CHECK: ret i32 10 // CHECK: } static int f9_0(unsigned x) { @@ -134,7 +134,7 @@ int f9() { // miscompilation of fallthrough from default to a (tested) case // range. // -// CHECK: define i32 @f10() +// CHECK-LABEL: define i32 @f10() // CHECK: ret i32 10 // CHECK: } static int f10_0(unsigned x) { @@ -153,7 +153,7 @@ int f10() { // This generated incorrect code because of poor switch chaining. // -// CHECK: define i32 @f11( +// CHECK-LABEL: define i32 @f11( // CHECK: ret i32 3 // CHECK: } int f11(int x) { @@ -167,7 +167,7 @@ int f11(int x) { // This just asserted because of the way case ranges were calculated. 
// -// CHECK: define i32 @f12( +// CHECK-LABEL: define i32 @f12( // CHECK: ret i32 3 // CHECK: } int f12(int x) { @@ -181,7 +181,7 @@ int f12(int x) { // Make sure return is not constant (if empty range is skipped or miscompiled) // -// CHECK: define i32 @f13( +// CHECK-LABEL: define i32 @f13( // CHECK: ret i32 % // CHECK: } int f13(unsigned x) { diff --git a/test/CodeGen/systemz-inline-asm.c b/test/CodeGen/systemz-inline-asm.c index 8e5854f..c937233 100644 --- a/test/CodeGen/systemz-inline-asm.c +++ b/test/CodeGen/systemz-inline-asm.c @@ -5,31 +5,31 @@ unsigned long gl; void test_store_m(unsigned int i) { asm("st %1, %0" : "=m" (gi) : "r" (i)); -// CHECK: define void @test_store_m(i32 zeroext %i) +// CHECK-LABEL: define void @test_store_m(i32 zeroext %i) // CHECK: call void asm "st $1, $0", "=*m,r"(i32* @gi, i32 %i) } void test_store_Q(unsigned int i) { asm("st %1, %0" : "=Q" (gi) : "r" (i)); -// CHECK: define void @test_store_Q(i32 zeroext %i) +// CHECK-LABEL: define void @test_store_Q(i32 zeroext %i) // CHECK: call void asm "st $1, $0", "=*Q,r"(i32* @gi, i32 %i) } void test_store_R(unsigned int i) { asm("st %1, %0" : "=R" (gi) : "r" (i)); -// CHECK: define void @test_store_R(i32 zeroext %i) +// CHECK-LABEL: define void @test_store_R(i32 zeroext %i) // CHECK: call void asm "st $1, $0", "=*R,r"(i32* @gi, i32 %i) } void test_store_S(unsigned int i) { asm("st %1, %0" : "=S" (gi) : "r" (i)); -// CHECK: define void @test_store_S(i32 zeroext %i) +// CHECK-LABEL: define void @test_store_S(i32 zeroext %i) // CHECK: call void asm "st $1, $0", "=*S,r"(i32* @gi, i32 %i) } void test_store_T(unsigned int i) { asm("st %1, %0" : "=T" (gi) : "r" (i)); -// CHECK: define void @test_store_T(i32 zeroext %i) +// CHECK-LABEL: define void @test_store_T(i32 zeroext %i) // CHECK: call void asm "st $1, $0", "=*T,r"(i32* @gi, i32 %i) } @@ -37,7 +37,7 @@ int test_load_m() { unsigned int i; asm("l %0, %1" : "=r" (i) : "m" (gi)); return i; -// CHECK: define signext i32 @test_load_m() +// CHECK-LABEL: define signext i32 @test_load_m() // CHECK: call i32 asm "l $0, $1", "=r,*m"(i32* @gi) } @@ -45,7 +45,7 @@ int test_load_Q() { unsigned int i; asm("l %0, %1" : "=r" (i) : "Q" (gi)); return i; -// CHECK: define signext i32 @test_load_Q() +// CHECK-LABEL: define signext i32 @test_load_Q() // CHECK: call i32 asm "l $0, $1", "=r,*Q"(i32* @gi) } @@ -53,7 +53,7 @@ int test_load_R() { unsigned int i; asm("l %0, %1" : "=r" (i) : "R" (gi)); return i; -// CHECK: define signext i32 @test_load_R() +// CHECK-LABEL: define signext i32 @test_load_R() // CHECK: call i32 asm "l $0, $1", "=r,*R"(i32* @gi) } @@ -61,7 +61,7 @@ int test_load_S() { unsigned int i; asm("l %0, %1" : "=r" (i) : "S" (gi)); return i; -// CHECK: define signext i32 @test_load_S() +// CHECK-LABEL: define signext i32 @test_load_S() // CHECK: call i32 asm "l $0, $1", "=r,*S"(i32* @gi) } @@ -69,61 +69,61 @@ int test_load_T() { unsigned int i; asm("l %0, %1" : "=r" (i) : "T" (gi)); return i; -// CHECK: define signext i32 @test_load_T() +// CHECK-LABEL: define signext i32 @test_load_T() // CHECK: call i32 asm "l $0, $1", "=r,*T"(i32* @gi) } void test_mI(unsigned char *c) { asm volatile("cli %0, %1" :: "Q" (*c), "I" (100)); -// CHECK: define void @test_mI(i8* %c) +// CHECK-LABEL: define void @test_mI(i8* %c) // CHECK: call void asm sideeffect "cli $0, $1", "*Q,I"(i8* %c, i32 100) } unsigned int test_dJa(unsigned int i, unsigned int j) { asm("sll %0, %2(%3)" : "=d" (i) : "0" (i), "J" (1000), "a" (j)); return i; -// CHECK: define zeroext i32 @test_dJa(i32 zeroext %i, i32 
zeroext %j) +// CHECK-LABEL: define zeroext i32 @test_dJa(i32 zeroext %i, i32 zeroext %j) // CHECK: call i32 asm "sll $0, $2($3)", "=d,0,J,a"(i32 %i, i32 1000, i32 %j) } unsigned long test_rK(unsigned long i) { asm("aghi %0, %2" : "=r" (i) : "0" (i), "K" (-30000)); return i; -// CHECK: define i64 @test_rK(i64 %i) +// CHECK-LABEL: define i64 @test_rK(i64 %i) // CHECK: call i64 asm "aghi $0, $2", "=r,0,K"(i64 %i, i32 -30000) } unsigned long test_rL(unsigned long i) { asm("sllg %0, %1, %2" : "=r" (i) : "r" (i), "L" (500000)); return i; -// CHECK: define i64 @test_rL(i64 %i) +// CHECK-LABEL: define i64 @test_rL(i64 %i) // CHECK: call i64 asm "sllg $0, $1, $2", "=r,r,L"(i64 %i, i32 500000) } void test_M() { asm volatile("#FOO %0" :: "M"(0x7fffffff)); -// CHECK: define void @test_M() +// CHECK-LABEL: define void @test_M() // CHECK: call void asm sideeffect "#FOO $0", "M"(i32 2147483647) } float test_f32(float f, float g) { asm("aebr %0, %2" : "=f" (f) : "0" (f), "f" (g)); return f; -// CHECK: define float @test_f32(float %f, float %g) +// CHECK-LABEL: define float @test_f32(float %f, float %g) // CHECK: call float asm "aebr $0, $2", "=f,0,f"(float %f, float %g) } double test_f64(double f, double g) { asm("adbr %0, %2" : "=f" (f) : "0" (f), "f" (g)); return f; -// CHECK: define double @test_f64(double %f, double %g) +// CHECK-LABEL: define double @test_f64(double %f, double %g) // CHECK: call double asm "adbr $0, $2", "=f,0,f"(double %f, double %g) } long double test_f128(long double f, long double g) { asm("axbr %0, %2" : "=f" (f) : "0" (f), "f" (g)); return f; -// CHECK: define void @test_f128(fp128* noalias nocapture sret [[DEST:%.*]], fp128* byval nocapture, fp128* byval nocapture) +// CHECK: define void @test_f128(fp128* noalias nocapture sret [[DEST:%.*]], fp128* nocapture readonly, fp128* nocapture readonly) // CHECK: %f = load fp128* %0 // CHECK: %g = load fp128* %1 // CHECK: [[RESULT:%.*]] = tail call fp128 asm "axbr $0, $2", "=f,0,f"(fp128 %f, fp128 %g) diff --git a/test/CodeGen/tbaa-class.cpp b/test/CodeGen/tbaa-class.cpp index 967ba19..bdd155d 100644 --- a/test/CodeGen/tbaa-class.cpp +++ b/test/CodeGen/tbaa-class.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-optzns %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -struct-path-tbaa -disable-llvm-optzns %s -emit-llvm -o - | FileCheck %s -check-prefix=PATH +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -no-struct-path-tbaa -disable-llvm-optzns %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-optzns %s -emit-llvm -o - | FileCheck %s -check-prefix=PATH // Test TBAA metadata generated by front-end. 
typedef unsigned char uint8_t; @@ -52,8 +52,8 @@ public: uint32_t g(uint32_t *s, StructA *A, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]] // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32:!.*]] @@ -64,8 +64,8 @@ uint32_t g(uint32_t *s, StructA *A, uint64_t count) { uint32_t g2(uint32_t *s, StructA *A, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa !5 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_i16:!.*]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_A_f16:!.*]] @@ -76,8 +76,8 @@ uint32_t g2(uint32_t *s, StructA *A, uint64_t count) { uint32_t g3(StructA *A, StructB *B, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]] // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_B_a_f32:!.*]] @@ -88,8 +88,8 @@ uint32_t g3(StructA *A, StructB *B, uint64_t count) { uint32_t g4(StructA *A, StructB *B, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa !5 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_i16]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]] // PATH: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_B_a_f16:!.*]] @@ -100,8 +100,8 @@ uint32_t g4(StructA *A, StructB *B, uint64_t count) { uint32_t g5(StructA *A, StructB *B, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]] // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_B_f32:!.*]] @@ -112,8 +112,8 @@ uint32_t g5(StructA *A, StructB *B, uint64_t count) { uint32_t g6(StructA *A, StructB *B, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]] // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_B_a_f32_2:!.*]] @@ -124,8 +124,8 @@ uint32_t g6(StructA *A, StructB *B, uint64_t count) { uint32_t g7(StructA *A, StructS *S, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, 
i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]] // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32:!.*]] @@ -136,8 +136,8 @@ uint32_t g7(StructA *A, StructS *S, uint64_t count) { uint32_t g8(StructA *A, StructS *S, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa !5 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_i16]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]] // PATH: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_S_f16:!.*]] @@ -148,8 +148,8 @@ uint32_t g8(StructA *A, StructS *S, uint64_t count) { uint32_t g9(StructS *S, StructS2 *S2, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32]] // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32:!.*]] @@ -160,8 +160,8 @@ uint32_t g9(StructS *S, StructS2 *S2, uint64_t count) { uint32_t g10(StructS *S, StructS2 *S2, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32]] // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_S2_f32_2:!.*]] @@ -172,8 +172,8 @@ uint32_t g10(StructS *S, StructS2 *S2, uint64_t count) { uint32_t g11(StructC *C, StructD *D, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_C_b_a_f32:!.*]] // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_D_b_a_f32:!.*]] @@ -184,8 +184,8 @@ uint32_t g11(StructC *C, StructD *D, uint64_t count) { uint32_t g12(StructC *C, StructD *D, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // TODO: differentiate the two accesses. 
// PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_B_a_f32]] @@ -198,12 +198,14 @@ uint32_t g12(StructC *C, StructD *D, uint64_t count) { return b1->a.f32; } -// CHECK: !1 = metadata !{metadata !"omnipotent char", metadata !2} -// CHECK: !2 = metadata !{metadata !"Simple C/C++ TBAA"} -// CHECK: !4 = metadata !{metadata !"int", metadata !1} -// CHECK: !5 = metadata !{metadata !"short", metadata !1} +// CHECK: [[TYPE_char:!.*]] = metadata !{metadata !"omnipotent char", metadata [[TAG_cxx_tbaa:!.*]], +// CHECK: [[TAG_cxx_tbaa]] = metadata !{metadata !"Simple C/C++ TBAA"} +// CHECK: [[TAG_i32]] = metadata !{metadata [[TYPE_i32:!.*]], metadata [[TYPE_i32]], i64 0} +// CHECK: [[TYPE_i32]] = metadata !{metadata !"int", metadata [[TYPE_char]], +// CHECK: [[TAG_i16]] = metadata !{metadata [[TYPE_i16:!.*]], metadata [[TYPE_i16]], i64 0} +// CHECK: [[TYPE_i16]] = metadata !{metadata !"short", metadata [[TYPE_char]], -// PATH: [[TYPE_CHAR:!.*]] = metadata !{metadata !"omnipotent char", metadata !3 +// PATH: [[TYPE_CHAR:!.*]] = metadata !{metadata !"omnipotent char", metadata // PATH: [[TAG_i32]] = metadata !{metadata [[TYPE_INT:!.*]], metadata [[TYPE_INT]], i64 0} // PATH: [[TYPE_INT]] = metadata !{metadata !"int", metadata [[TYPE_CHAR]] // PATH: [[TAG_A_f32]] = metadata !{metadata [[TYPE_A:!.*]], metadata [[TYPE_INT]], i64 4} diff --git a/test/CodeGen/tbaa-for-vptr.cpp b/test/CodeGen/tbaa-for-vptr.cpp index 9369036..7ba058b 100644 --- a/test/CodeGen/tbaa-for-vptr.cpp +++ b/test/CodeGen/tbaa-for-vptr.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -emit-llvm -o - -O0 -fsanitize=thread %s | FileCheck %s +// RUN: %clang_cc1 -emit-llvm -o - -fsanitize=thread %s | FileCheck %s // RUN: %clang_cc1 -emit-llvm -o - -O1 %s | FileCheck %s // RUN: %clang_cc1 -emit-llvm -o - -O1 -relaxed-aliasing -fsanitize=thread %s | FileCheck %s // -// RUN: %clang_cc1 -emit-llvm -o - -O0 %s | FileCheck %s --check-prefix=NOTBAA +// RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s --check-prefix=NOTBAA // RUN: %clang_cc1 -emit-llvm -o - -O2 -relaxed-aliasing %s | FileCheck %s --check-prefix=NOTBAA // // Check that we generate TBAA for vtable pointer loads and stores. @@ -23,5 +23,6 @@ void CallFoo(A *a) { // CHECK: %{{.*}} = load {{.*}} !tbaa ![[NUM:[0-9]+]] // CHECK: store {{.*}} !tbaa ![[NUM]] -// CHECK: [[NUM]] = metadata !{metadata !"vtable pointer", metadata !{{.*}}} +// CHECK: [[NUM]] = metadata !{metadata [[TYPE:!.*]], metadata [[TYPE]], i64 0} +// CHECK: [[TYPE]] = metadata !{metadata !"vtable pointer", metadata !{{.*}} // NOTBAA-NOT: = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/tbaa-ms-abi.cpp b/test/CodeGen/tbaa-ms-abi.cpp new file mode 100644 index 0000000..67390b1 --- /dev/null +++ b/test/CodeGen/tbaa-ms-abi.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -cxx-abi microsoft -triple i686-pc-win32 -disable-llvm-optzns -emit-llvm -o - -O1 %s | FileCheck %s +// +// Test that TBAA works in the Microsoft C++ ABI. We used to error out while +// attempting to mangle RTTI. 
+ +struct StructA { + int a; +}; + +struct StructB : virtual StructA { + StructB(); +}; + +StructB::StructB() { + a = 42; +// CHECK: store i32 42, i32* {{.*}}, !tbaa [[TAG_A_i32:!.*]] +} + +// CHECK: [[TYPE_CHAR:!.*]] = metadata !{metadata !"omnipotent char", metadata +// CHECK: [[TYPE_INT:!.*]] = metadata !{metadata !"int", metadata [[TYPE_CHAR]], i64 0} +// CHECK: [[TAG_A_i32]] = metadata !{metadata [[TYPE_A:!.*]], metadata [[TYPE_INT]], i64 0} +// CHECK: [[TYPE_A]] = metadata !{metadata !"?AUStructA@@", metadata [[TYPE_INT]], i64 0} diff --git a/test/CodeGen/tbaa-struct.cpp b/test/CodeGen/tbaa-struct.cpp index 6d593a3..f8bd124 100644 --- a/test/CodeGen/tbaa-struct.cpp +++ b/test/CodeGen/tbaa-struct.cpp @@ -65,10 +65,12 @@ void copy5(struct six *a, struct six *b) { // CHECK: [[TS]] = metadata !{i64 0, i64 2, metadata !{{.*}}, i64 4, i64 4, metadata !{{.*}}, i64 8, i64 1, metadata !{{.*}}, i64 12, i64 4, metadata !{{.*}}} // CHECK: [[CHAR:!.*]] = metadata !{metadata !"omnipotent char", metadata !{{.*}}} -// CHECK: [[INT:!.*]] = metadata !{metadata !"int", metadata [[CHAR]]} +// CHECK: [[TAG_INT:!.*]] = metadata !{metadata [[INT:!.*]], metadata [[INT]], i64 0} +// CHECK: [[INT]] = metadata !{metadata !"int", metadata [[CHAR]] +// CHECK: [[TAG_CHAR:!.*]] = metadata !{metadata [[CHAR]], metadata [[CHAR]], i64 0} // (offset, size) = (0,1) char; (4,2) short; (8,4) int; (12,1) char; (16,4) int; (20,4) int // CHECK: [[TS2]] = metadata !{i64 0, i64 1, metadata !{{.*}}, i64 4, i64 2, metadata !{{.*}}, i64 8, i64 4, metadata !{{.*}}, i64 12, i64 1, metadata !{{.*}}, i64 16, i64 4, metadata {{.*}}, i64 20, i64 4, metadata {{.*}}} // (offset, size) = (0,8) char; (0,2) char; (4,8) char // CHECK: [[TS3]] = metadata !{i64 0, i64 8, metadata !{{.*}}, i64 0, i64 2, metadata !{{.*}}, i64 4, i64 8, metadata !{{.*}}} -// CHECK: [[TS4]] = metadata !{i64 0, i64 1, metadata [[CHAR]], i64 1, i64 1, metadata [[CHAR]], i64 2, i64 1, metadata [[CHAR]]} -// CHECK: [[TS5]] = metadata !{i64 0, i64 1, metadata [[CHAR]], i64 4, i64 4, metadata [[INT]], i64 4, i64 1, metadata [[CHAR]], i64 5, i64 1, metadata [[CHAR]]} +// CHECK: [[TS4]] = metadata !{i64 0, i64 1, metadata [[TAG_CHAR]], i64 1, i64 4, metadata [[TAG_INT]], i64 1, i64 1, metadata [[TAG_CHAR]], i64 2, i64 1, metadata [[TAG_CHAR]]} +// CHECK: [[TS5]] = metadata !{i64 0, i64 1, metadata [[TAG_CHAR]], i64 4, i64 4, metadata [[TAG_INT]], i64 4, i64 1, metadata [[TAG_CHAR]], i64 5, i64 1, metadata [[TAG_CHAR]]} diff --git a/test/CodeGen/tbaa-thread-sanitizer.cpp b/test/CodeGen/tbaa-thread-sanitizer.cpp new file mode 100644 index 0000000..abffae3 --- /dev/null +++ b/test/CodeGen/tbaa-thread-sanitizer.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - %s -fsanitize=thread -relaxed-aliasing -O1 | FileCheck %s + +// Make sure we do not crash when relaxed-aliasing is on. 
+// CHECK-NOT: !tbaa +struct iterator { void *node; }; + +struct pair { + iterator first; + pair(const iterator &a) : first(a) {} +}; + +void equal_range() { + (void)pair(iterator()); +} diff --git a/test/CodeGen/tbaa.cpp b/test/CodeGen/tbaa.cpp index afb8893..92d31e5 100644 --- a/test/CodeGen/tbaa.cpp +++ b/test/CodeGen/tbaa.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-optzns %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -struct-path-tbaa -disable-llvm-optzns %s -emit-llvm -o - | FileCheck %s -check-prefix=PATH +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -no-struct-path-tbaa -disable-llvm-optzns %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-optzns %s -emit-llvm -o - | FileCheck %s -check-prefix=PATH // Test TBAA metadata generated by front-end. typedef unsigned char uint8_t; @@ -46,8 +46,8 @@ typedef struct uint32_t g(uint32_t *s, StructA *A, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]] // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32:!.*]] @@ -58,8 +58,8 @@ uint32_t g(uint32_t *s, StructA *A, uint64_t count) { uint32_t g2(uint32_t *s, StructA *A, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa !5 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_i16:!.*]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_A_f16:!.*]] @@ -70,8 +70,8 @@ uint32_t g2(uint32_t *s, StructA *A, uint64_t count) { uint32_t g3(StructA *A, StructB *B, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]] // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_B_a_f32:!.*]] @@ -82,8 +82,8 @@ uint32_t g3(StructA *A, StructB *B, uint64_t count) { uint32_t g4(StructA *A, StructB *B, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa !5 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_i16]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]] // PATH: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_B_a_f16:!.*]] @@ -94,8 +94,8 @@ uint32_t g4(StructA *A, StructB *B, uint64_t count) { uint32_t g5(StructA *A, StructB *B, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // 
PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]] // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_B_f32:!.*]] @@ -106,8 +106,8 @@ uint32_t g5(StructA *A, StructB *B, uint64_t count) { uint32_t g6(StructA *A, StructB *B, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]] // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_B_a_f32_2:!.*]] @@ -118,8 +118,8 @@ uint32_t g6(StructA *A, StructB *B, uint64_t count) { uint32_t g7(StructA *A, StructS *S, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]] // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32:!.*]] @@ -130,8 +130,8 @@ uint32_t g7(StructA *A, StructS *S, uint64_t count) { uint32_t g8(StructA *A, StructS *S, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa !5 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_i16]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]] // PATH: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_S_f16:!.*]] @@ -142,8 +142,8 @@ uint32_t g8(StructA *A, StructS *S, uint64_t count) { uint32_t g9(StructS *S, StructS2 *S2, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32]] // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_S2_f32:!.*]] @@ -154,8 +154,8 @@ uint32_t g9(StructS *S, StructS2 *S2, uint64_t count) { uint32_t g10(StructS *S, StructS2 *S2, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa !5 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_i16]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32]] // PATH: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_S2_f16:!.*]] @@ -166,8 +166,8 @@ uint32_t g10(StructS *S, StructS2 *S2, uint64_t count) { uint32_t g11(StructC *C, StructD *D, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_C_b_a_f32:!.*]] // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa 
[[TAG_D_b_a_f32:!.*]] @@ -178,8 +178,8 @@ uint32_t g11(StructC *C, StructD *D, uint64_t count) { uint32_t g12(StructC *C, StructD *D, uint64_t count) { // CHECK: define i32 @{{.*}}( -// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa !4 -// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa !4 +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] // TODO: differentiate the two accesses. // PATH: define i32 @{{.*}}( // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_B_a_f32]] @@ -203,7 +203,7 @@ struct five { char g13(struct five *a, struct five *b) { return a->b; // CHECK: define signext i8 @{{.*}}( -// CHECK: load i8* %{{.*}}, align 1, !tbaa !1 +// CHECK: load i8* %{{.*}}, align 1, !tbaa [[TAG_char:!.*]] // PATH: define signext i8 @{{.*}}( // PATH: load i8* %{{.*}}, align 1, !tbaa [[TAG_five_b:!.*]] } @@ -216,18 +216,35 @@ struct six { }; char g14(struct six *a, struct six *b) { // CHECK: define signext i8 @{{.*}}( -// CHECK: load i8* %{{.*}}, align 1, !tbaa !1 +// CHECK: load i8* %{{.*}}, align 1, !tbaa [[TAG_char]] // PATH: define signext i8 @{{.*}}( // PATH: load i8* %{{.*}}, align 1, !tbaa [[TAG_six_b:!.*]] return a->b; } -// CHECK: !1 = metadata !{metadata !"omnipotent char", metadata !2} -// CHECK: !2 = metadata !{metadata !"Simple C/C++ TBAA"} -// CHECK: !4 = metadata !{metadata !"int", metadata !1} -// CHECK: !5 = metadata !{metadata !"short", metadata !1} +// Types that differ only by name may alias. +typedef StructS StructS3; +uint32_t g15(StructS *S, StructS3 *S3, uint64_t count) { +// CHECK: define i32 @{{.*}}( +// CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]] +// PATH: define i32 @{{.*}}( +// PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32]] +// PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32]] + S->f32 = 1; + S3->f32 = 4; + return S->f32; +} + +// CHECK: [[TYPE_char:!.*]] = metadata !{metadata !"omnipotent char", metadata [[TAG_cxx_tbaa:!.*]], +// CHECK: [[TAG_cxx_tbaa]] = metadata !{metadata !"Simple C/C++ TBAA"} +// CHECK: [[TAG_i32]] = metadata !{metadata [[TYPE_i32:!.*]], metadata [[TYPE_i32]], i64 0} +// CHECK: [[TYPE_i32]] = metadata !{metadata !"int", metadata [[TYPE_char]], +// CHECK: [[TAG_i16]] = metadata !{metadata [[TYPE_i16:!.*]], metadata [[TYPE_i16]], i64 0} +// CHECK: [[TYPE_i16]] = metadata !{metadata !"short", metadata [[TYPE_char]], +// CHECK: [[TAG_char]] = metadata !{metadata [[TYPE_char]], metadata [[TYPE_char]], i64 0} -// PATH: [[TYPE_CHAR:!.*]] = metadata !{metadata !"omnipotent char", metadata !3 +// PATH: [[TYPE_CHAR:!.*]] = metadata !{metadata !"omnipotent char", metadata // PATH: [[TAG_i32]] = metadata !{metadata [[TYPE_INT:!.*]], metadata [[TYPE_INT]], i64 0} // PATH: [[TYPE_INT]] = metadata !{metadata !"int", metadata [[TYPE_CHAR]] // PATH: [[TAG_A_f32]] = metadata !{metadata [[TYPE_A:!.*]], metadata [[TYPE_INT]], i64 4} @@ -250,6 +267,6 @@ char g14(struct six *a, struct six *b) { // PATH: [[TAG_D_b_a_f32]] = metadata !{metadata [[TYPE_D:!.*]], metadata [[TYPE_INT]], i64 12} // PATH: [[TYPE_D]] = metadata !{metadata !"_ZTS7StructD", metadata [[TYPE_SHORT]], i64 0, metadata [[TYPE_B]], i64 4, metadata [[TYPE_INT]], i64 28, metadata [[TYPE_CHAR]], i64 32} // PATH: [[TAG_five_b]] = metadata !{metadata [[TYPE_five:!.*]], metadata [[TYPE_CHAR]], i64 1} -// PATH: [[TYPE_five]] = metadata !{metadata !"_ZTS4five", metadata [[TYPE_CHAR]], i64 0, metadata [[TYPE_CHAR]], 
i64 1, metadata [[TYPE_CHAR]], i64 2} +// PATH: [[TYPE_five]] = metadata !{metadata !"_ZTS4five", metadata [[TYPE_CHAR]], i64 0, metadata [[TYPE_INT]], i64 1, metadata [[TYPE_CHAR]], i64 1, metadata [[TYPE_CHAR]], i64 2} // PATH: [[TAG_six_b]] = metadata !{metadata [[TYPE_six:!.*]], metadata [[TYPE_CHAR]], i64 4} // PATH: [[TYPE_six]] = metadata !{metadata !"_ZTS3six", metadata [[TYPE_CHAR]], i64 0, metadata [[TYPE_INT]], i64 4, metadata [[TYPE_CHAR]], i64 4, metadata [[TYPE_CHAR]], i64 5} diff --git a/test/CodeGen/tbm-builtins.c b/test/CodeGen/tbm-builtins.c new file mode 100644 index 0000000..e3a7021 --- /dev/null +++ b/test/CodeGen/tbm-builtins.c @@ -0,0 +1,137 @@ +// RUN: %clang_cc1 %s -O3 -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s + +// Don't include mm_malloc.h, it's system specific. +#define __MM_MALLOC_H + +#include <x86intrin.h> + +unsigned int test__bextri_u32(unsigned int a) { + // CHECK: call i32 @llvm.x86.tbm.bextri.u32 + return __bextri_u32(a, 1); +} + +unsigned long long test__bextri_u64(unsigned long long a) { + // CHECK: call i64 @llvm.x86.tbm.bextri.u64 + return __bextri_u64(a, 2); +} + +unsigned long long test__bextri_u64_bigint(unsigned long long a) { + // CHECK: call i64 @llvm.x86.tbm.bextri.u64 + return __bextri_u64(a, 0x7fffffffffLL); +} + +unsigned int test__blcfill_u32(unsigned int a) { + // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], 1 + // CHECK-NEXT: %{{.*}} = and i32 [[TMP]], [[SRC]] + return __blcfill_u32(a); +} + +unsigned long long test__blcfill_u64(unsigned long long a) { + // CHECK: [[TMP:%.*]] = add i64 [[SRC:%.*]], 1 + // CHECK-NEXT: %{{.*}} = and i64 [[TMP]], [[SRC]] + return __blcfill_u64(a); +} + +unsigned int test__blci_u32(unsigned int a) { + // CHECK: [[TMP:%.*]] = sub i32 -2, [[SRC:%.*]] + // CHECK-NEXT: %{{.*}} = or i32 [[TMP]], [[SRC]] + return __blci_u32(a); +} + +unsigned long long test__blci_u64(unsigned long long a) { + // CHECK: [[TMP:%.*]] = sub i64 -2, [[SRC:%.*]] + // CHECK-NEXT: %{{.*}} = or i64 [[TMP]], [[SRC]] + return __blci_u64(a); +} + +unsigned int test__blcic_u32(unsigned int a) { + // CHECK: [[TMP1:%.*]] = xor i32 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SRC]], 1 + // CHECK-NEXT: {{.*}} = and i32 [[TMP2]], [[TMP1]] + return __blcic_u32(a); +} + +unsigned long long test__blcic_u64(unsigned long long a) { + // CHECK: [[TMP1:%.*]] = xor i64 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i64 [[SRC]], 1 + // CHECK-NEXT: {{.*}} = and i64 [[TMP2]], [[TMP1]] + return __blcic_u64(a); +} + +unsigned int test__blcmsk_u32(unsigned int a) { + // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], 1 + // CHECK-NEXT: {{.*}} = xor i32 [[TMP]], [[SRC]] + return __blcmsk_u32(a); +} + +unsigned long long test__blcmsk_u64(unsigned long long a) { + // CHECK: [[TMP:%.*]] = add i64 [[SRC:%.*]], 1 + // CHECK-NEXT: {{.*}} = xor i64 [[TMP]], [[SRC]] + return __blcmsk_u64(a); +} + +unsigned int test__blcs_u32(unsigned int a) { + // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], 1 + // CHECK-NEXT: {{.*}} = or i32 [[TMP]], [[SRC]] + return __blcs_u32(a); +} + +unsigned long long test__blcs_u64(unsigned long long a) { + // CHECK: [[TMP:%.*]] = add i64 [[SRC:%.*]], 1 + // CHECK-NEXT: {{.*}} = or i64 [[TMP]], [[SRC]] + return __blcs_u64(a); +} + +unsigned int test__blsfill_u32(unsigned int a) { + // CHECK: [[TMP:%.*]] = add i32 [[SRC:%.*]], -1 + // CHECK-NEXT: {{.*}} = or i32 [[TMP]], [[SRC]] + return __blsfill_u32(a); +} + +unsigned long long test__blsfill_u64(unsigned long long a) { + // CHECK: [[TMP:%.*]] = add
i64 [[SRC:%.*]], -1 + // CHECK-NEXT: {{.*}} = or i64 [[TMP]], [[SRC]] + return __blsfill_u64(a); +} + +unsigned int test__blsic_u32(unsigned int a) { + // CHECK: [[TMP1:%.*]] = xor i32 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SRC:%.*]], -1 + // CHECK-NEXT: {{.*}} = or i32 [[TMP2]], [[TMP1]] + return __blsic_u32(a); +} + +unsigned long long test__blsic_u64(unsigned long long a) { + // CHECK: [[TMP1:%.*]] = xor i64 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i64 [[SRC:%.*]], -1 + // CHECK-NEXT: {{.*}} = or i64 [[TMP2]], [[TMP1]] + return __blsic_u64(a); +} + +unsigned int test__t1mskc_u32(unsigned int a) { + // CHECK: [[TMP1:%.*]] = xor i32 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SRC:%.*]], 1 + // CHECK-NEXT: {{.*}} = or i32 [[TMP2]], [[TMP1]] + return __t1mskc_u32(a); +} + +unsigned long long test__t1mskc_u64(unsigned long long a) { + // CHECK: [[TMP1:%.*]] = xor i64 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i64 [[SRC:%.*]], 1 + // CHECK-NEXT: {{.*}} = or i64 [[TMP2]], [[TMP1]] + return __t1mskc_u64(a); +} + +unsigned int test__tzmsk_u32(unsigned int a) { + // CHECK: [[TMP1:%.*]] = xor i32 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SRC:%.*]], -1 + // CHECK-NEXT: {{.*}} = and i32 [[TMP2]], [[TMP1]] + return __tzmsk_u32(a); +} + +unsigned long long test__tzmsk_u64(unsigned long long a) { + // CHECK: [[TMP1:%.*]] = xor i64 [[SRC:%.*]], -1 + // CHECK-NEXT: [[TMP2:%.*]] = add i64 [[SRC:%.*]], -1 + // CHECK-NEXT: {{.*}} = and i64 [[TMP2]], [[TMP1]] + return __tzmsk_u64(a); +} diff --git a/test/CodeGen/transparent-union.c b/test/CodeGen/transparent-union.c index afdb3d6..21040e4 100644 --- a/test/CodeGen/transparent-union.c +++ b/test/CodeGen/transparent-union.c @@ -10,7 +10,7 @@ typedef union { void f0(transp_t0 obj); -// CHECK: define void @f1_0(i32* %a0) +// CHECK-LABEL: define void @f1_0(i32* %a0) // CHECK: call void @f0(%union.transp_t0* byval align 4 %{{.*}}) // CHECK: call void %{{.*}}(i8* %{{[a-z0-9]*}}) // CHECK: } diff --git a/test/CodeGen/trapv.c b/test/CodeGen/trapv.c index bc8bc70..5103410 100644 --- a/test/CodeGen/trapv.c +++ b/test/CodeGen/trapv.c @@ -3,7 +3,7 @@ unsigned int ui, uj, uk; int i, j, k; -// CHECK: define void @test0() +// CHECK-LABEL: define void @test0() void test0() { // -ftrapv doesn't affect unsigned arithmetic. 
// CHECK: [[T1:%.*]] = load i32* @uj @@ -23,7 +23,7 @@ void test0() { i = j + k; } -// CHECK: define void @test1() +// CHECK-LABEL: define void @test1() void test1() { extern void opaque(int); opaque(i++); @@ -37,7 +37,7 @@ void test1() { // CHECK: call void @llvm.trap() } -// CHECK: define void @test2() +// CHECK-LABEL: define void @test2() void test2() { extern void opaque(int); opaque(++i); @@ -51,7 +51,7 @@ void test2() { // CHECK: call void @llvm.trap() } -// CHECK: define void @test3( +// CHECK-LABEL: define void @test3( void test3(int a, int b, float c, float d) { // CHECK-NOT: @llvm.trap (void)(a / b); diff --git a/test/CodeGen/unsigned-overflow.c b/test/CodeGen/unsigned-overflow.c index 341ea35..01ed0bf 100644 --- a/test/CodeGen/unsigned-overflow.c +++ b/test/CodeGen/unsigned-overflow.c @@ -8,7 +8,7 @@ unsigned int ii, ij, ik; extern void opaquelong(unsigned long); extern void opaqueint(unsigned int); -// CHECK: define void @testlongadd() +// CHECK-LABEL: define void @testlongadd() void testlongadd() { // CHECK: [[T1:%.*]] = load i64* @lj @@ -20,7 +20,7 @@ void testlongadd() { li = lj + lk; } -// CHECK: define void @testlongsub() +// CHECK-LABEL: define void @testlongsub() void testlongsub() { // CHECK: [[T1:%.*]] = load i64* @lj @@ -32,7 +32,7 @@ void testlongsub() { li = lj - lk; } -// CHECK: define void @testlongmul() +// CHECK-LABEL: define void @testlongmul() void testlongmul() { // CHECK: [[T1:%.*]] = load i64* @lj @@ -44,7 +44,7 @@ void testlongmul() { li = lj * lk; } -// CHECK: define void @testlongpostinc() +// CHECK-LABEL: define void @testlongpostinc() void testlongpostinc() { opaquelong(li++); @@ -55,7 +55,7 @@ void testlongpostinc() { // CHECK: call void @__ubsan_handle_add_overflow } -// CHECK: define void @testlongpreinc() +// CHECK-LABEL: define void @testlongpreinc() void testlongpreinc() { opaquelong(++li); @@ -66,7 +66,7 @@ void testlongpreinc() { // CHECK: call void @__ubsan_handle_add_overflow } -// CHECK: define void @testintadd() +// CHECK-LABEL: define void @testintadd() void testintadd() { // CHECK: [[T1:%.*]] = load i32* @ij @@ -78,7 +78,7 @@ void testintadd() { ii = ij + ik; } -// CHECK: define void @testintsub() +// CHECK-LABEL: define void @testintsub() void testintsub() { // CHECK: [[T1:%.*]] = load i32* @ij @@ -90,7 +90,7 @@ void testintsub() { ii = ij - ik; } -// CHECK: define void @testintmul() +// CHECK-LABEL: define void @testintmul() void testintmul() { // CHECK: [[T1:%.*]] = load i32* @ij @@ -102,7 +102,7 @@ void testintmul() { ii = ij * ik; } -// CHECK: define void @testintpostinc() +// CHECK-LABEL: define void @testintpostinc() void testintpostinc() { opaqueint(ii++); @@ -113,7 +113,7 @@ void testintpostinc() { // CHECK: call void @__ubsan_handle_add_overflow } -// CHECK: define void @testintpreinc() +// CHECK-LABEL: define void @testintpreinc() void testintpreinc() { opaqueint(++ii); diff --git a/test/CodeGen/unsigned-promotion.c b/test/CodeGen/unsigned-promotion.c index c263c0c..2c34152 100644 --- a/test/CodeGen/unsigned-promotion.c +++ b/test/CodeGen/unsigned-promotion.c @@ -12,8 +12,8 @@ unsigned char ci, cj, ck; extern void opaqueshort(unsigned short); extern void opaquechar(unsigned char); -// CHECKS: define void @testshortadd() -// CHECKU: define void @testshortadd() +// CHECKS-LABEL: define void @testshortadd() +// CHECKU-LABEL: define void @testshortadd() void testshortadd() { // CHECKS: load i16* @sj // CHECKS: load i16* @sk @@ -33,8 +33,8 @@ void testshortadd() { si = sj + sk; } -// CHECKS: define void @testshortsub() -// CHECKU: 
define void @testshortsub() +// CHECKS-LABEL: define void @testshortsub() +// CHECKU-LABEL: define void @testshortsub() void testshortsub() { // CHECKS: load i16* @sj @@ -55,8 +55,8 @@ void testshortsub() { si = sj - sk; } -// CHECKS: define void @testshortmul() -// CHECKU: define void @testshortmul() +// CHECKS-LABEL: define void @testshortmul() +// CHECKU-LABEL: define void @testshortmul() void testshortmul() { // CHECKS: load i16* @sj @@ -76,8 +76,8 @@ void testshortmul() { si = sj * sk; } -// CHECKS: define void @testcharadd() -// CHECKU: define void @testcharadd() +// CHECKS-LABEL: define void @testcharadd() +// CHECKU-LABEL: define void @testcharadd() void testcharadd() { // CHECKS: load i8* @cj @@ -98,8 +98,8 @@ void testcharadd() { ci = cj + ck; } -// CHECKS: define void @testcharsub() -// CHECKU: define void @testcharsub() +// CHECKS-LABEL: define void @testcharsub() +// CHECKU-LABEL: define void @testcharsub() void testcharsub() { // CHECKS: load i8* @cj @@ -120,8 +120,8 @@ void testcharsub() { ci = cj - ck; } -// CHECKS: define void @testcharmul() -// CHECKU: define void @testcharmul() +// CHECKS-LABEL: define void @testcharmul() +// CHECKU-LABEL: define void @testcharmul() void testcharmul() { // CHECKS: load i8* @cj diff --git a/test/CodeGen/unwind-attr.c b/test/CodeGen/unwind-attr.c index e505a6e..5272375 100644 --- a/test/CodeGen/unwind-attr.c +++ b/test/CodeGen/unwind-attr.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -triple i386-unknown-unknown -fexceptions -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -o - %s | FileCheck -check-prefix NOEXC %s +// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -o - %s | FileCheck -check-prefix CHECK-NOEXC %s int opaque(); diff --git a/test/CodeGen/vector.c b/test/CodeGen/vector.c index 3fa5f14..6c14b7f 100644 --- a/test/CodeGen/vector.c +++ b/test/CodeGen/vector.c @@ -55,3 +55,10 @@ unsigned long test_epi16(__m128i x) { return _mm_extract_epi16(x, 3); } // CHECK: @test_epi16 // CHECK: extractelement <8 x i16> {{.*}}, i32 3 // CHECK: zext i16 {{.*}} to i32 + +void extractinttypes() { + extern int check_extract_result_int; + extern __typeof(_mm_extract_epi8(_mm_setzero_si128(), 3)) check_result_int; + extern __typeof(_mm_extract_epi16(_mm_setzero_si128(), 3)) check_result_int; + extern __typeof(_mm_extract_epi32(_mm_setzero_si128(), 3)) check_result_int; +} diff --git a/test/CodeGen/visibility.c b/test/CodeGen/visibility.c index 3082b7b..8e153b8 100644 --- a/test/CodeGen/visibility.c +++ b/test/CodeGen/visibility.c @@ -23,15 +23,15 @@ static char g_deferred[] = "hello"; // CHECK-PROTECTED: @test4 = hidden global i32 10 // CHECK-HIDDEN: @test4 = hidden global i32 10 -// CHECK-DEFAULT: define i32 @f_def() +// CHECK-DEFAULT-LABEL: define i32 @f_def() // CHECK-DEFAULT: declare void @f_ext() -// CHECK-DEFAULT: define internal void @f_deferred() -// CHECK-PROTECTED: define protected i32 @f_def() +// CHECK-DEFAULT-LABEL: define internal void @f_deferred() +// CHECK-PROTECTED-LABEL: define protected i32 @f_def() // CHECK-PROTECTED: declare void @f_ext() -// CHECK-PROTECTED: define internal void @f_deferred() -// CHECK-HIDDEN: define hidden i32 @f_def() +// CHECK-PROTECTED-LABEL: define internal void @f_deferred() +// CHECK-HIDDEN-LABEL: define hidden i32 @f_def() // CHECK-HIDDEN: declare void @f_ext() -// CHECK-HIDDEN: define internal void @f_deferred() +// CHECK-HIDDEN-LABEL: define internal void @f_deferred() extern void f_ext(void); @@ -45,22 +45,22 @@ int f_def(void) { } // PR8457 -// 
CHECK-DEFAULT: define void @test1( -// CHECK-PROTECTED: define void @test1( -// CHECK-HIDDEN: define void @test1( +// CHECK-DEFAULT-LABEL: define void @test1( +// CHECK-PROTECTED-LABEL: define void @test1( +// CHECK-HIDDEN-LABEL: define void @test1( struct Test1 { int field; }; void __attribute__((visibility("default"))) test1(struct Test1 *v) { } // rdar://problem/8595231 -// CHECK-DEFAULT: define void @test2() -// CHECK-PROTECTED: define void @test2() -// CHECK-HIDDEN: define void @test2() +// CHECK-DEFAULT-LABEL: define void @test2() +// CHECK-PROTECTED-LABEL: define void @test2() +// CHECK-HIDDEN-LABEL: define void @test2() void test2(void); void __attribute__((visibility("default"))) test2(void) {} -// CHECK-DEFAULT: define hidden void @test3() -// CHECK-PROTECTED: define hidden void @test3() -// CHECK-HIDDEN: define hidden void @test3() +// CHECK-DEFAULT-LABEL: define hidden void @test3() +// CHECK-PROTECTED-LABEL: define hidden void @test3() +// CHECK-HIDDEN-LABEL: define hidden void @test3() extern void test3(void); __private_extern__ void test3(void) {} @@ -69,8 +69,8 @@ extern int test4; __private_extern__ int test4 = 10; // rdar://12399248 -// CHECK-DEFAULT: define hidden void @test5() -// CHECK-PROTECTED: define hidden void @test5() -// CHECK-HIDDEN: define hidden void @test5() +// CHECK-DEFAULT-LABEL: define hidden void @test5() +// CHECK-PROTECTED-LABEL: define hidden void @test5() +// CHECK-HIDDEN-LABEL: define hidden void @test5() __attribute__((availability(macosx,introduced=10.5,deprecated=10.6))) __private_extern__ void test5(void) {} diff --git a/test/CodeGen/vla.c b/test/CodeGen/vla.c index f63796b..1757ef7 100644 --- a/test/CodeGen/vla.c +++ b/test/CodeGen/vla.c @@ -37,7 +37,7 @@ void g(int count) { } // rdar://8403108 -// CHECK: define void @f_8403108 +// CHECK-LABEL: define void @f_8403108 void f_8403108(unsigned x) { // CHECK: call i8* @llvm.stacksave() char s1[x]; @@ -86,7 +86,7 @@ int test2(int n) } // http://llvm.org/PR8567 -// CHECK: define double @test_PR8567 +// CHECK-LABEL: define double @test_PR8567 double test_PR8567(int n, double (*p)[n][5]) { // CHECK: [[NV:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[PV:%.*]] = alloca [5 x double]*, align 4 @@ -104,7 +104,7 @@ double test_PR8567(int n, double (*p)[n][5]) { } int test4(unsigned n, char (*p)[n][n+1][6]) { - // CHECK: define i32 @test4( + // CHECK-LABEL: define i32 @test4( // CHECK: [[N:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[P:%.*]] = alloca [6 x i8]*, align 4 // CHECK-NEXT: [[P2:%.*]] = alloca [6 x i8]*, align 4 @@ -146,7 +146,7 @@ int test4(unsigned n, char (*p)[n][n+1][6]) { // rdar://11485774 void test5(void) { - // CHECK: define void @test5( + // CHECK-LABEL: define void @test5( int a[5], i = 0; // CHECK: [[A:%.*]] = alloca [5 x i32], align 4 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 @@ -169,7 +169,7 @@ void test5(void) void test6(void) { - // CHECK: define void @test6( + // CHECK-LABEL: define void @test6( int n = 20, **a, i=0; // CHECK: [[N:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[A:%.*]] = alloca i32**, align 4 @@ -192,6 +192,6 @@ void test6(void) // Follow gcc's behavior for VLAs in parameter lists. PR9559. 
void test7(int a[b(0)]) { - // CHECK: define void @test7( + // CHECK-LABEL: define void @test7( // CHECK: call i32 @b(i8* null) } diff --git a/test/CodeGen/vld_dup.c b/test/CodeGen/vld_dup.c index 2bc2519..9590412 100644 --- a/test/CodeGen/vld_dup.c +++ b/test/CodeGen/vld_dup.c @@ -1,7 +1,7 @@ // REQUIRES: arm-registered-target // RUN: %clang_cc1 -triple armv7a-linux-gnueabi \ // RUN: -target-cpu cortex-a8 \ -// RUN: -emit-llvm -O0 -o - %s | FileCheck %s +// RUN: -emit-llvm -o - %s | FileCheck %s #include <arm_neon.h> int main(){ int32_t v0[3]; diff --git a/test/CodeGen/volatile-1.c b/test/CodeGen/volatile-1.c index 6551159..d1861d5 100644 --- a/test/CodeGen/volatile-1.c +++ b/test/CodeGen/volatile-1.c @@ -22,49 +22,49 @@ int printf(const char *, ...); // that do implicit lvalue-to-rvalue conversion are substantially // reduced. -// CHECK: define void @test() +// CHECK-LABEL: define void @test() void test() { // CHECK: load volatile [[INT]]* @i i; - // CHECK-NEXT: load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0) - // CHECK-NEXT: load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1) + // CHECK-NEXT: load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4 + // CHECK-NEXT: load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4 // CHECK-NEXT: sitofp [[INT]] (float)(ci); - // CHECK-NEXT: load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0) - // CHECK-NEXT: load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1) + // CHECK-NEXT: load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4 + // CHECK-NEXT: load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4 (void)ci; // CHECK-NEXT: bitcast // CHECK-NEXT: memcpy (void)a; - // CHECK-NEXT: [[R:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0) - // CHECK-NEXT: [[I:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1) - // CHECK-NEXT: store volatile [[INT]] [[R]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0) - // CHECK-NEXT: store volatile [[INT]] [[I]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1) + // CHECK-NEXT: [[R:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4 + // CHECK-NEXT: [[I:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4 + // CHECK-NEXT: store volatile [[INT]] [[R]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4 + // CHECK-NEXT: store volatile [[INT]] [[I]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4 (void)(ci=ci); // CHECK-NEXT: [[T:%.*]] = load volatile [[INT]]* @j // CHECK-NEXT: store volatile [[INT]] [[T]], [[INT]]* @i (void)(i=j); - // CHECK-NEXT: [[R1:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0) - // CHECK-NEXT: [[I1:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1) - // CHECK-NEXT: [[R2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0) - // CHECK-NEXT: [[I2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1) + // CHECK-NEXT: [[R1:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4 + // CHECK-NEXT: [[I1:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4 + // CHECK-NEXT: 
[[R2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4 + // CHECK-NEXT: [[I2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4 // Not sure why they're ordered this way. // CHECK-NEXT: [[R:%.*]] = add [[INT]] [[R2]], [[R1]] // CHECK-NEXT: [[I:%.*]] = add [[INT]] [[I2]], [[I1]] - // CHECK-NEXT: store volatile [[INT]] [[R]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0) - // CHECK-NEXT: store volatile [[INT]] [[I]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1) + // CHECK-NEXT: store volatile [[INT]] [[R]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4 + // CHECK-NEXT: store volatile [[INT]] [[I]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4 ci+=ci; - // CHECK-NEXT: [[R1:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0) - // CHECK-NEXT: [[I1:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1) - // CHECK-NEXT: [[R2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0) - // CHECK-NEXT: [[I2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1) + // CHECK-NEXT: [[R1:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4 + // CHECK-NEXT: [[I1:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4 + // CHECK-NEXT: [[R2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4 + // CHECK-NEXT: [[I2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4 // CHECK-NEXT: [[R:%.*]] = add [[INT]] [[R2]], [[R1]] // CHECK-NEXT: [[I:%.*]] = add [[INT]] [[I2]], [[I1]] - // CHECK-NEXT: store volatile [[INT]] [[R]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0) - // CHECK-NEXT: store volatile [[INT]] [[I]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1) - // CHECK-NEXT: [[R2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0) - // CHECK-NEXT: [[I2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1) + // CHECK-NEXT: store volatile [[INT]] [[R]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4 + // CHECK-NEXT: store volatile [[INT]] [[I]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4 + // CHECK-NEXT: [[R2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4 + // CHECK-NEXT: [[I2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4 // These additions can be elided // CHECK-NEXT: add [[INT]] [[R]], [[R2]] // CHECK-NEXT: add [[INT]] [[I]], [[I2]] @@ -303,7 +303,7 @@ void test() { } extern volatile enum X x; -// CHECK: define void @test1() +// CHECK-LABEL: define void @test1() void test1() { extern void test1_helper(void); test1_helper(); @@ -313,3 +313,15 @@ void test1() { (void) x; return x; } + +// CHECK: define {{.*}} @test2() +int test2() { + // CHECK: load volatile i32* + // CHECK-NEXT: load volatile i32* + // CHECK-NEXT: load volatile i32* + // CHECK-NEXT: add i32 + // CHECK-NEXT: add i32 + // CHECK-NEXT: store volatile i32 + // CHECK-NEXT: ret i32 + return i += ci; +} diff --git a/test/CodeGen/volatile-2.c b/test/CodeGen/volatile-2.c index 3d342de..18d0d31 100644 --- a/test/CodeGen/volatile-2.c +++ b/test/CodeGen/volatile-2.c @@ -1,10 +1,10 @@ // RUN: 
%clang_cc1 -triple x86_64-apple-darwin10 -emit-llvm -o - %s | FileCheck %s void test0() { - // CHECK: define void @test0() + // CHECK-LABEL: define void @test0() // CHECK: [[F:%.*]] = alloca float - // CHECK-NEXT: [[REAL:%.*]] = load volatile float* getelementptr inbounds ({ float, float }* @test0_v, i32 0, i32 0) - // CHECK-NEXT: load volatile float* getelementptr inbounds ({{.*}} @test0_v, i32 0, i32 1) + // CHECK-NEXT: [[REAL:%.*]] = load volatile float* getelementptr inbounds ({ float, float }* @test0_v, i32 0, i32 0), align 4 + // CHECK-NEXT: load volatile float* getelementptr inbounds ({{.*}} @test0_v, i32 0, i32 1), align 4 // CHECK-NEXT: store float [[REAL]], float* [[F]], align 4 // CHECK-NEXT: ret void extern volatile _Complex float test0_v; @@ -12,11 +12,11 @@ void test0() { } void test1() { - // CHECK: define void @test1() - // CHECK: [[REAL:%.*]] = load volatile float* getelementptr inbounds ({{.*}} @test1_v, i32 0, i32 0) - // CHECK-NEXT: [[IMAG:%.*]] = load volatile float* getelementptr inbounds ({{.*}} @test1_v, i32 0, i32 1) - // CHECK-NEXT: store volatile float [[REAL]], float* getelementptr inbounds ({{.*}} @test1_v, i32 0, i32 0) - // CHECK-NEXT: store volatile float [[IMAG]], float* getelementptr inbounds ({{.*}} @test1_v, i32 0, i32 1) + // CHECK-LABEL: define void @test1() + // CHECK: [[REAL:%.*]] = load volatile float* getelementptr inbounds ({{.*}} @test1_v, i32 0, i32 0), align 4 + // CHECK-NEXT: [[IMAG:%.*]] = load volatile float* getelementptr inbounds ({{.*}} @test1_v, i32 0, i32 1), align 4 + // CHECK-NEXT: store volatile float [[REAL]], float* getelementptr inbounds ({{.*}} @test1_v, i32 0, i32 0), align 4 + // CHECK-NEXT: store volatile float [[IMAG]], float* getelementptr inbounds ({{.*}} @test1_v, i32 0, i32 1), align 4 // CHECK-NEXT: ret void extern volatile _Complex float test1_v; test1_v = test1_v; diff --git a/test/CodeGen/volatile-complex.c b/test/CodeGen/volatile-complex.c new file mode 100644 index 0000000..71e5db6 --- /dev/null +++ b/test/CodeGen/volatile-complex.c @@ -0,0 +1,67 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s + +// Validate that volatile _Complex loads and stores are generated +// properly, including their alignment (even when overaligned). +// +// This test assumes that floats are 32-bit aligned and doubles are +// 64-bit aligned, and uses x86-64 as a target that should have this +// datalayout. 
+ +// CHECK: target datalayout = "{{.*}}f32:32:32-f64:64:64{{.*}}" + +volatile _Complex float cf; +volatile _Complex double cd; +volatile _Complex float cf32 __attribute__((aligned(32))); +volatile _Complex double cd32 __attribute__((aligned(32))); + +// CHECK-LABEL: define void @test_cf() +void test_cf() { + // CHECK: load volatile float* getelementptr inbounds ({ float, float }* @cf, i32 0, i32 0), align 4 + // CHECK-NEXT: load volatile float* getelementptr inbounds ({ float, float }* @cf, i32 0, i32 1), align 4 + (void)(cf); + // CHECK-NEXT: [[R:%.*]] = load volatile float* getelementptr inbounds ({ float, float }* @cf, i32 0, i32 0), align 4 + // CHECK-NEXT: [[I:%.*]] = load volatile float* getelementptr inbounds ({ float, float }* @cf, i32 0, i32 1), align 4 + // CHECK-NEXT: store volatile float [[R]], float* getelementptr inbounds ({ float, float }* @cf, i32 0, i32 0), align 4 + // CHECK-NEXT: store volatile float [[I]], float* getelementptr inbounds ({ float, float }* @cf, i32 0, i32 1), align 4 + (void)(cf=cf); + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define void @test_cd() +void test_cd() { + // CHECK: load volatile double* getelementptr inbounds ({ double, double }* @cd, i32 0, i32 0), align 8 + // CHECK-NEXT: load volatile double* getelementptr inbounds ({ double, double }* @cd, i32 0, i32 1), align 8 + (void)(cd); + // CHECK-NEXT: [[R:%.*]] = load volatile double* getelementptr inbounds ({ double, double }* @cd, i32 0, i32 0), align 8 + // CHECK-NEXT: [[I:%.*]] = load volatile double* getelementptr inbounds ({ double, double }* @cd, i32 0, i32 1), align 8 + // CHECK-NEXT: store volatile double [[R]], double* getelementptr inbounds ({ double, double }* @cd, i32 0, i32 0), align 8 + // CHECK-NEXT: store volatile double [[I]], double* getelementptr inbounds ({ double, double }* @cd, i32 0, i32 1), align 8 + (void)(cd=cd); + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define void @test_cf32() +void test_cf32() { + // CHECK: load volatile float* getelementptr inbounds ({ float, float }* @cf32, i32 0, i32 0), align 32 + // CHECK-NEXT: load volatile float* getelementptr inbounds ({ float, float }* @cf32, i32 0, i32 1), align 4 + (void)(cf32); + // CHECK-NEXT: [[R:%.*]] = load volatile float* getelementptr inbounds ({ float, float }* @cf32, i32 0, i32 0), align 32 + // CHECK-NEXT: [[I:%.*]] = load volatile float* getelementptr inbounds ({ float, float }* @cf32, i32 0, i32 1), align 4 + // CHECK-NEXT: store volatile float [[R]], float* getelementptr inbounds ({ float, float }* @cf32, i32 0, i32 0), align 32 + // CHECK-NEXT: store volatile float [[I]], float* getelementptr inbounds ({ float, float }* @cf32, i32 0, i32 1), align 4 + (void)(cf32=cf32); + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define void @test_cd32() +void test_cd32() { + // CHECK: load volatile double* getelementptr inbounds ({ double, double }* @cd32, i32 0, i32 0), align 32 + // CHECK-NEXT: load volatile double* getelementptr inbounds ({ double, double }* @cd32, i32 0, i32 1), align 8 + (void)(cd32); + // CHECK-NEXT: [[R:%.*]] = load volatile double* getelementptr inbounds ({ double, double }* @cd32, i32 0, i32 0), align 32 + // CHECK-NEXT: [[I:%.*]] = load volatile double* getelementptr inbounds ({ double, double }* @cd32, i32 0, i32 1), align 8 + // CHECK-NEXT: store volatile double [[R]], double* getelementptr inbounds ({ double, double }* @cd32, i32 0, i32 0), align 32 + // CHECK-NEXT: store volatile double [[I]], double* getelementptr inbounds ({ double, double }* @cd32, i32 
0, i32 1), align 8 + (void)(cd32=cd32); + // CHECK-NEXT: ret void +} diff --git a/test/CodeGen/wchar-const.c b/test/CodeGen/wchar-const.c index a9e7e52..2e9af53 100644 --- a/test/CodeGen/wchar-const.c +++ b/test/CodeGen/wchar-const.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -emit-llvm %s -o - -triple i386-pc-win32 | FileCheck %s --check-prefix=WIN -// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-apple-darwin | FileCheck %s --check-prefix=DAR +// RUN: %clang_cc1 -emit-llvm %s -o - -triple i386-pc-win32 | FileCheck %s --check-prefix=CHECK-WIN +// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-apple-darwin | FileCheck %s --check-prefix=CHECK-DAR // This should pass for any endianness combination of host and target. // This bit is taken from Sema/wchar.c so we can avoid the wchar.h include. diff --git a/test/CodeGen/x86_32-arguments-darwin.c b/test/CodeGen/x86_32-arguments-darwin.c index 4aa4295..422e030 100644 --- a/test/CodeGen/x86_32-arguments-darwin.c +++ b/test/CodeGen/x86_32-arguments-darwin.c @@ -1,44 +1,44 @@ // RUN: %clang_cc1 -w -fblocks -triple i386-apple-darwin9 -target-cpu yonah -emit-llvm -o - %s | FileCheck %s -// CHECK: define signext i8 @f0() +// CHECK-LABEL: define signext i8 @f0() char f0(void) { return 0; } -// CHECK: define signext i16 @f1() +// CHECK-LABEL: define signext i16 @f1() short f1(void) { return 0; } -// CHECK: define i32 @f2() +// CHECK-LABEL: define i32 @f2() int f2(void) { return 0; } -// CHECK: define float @f3() +// CHECK-LABEL: define float @f3() float f3(void) { return 0; } -// CHECK: define double @f4() +// CHECK-LABEL: define double @f4() double f4(void) { return 0; } -// CHECK: define x86_fp80 @f5() +// CHECK-LABEL: define x86_fp80 @f5() long double f5(void) { return 0; } -// CHECK: define void @f6(i8 signext %a0, i16 signext %a1, i32 %a2, i64 %a3, i8* %a4) +// CHECK-LABEL: define void @f6(i8 signext %a0, i16 signext %a1, i32 %a2, i64 %a3, i8* %a4) void f6(char a0, short a1, int a2, long long a3, void *a4) {} -// CHECK: define void @f7(i32 %a0) +// CHECK-LABEL: define void @f7(i32 %a0) typedef enum { A, B, C } e7; void f7(e7 a0) {} -// CHECK: define i64 @f8_1() -// CHECK: define void @f8_2(i32 %a0.0, i32 %a0.1) +// CHECK-LABEL: define i64 @f8_1() +// CHECK-LABEL: define void @f8_2(i32 %a0.0, i32 %a0.1) struct s8 { int a; int b; @@ -48,11 +48,11 @@ void f8_2(struct s8 a0) {} // This should be passed just as s8. -// CHECK: define i64 @f9_1() +// CHECK-LABEL: define i64 @f9_1() // FIXME: llvm-gcc expands this, this may have some value for the // backend in terms of optimization but doesn't change the ABI. 
-// CHECK: define void @f9_2(%struct.s9* byval align 4 %a0) +// CHECK-LABEL: define void @f9_2(%struct.s9* byval align 4 %a0) struct s9 { int a : 17; int b; @@ -123,91 +123,91 @@ struct s27 { struct { char a, b, c; } a; struct { char a; } b; } f27(void) { whi // CHECK: void @f28(%struct.s28* noalias sret %agg.result) struct s28 { int a; int b[]; } f28(void) { while (1) {} } -// CHECK: define i16 @f29() +// CHECK-LABEL: define i16 @f29() struct s29 { struct { } a[1]; char b; char c; } f29(void) { while (1) {} } -// CHECK: define i16 @f30() +// CHECK-LABEL: define i16 @f30() struct s30 { char a; char b : 4; } f30(void) { while (1) {} } -// CHECK: define float @f31() +// CHECK-LABEL: define float @f31() struct s31 { char : 0; float b; char : 0; } f31(void) { while (1) {} } -// CHECK: define i32 @f32() +// CHECK-LABEL: define i32 @f32() struct s32 { char a; unsigned : 0; } f32(void) { while (1) {} } -// CHECK: define float @f33() +// CHECK-LABEL: define float @f33() struct s33 { float a; long long : 0; } f33(void) { while (1) {} } -// CHECK: define float @f34() +// CHECK-LABEL: define float @f34() struct s34 { struct { int : 0; } a; float b; } f34(void) { while (1) {} } -// CHECK: define i16 @f35() +// CHECK-LABEL: define i16 @f35() struct s35 { struct { int : 0; } a; char b; char c; } f35(void) { while (1) {} } -// CHECK: define i16 @f36() +// CHECK-LABEL: define i16 @f36() struct s36 { struct { int : 0; } a[2][10]; char b; char c; } f36(void) { while (1) {} } -// CHECK: define float @f37() +// CHECK-LABEL: define float @f37() struct s37 { float c[1][1]; } f37(void) { while (1) {} } -// CHECK: define void @f38(%struct.s38* noalias sret %agg.result) +// CHECK-LABEL: define void @f38(%struct.s38* noalias sret %agg.result) struct s38 { char a[3]; short b; } f38(void) { while (1) {} } -// CHECK: define void @f39(%struct.s39* byval align 16 %x) +// CHECK-LABEL: define void @f39(%struct.s39* byval align 16 %x) typedef int v39 __attribute((vector_size(16))); struct s39 { v39 x; }; void f39(struct s39 x) {} // <rdar://problem/7247671> -// CHECK: define i32 @f40() +// CHECK-LABEL: define i32 @f40() enum e40 { ec0 = 0 }; enum e40 f40(void) { } -// CHECK: define void ()* @f41() +// CHECK-LABEL: define void ()* @f41() typedef void (^vvbp)(void); vvbp f41(void) { } -// CHECK: define i32 @f42() +// CHECK-LABEL: define i32 @f42() struct s42 { enum e40 f0; } f42(void) { } -// CHECK: define i64 @f43() +// CHECK-LABEL: define i64 @f43() struct s43 { enum e40 f0; int f1; } f43(void) { } -// CHECK: define void ()* @f44() +// CHECK-LABEL: define void ()* @f44() struct s44 { vvbp f0; } f44(void) { } -// CHECK: define i64 @f45() +// CHECK-LABEL: define i64 @f45() struct s45 { vvbp f0; int f1; } f45(void) { } -// CHECK: define void @f46(i32 %a0) +// CHECK-LABEL: define void @f46(i32 %a0) void f46(enum e40 a0) { } -// CHECK: define void @f47(void ()* %a1) +// CHECK-LABEL: define void @f47(void ()* %a1) void f47(vvbp a1) { } -// CHECK: define void @f48(i32 %a0.0) +// CHECK-LABEL: define void @f48(i32 %a0.0) struct s48 { enum e40 f0; }; void f48(struct s48 a0) { } -// CHECK: define void @f49(i32 %a0.0, i32 %a0.1) +// CHECK-LABEL: define void @f49(i32 %a0.0, i32 %a0.1) struct s49 { enum e40 f0; int f1; }; void f49(struct s49 a0) { } -// CHECK: define void @f50(void ()* %a0.0) +// CHECK-LABEL: define void @f50(void ()* %a0.0) struct s50 { vvbp f0; }; void f50(struct s50 a0) { } -// CHECK: define void @f51(void ()* %a0.0, i32 %a0.1) +// CHECK-LABEL: define void @f51(void ()* %a0.0, i32 %a0.1) struct s51 { vvbp f0; int f1; 
}; void f51(struct s51 a0) { } -// CHECK: define void @f52(%struct.s52* byval align 4) +// CHECK-LABEL: define void @f52(%struct.s52* byval align 4) struct s52 { long double a; }; void f52(struct s52 x) {} -// CHECK: define void @f53(%struct.s53* byval align 4) +// CHECK-LABEL: define void @f53(%struct.s53* byval align 4) struct __attribute__((aligned(32))) s53 { int x; int y; @@ -216,18 +216,18 @@ void f53(struct s53 x) {} typedef unsigned short v2i16 __attribute__((__vector_size__(4))); -// CHECK: define i32 @f54(i32 %arg.coerce) +// CHECK-LABEL: define i32 @f54(i32 %arg.coerce) // rdar://8359483 v2i16 f54(v2i16 arg) { return arg+arg; } typedef int v4i32 __attribute__((__vector_size__(16))); -// CHECK: define <2 x i64> @f55(<4 x i32> %arg) +// CHECK-LABEL: define <2 x i64> @f55(<4 x i32> %arg) // PR8029 v4i32 f55(v4i32 arg) { return arg+arg; } -// CHECK: define void @f56( +// CHECK-LABEL: define void @f56( // CHECK: i8 signext %a0, %struct.s56_0* byval align 4 %a1, // CHECK: i64 %a2.coerce, %struct.s56_1* byval align 4, // CHECK: i64 %a4.coerce, %struct.s56_2* byval align 4, @@ -276,34 +276,34 @@ void f56(char a0, struct s56_0 a1, a10, a11, a12, a13); } -// CHECK: define void @f57(i32 %x.0, i32 %x.1) +// CHECK-LABEL: define void @f57(i32 %x.0, i32 %x.1) // CHECK: call void @f57( struct s57 { _Complex int x; }; void f57(struct s57 x) {} void f57a(void) { f57((struct s57){1}); } -// CHECK: define void @f58() +// CHECK-LABEL: define void @f58() union u58 {}; void f58(union u58 x) {} -// CHECK: define i64 @f59() +// CHECK-LABEL: define i64 @f59() struct s59 { float x __attribute((aligned(8))); }; struct s59 f59() { while (1) {} } -// CHECK: define void @f60(%struct.s60* byval align 4, i32 %y) +// CHECK-LABEL: define void @f60(%struct.s60* byval align 4, i32 %y) struct s60 { int x __attribute((aligned(8))); }; void f60(struct s60 x, int y) {} -// CHECK: define void @f61(i32 %x, %struct.s61* byval align 16 %y) +// CHECK-LABEL: define void @f61(i32 %x, %struct.s61* byval align 16 %y) typedef int T61 __attribute((vector_size(16))); struct s61 { T61 x; int y; }; void f61(int x, struct s61 y) {} -// CHECK: define void @f62(i32 %x, %struct.s62* byval align 4) +// CHECK-LABEL: define void @f62(i32 %x, %struct.s62* byval align 4) typedef int T62 __attribute((vector_size(16))); struct s62 { T62 x; int y; } __attribute((packed, aligned(8))); void f62(int x, struct s62 y) {} -// CHECK: define i32 @f63 +// CHECK-LABEL: define i32 @f63 // CHECK: ptrtoint // CHECK: and {{.*}}, -16 // CHECK: inttoptr @@ -317,15 +317,15 @@ int f63(int i, ...) { return s.y; } -// CHECK: define void @f64(%struct.s64* byval align 4 %x) +// CHECK-LABEL: define void @f64(%struct.s64* byval align 4 %x) struct s64 { signed char a[0]; signed char b[]; }; void f64(struct s64 x) {} -// CHECK: define float @f65() +// CHECK-LABEL: define float @f65() struct s65 { signed char a[0]; float b; }; struct s65 f65() { return (struct s65){{},2}; } -// CHECK: define <2 x i64> @f66 +// CHECK-LABEL: define <2 x i64> @f66 // CHECK: ptrtoint // CHECK: and {{.*}}, -16 // CHECK: inttoptr @@ -341,4 +341,4 @@ T66 f66(int i, ...) 
{ // PR14453 struct s67 { _Complex unsigned short int a; }; void f67(struct s67 x) {} -// CHECK: define void @f67(%struct.s67* byval align 4 %x) +// CHECK-LABEL: define void @f67(%struct.s67* byval align 4 %x) diff --git a/test/CodeGen/x86_32-arguments-linux.c b/test/CodeGen/x86_32-arguments-linux.c index e93f9dc..1a8c600 100644 --- a/test/CodeGen/x86_32-arguments-linux.c +++ b/test/CodeGen/x86_32-arguments-linux.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -w -fblocks -triple i386-pc-linux-gnu -target-cpu pentium4 -emit-llvm -o %t %s // RUN: FileCheck < %t %s -// CHECK: define void @f56( +// CHECK-LABEL: define void @f56( // CHECK: i8 signext %a0, %struct.s56_0* byval align 4 %a1, // CHECK: i64 %a2.coerce, %struct.s56_1* byval align 4, // CHECK: <1 x double> %a4, %struct.s56_2* byval align 4, diff --git a/test/CodeGen/x86_32-arguments-nommx.c b/test/CodeGen/x86_32-arguments-nommx.c index 40362f7..ce68e3a 100644 --- a/test/CodeGen/x86_32-arguments-nommx.c +++ b/test/CodeGen/x86_32-arguments-nommx.c @@ -3,9 +3,9 @@ // no-mmx should put mmx into memory typedef int __attribute__((vector_size (8))) i32v2; int a(i32v2 x) { return x[0]; } -// CHECK: define i32 @a(i64 %x.coerce) +// CHECK-LABEL: define i32 @a(i64 %x.coerce) // but SSE2 vectors should still go into an SSE2 register typedef int __attribute__((vector_size (16))) i32v4; int b(i32v4 x) { return x[0]; } -// CHECK: define i32 @b(<4 x i32> %x) +// CHECK-LABEL: define i32 @b(<4 x i32> %x) diff --git a/test/CodeGen/x86_32-arguments-realign.c b/test/CodeGen/x86_32-arguments-realign.c index b08862e..768e1cc 100644 --- a/test/CodeGen/x86_32-arguments-realign.c +++ b/test/CodeGen/x86_32-arguments-realign.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -w -fblocks -triple i386-apple-darwin9 -emit-llvm -o %t %s // RUN: FileCheck < %t %s -// CHECK: define void @f0(%struct.s0* byval align 4) +// CHECK-LABEL: define void @f0(%struct.s0* byval align 4) // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 16, i32 4, i1 false) // CHECK: } struct s0 { long double a; }; diff --git a/test/CodeGen/x86_32-arguments-win32.c b/test/CodeGen/x86_32-arguments-win32.c index 77ff9e2..f8b0995 100644 --- a/test/CodeGen/x86_32-arguments-win32.c +++ b/test/CodeGen/x86_32-arguments-win32.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -w -triple i386-pc-win32 -emit-llvm -o - %s | FileCheck %s -// CHECK: define i64 @f1_1() -// CHECK: define void @f1_2(%struct.s1* byval align 4 %a0) +// CHECK-LABEL: define i64 @f1_1() +// CHECK-LABEL: define void @f1_2(%struct.s1* byval align 4 %a0) struct s1 { int a; int b; @@ -9,37 +9,37 @@ struct s1 { struct s1 f1_1(void) { while (1) {} } void f1_2(struct s1 a0) {} -// CHECK: define i32 @f2_1() +// CHECK-LABEL: define i32 @f2_1() struct s2 { short a; short b; }; struct s2 f2_1(void) { while (1) {} } -// CHECK: define i16 @f3_1() +// CHECK-LABEL: define i16 @f3_1() struct s3 { char a; char b; }; struct s3 f3_1(void) { while (1) {} } -// CHECK: define i8 @f4_1() +// CHECK-LABEL: define i8 @f4_1() struct s4 { char a:4; char b:4; }; struct s4 f4_1(void) { while (1) {} } -// CHECK: define i64 @f5_1() -// CHECK: define void @f5_2(%struct.s5* byval align 4) +// CHECK-LABEL: define i64 @f5_1() +// CHECK-LABEL: define void @f5_2(%struct.s5* byval align 4) struct s5 { double a; }; struct s5 f5_1(void) { while (1) {} } void f5_2(struct s5 a0) {} -// CHECK: define i32 @f6_1() -// CHECK: define void @f6_2(%struct.s6* byval align 4 %a0) +// CHECK-LABEL: define i32 @f6_1() +// CHECK-LABEL: define void @f6_2(%struct.s6* byval align 4 %a0) struct s6 { 
float a; }; diff --git a/test/CodeGen/x86_32-fpcc-struct-return.c b/test/CodeGen/x86_32-fpcc-struct-return.c new file mode 100644 index 0000000..9f61599 --- /dev/null +++ b/test/CodeGen/x86_32-fpcc-struct-return.c @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -triple i386-apple-darwin9 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-REG +// RUN: %clang_cc1 -triple i386-apple-darwin9 -fpcc-struct-return -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-PCC +// RUN: %clang_cc1 -triple i386-apple-darwin9 -freg-struct-return -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-REG +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-PCC +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -fpcc-struct-return -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-PCC +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -freg-struct-return -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-REG +// RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-REG +// RUN: %clang_cc1 -triple i386-pc-win32 -fpcc-struct-return -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-PCC +// RUN: %clang_cc1 -triple i386-pc-win32 -freg-struct-return -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-REG + +typedef struct { int a,b,c,d; } Big; +typedef struct { int i; } Small; +typedef struct { short s; } Short; +typedef struct { } ZeroSized; + +// CHECK-LABEL: define void @returnBig +// CHECK: ret void +Big returnBig(Big x) { return x; } + +// CHECK-PCC-LABEL: define void @returnSmall +// CHECK-PCC: ret void +// CHECK-REG-LABEL: define i32 @returnSmall +// CHECK-REG: ret i32 +Small returnSmall(Small x) { return x; } + +// CHECK-PCC-LABEL: define void @returnShort +// CHECK-PCC: ret void +// CHECK-REG-LABEL: define i16 @returnShort +// CHECK-REG: ret i16 +Short returnShort(Short x) { return x; } + +// CHECK-LABEL: define void @returnZero() +// CHECK: ret void +ZeroSized returnZero(ZeroSized x) { return x; } diff --git a/test/CodeGen/x86_64-arguments-nacl.c b/test/CodeGen/x86_64-arguments-nacl.c index 8f756ca..1c3f5b0 100644 --- a/test/CodeGen/x86_64-arguments-nacl.c +++ b/test/CodeGen/x86_64-arguments-nacl.c @@ -21,21 +21,21 @@ struct PP_Var f0() { return result; } -// CHECK: define void @f1(i64 %p1.coerce0, i64 %p1.coerce1) +// CHECK-LABEL: define void @f1(i64 %p1.coerce0, i64 %p1.coerce1) void f1(struct PP_Var p1) { while(1) {} } // long doubles are 64 bits on NaCl -// CHECK: define double @f5() +// CHECK-LABEL: define double @f5() long double f5(void) { return 0; } -// CHECK: define void @f6(i8 signext %a0, i16 signext %a1, i32 %a2, i64 %a3, i8* %a4) +// CHECK-LABEL: define void @f6(i8 signext %a0, i16 signext %a1, i32 %a2, i64 %a3, i8* %a4) void f6(char a0, short a1, int a2, long long a3, void *a4) { } -// CHECK: define i64 @f8_1() -// CHECK: define void @f8_2(i64 %a0.coerce) +// CHECK-LABEL: define i64 @f8_1() +// CHECK-LABEL: define void @f8_2(i64 %a0.coerce) union u8 { long double a; int b; @@ -43,18 +43,18 @@ union u8 { union u8 f8_1() { while (1) {} } void f8_2(union u8 a0) {} -// CHECK: define i64 @f9() +// CHECK-LABEL: define i64 @f9() struct s9 { int a; int b; int : 0; } f9(void) { while (1) {} } -// CHECK: define void @f10(i64 %a0.coerce) +// CHECK-LABEL: define void @f10(i64 %a0.coerce) struct s10 { int a; int b; int : 0; }; void f10(struct s10 a0) {} -// CHECK: define double @f11() +// CHECK-LABEL: define double @f11() union { long double a; float b; } f11() { while (1) {} } -// CHECK: define i32 @f12_0() -// 
CHECK: define void @f12_1(i32 %a0.coerce) +// CHECK-LABEL: define i32 @f12_0() +// CHECK-LABEL: define void @f12_1(i32 %a0.coerce) struct s12 { int a __attribute__((aligned(16))); }; struct s12 f12_0(void) { while (1) {} } void f12_1(struct s12 a0) {} @@ -68,7 +68,7 @@ struct s13_1 { long long f0[2]; }; struct s13_0 f13(int a, int b, int c, int d, struct s13_1 e, int f) { while (1) {} } -// CHECK: define void @f20(%struct.s20* byval align 32 %x) +// CHECK-LABEL: define void @f20(%struct.s20* byval align 32 %x) struct __attribute__((aligned(32))) s20 { int x; int y; @@ -96,10 +96,10 @@ typedef struct { int a; int b; } s1; -// CHECK: define i32 @f48(%struct.s1* byval %s) +// CHECK-LABEL: define i32 @f48(%struct.s1* byval %s) int __attribute__((pnaclcall)) f48(s1 s) { return s.a; } -// CHECK: define void @f49(%struct.s1* noalias sret %agg.result) +// CHECK-LABEL: define void @f49(%struct.s1* noalias sret %agg.result) s1 __attribute__((pnaclcall)) f49() { s1 s; s.a = s.b = 1; return s; } union simple_union { @@ -107,7 +107,7 @@ union simple_union { char b; }; // Unions should be passed as byval structs -// CHECK: define void @f50(%union.simple_union* byval %s) +// CHECK-LABEL: define void @f50(%union.simple_union* byval %s) void __attribute__((pnaclcall)) f50(union simple_union s) {} typedef struct { @@ -116,5 +116,5 @@ typedef struct { int b8 : 8; } bitfield1; // Bitfields should be passed as byval structs -// CHECK: define void @f51(%struct.bitfield1* byval %bf1) +// CHECK-LABEL: define void @f51(%struct.bitfield1* byval %bf1) void __attribute__((pnaclcall)) f51(bitfield1 bf1) {} diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c index 518ee84..5d01d3b 100644 --- a/test/CodeGen/x86_64-arguments.c +++ b/test/CodeGen/x86_64-arguments.c @@ -2,49 +2,49 @@ // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | FileCheck %s -check-prefix=AVX #include <stdarg.h> -// CHECK: define signext i8 @f0() +// CHECK-LABEL: define signext i8 @f0() char f0(void) { return 0; } -// CHECK: define signext i16 @f1() +// CHECK-LABEL: define signext i16 @f1() short f1(void) { return 0; } -// CHECK: define i32 @f2() +// CHECK-LABEL: define i32 @f2() int f2(void) { return 0; } -// CHECK: define float @f3() +// CHECK-LABEL: define float @f3() float f3(void) { return 0; } -// CHECK: define double @f4() +// CHECK-LABEL: define double @f4() double f4(void) { return 0; } -// CHECK: define x86_fp80 @f5() +// CHECK-LABEL: define x86_fp80 @f5() long double f5(void) { return 0; } -// CHECK: define void @f6(i8 signext %a0, i16 signext %a1, i32 %a2, i64 %a3, i8* %a4) +// CHECK-LABEL: define void @f6(i8 signext %a0, i16 signext %a1, i32 %a2, i64 %a3, i8* %a4) void f6(char a0, short a1, int a2, long long a3, void *a4) { } -// CHECK: define void @f7(i32 %a0) +// CHECK-LABEL: define void @f7(i32 %a0) typedef enum { A, B, C } e7; void f7(e7 a0) { } // Test merging/passing of upper eightbyte with X87 class. 
// -// CHECK: define void @f8_1(%union.u8* noalias sret %agg.result) -// CHECK: define void @f8_2(%union.u8* byval align 16 %a0) +// CHECK-LABEL: define void @f8_1(%union.u8* noalias sret %agg.result) +// CHECK-LABEL: define void @f8_2(%union.u8* byval align 16 %a0) union u8 { long double a; int b; @@ -52,18 +52,18 @@ union u8 { union u8 f8_1() { while (1) {} } void f8_2(union u8 a0) {} -// CHECK: define i64 @f9() +// CHECK-LABEL: define i64 @f9() struct s9 { int a; int b; int : 0; } f9(void) { while (1) {} } -// CHECK: define void @f10(i64 %a0.coerce) +// CHECK-LABEL: define void @f10(i64 %a0.coerce) struct s10 { int a; int b; int : 0; }; void f10(struct s10 a0) {} -// CHECK: define void @f11(%union.anon* noalias sret %agg.result) +// CHECK-LABEL: define void @f11(%union.anon* noalias sret %agg.result) union { long double a; float b; } f11() { while (1) {} } -// CHECK: define i32 @f12_0() -// CHECK: define void @f12_1(i32 %a0.coerce) +// CHECK-LABEL: define i32 @f12_0() +// CHECK-LABEL: define void @f12_1(i32 %a0.coerce) struct s12 { int a __attribute__((aligned(16))); }; struct s12 f12_0(void) { while (1) {} } void f12_1(struct s12 a0) {} @@ -94,19 +94,19 @@ void f17(float a, float b, float c, float d, float e, float f, float g, float h, // Check for valid coercion. The struct should be passed/returned as i32, not // as i64 for better code quality. // rdar://8135035 -// CHECK: define void @f18(i32 %a, i32 %f18_arg1.coerce) +// CHECK-LABEL: define void @f18(i32 %a, i32 %f18_arg1.coerce) struct f18_s0 { int f0; }; void f18(int a, struct f18_s0 f18_arg1) { while (1) {} } // Check byval alignment. -// CHECK: define void @f19(%struct.s19* byval align 16 %x) +// CHECK-LABEL: define void @f19(%struct.s19* byval align 16 %x) struct s19 { long double a; }; void f19(struct s19 x) {} -// CHECK: define void @f20(%struct.s20* byval align 32 %x) +// CHECK-LABEL: define void @f20(%struct.s20* byval align 32 %x) struct __attribute__((aligned(32))) s20 { int x; int y; @@ -119,7 +119,7 @@ struct StringRef { }; // rdar://7375902 -// CHECK: define i8* @f21(i64 %S.coerce0, i8* %S.coerce1) +// CHECK-LABEL: define i8* @f21(i64 %S.coerce0, i8* %S.coerce1) const char *f21(struct StringRef S) { return S.x+S.Ptr; } // PR7567 @@ -140,7 +140,7 @@ struct f23S { void f23(int A, struct f23S B) { - // CHECK: define void @f23(i32 %A, i64 %B.coerce0, i32 %B.coerce1) + // CHECK-LABEL: define void @f23(i32 %A, i64 %B.coerce0, i32 %B.coerce1) } struct f24s { long a; int b; }; @@ -154,7 +154,7 @@ struct f23S f24(struct f23S *X, struct f24s *P2) { // rdar://8248065 typedef float v4f32 __attribute__((__vector_size__(16))); v4f32 f25(v4f32 X) { - // CHECK: define <4 x float> @f25(<4 x float> %X) + // CHECK-LABEL: define <4 x float> @f25(<4 x float> %X) // CHECK-NOT: alloca // CHECK: alloca <4 x float> // CHECK-NOT: alloca @@ -180,7 +180,7 @@ struct v4f32wrapper { }; struct v4f32wrapper f27(struct v4f32wrapper X) { - // CHECK: define <4 x float> @f27(<4 x float> %X.coerce) + // CHECK-LABEL: define <4 x float> @f27(<4 x float> %X.coerce) return X; } @@ -190,7 +190,7 @@ struct f28c { int y; }; void f28(struct f28c C) { - // CHECK: define void @f28(double %C.coerce0, i32 %C.coerce1) + // CHECK-LABEL: define void @f28(double %C.coerce0, i32 %C.coerce1) } struct f29a { @@ -201,26 +201,26 @@ struct f29a { }; void f29a(struct f29a A) { - // CHECK: define void @f29a(double %A.coerce0, i32 %A.coerce1) + // CHECK-LABEL: define void @f29a(double %A.coerce0, i32 %A.coerce1) } // rdar://8249586 struct S0 { char f0[8]; char f2; char f3; char 
f4; }; void f30(struct S0 p_4) { - // CHECK: define void @f30(i64 %p_4.coerce0, i24 %p_4.coerce1) + // CHECK-LABEL: define void @f30(i64 %p_4.coerce0, i24 %p_4.coerce1) } // Pass the third element as a float when followed by tail padding. // rdar://8251384 struct f31foo { float a, b, c; }; float f31(struct f31foo X) { - // CHECK: define float @f31(<2 x float> %X.coerce0, float %X.coerce1) + // CHECK-LABEL: define float @f31(<2 x float> %X.coerce0, float %X.coerce1) return X.c; } _Complex float f32(_Complex float A, _Complex float B) { // rdar://6379669 - // CHECK: define <2 x float> @f32(<2 x float> %A.coerce, <2 x float> %B.coerce) + // CHECK-LABEL: define <2 x float> @f32(<2 x float> %A.coerce, <2 x float> %B.coerce) return A+B; } @@ -235,12 +235,12 @@ void f33(va_list X) { typedef unsigned long long v1i64 __attribute__((__vector_size__(8))); // rdar://8359248 -// CHECK: define i64 @f34(i64 %arg.coerce) +// CHECK-LABEL: define i64 @f34(i64 %arg.coerce) v1i64 f34(v1i64 arg) { return arg; } // rdar://8358475 -// CHECK: define i64 @f35(i64 %arg.coerce) +// CHECK-LABEL: define i64 @f35(i64 %arg.coerce) typedef unsigned long v1i64_2 __attribute__((__vector_size__(8))); v1i64_2 f35(v1i64_2 arg) { return arg+arg; } @@ -260,7 +260,7 @@ void f9122143() func(ss); } -// CHECK: define double @f36(double %arg.coerce) +// CHECK-LABEL: define double @f36(double %arg.coerce) typedef unsigned v2i32 __attribute((__vector_size__(8))); v2i32 f36(v2i32 arg) { return arg; } @@ -308,7 +308,7 @@ void func43(SA s) { func42(s); } -// CHECK: define i32 @f44 +// CHECK-LABEL: define i32 @f44 // CHECK: ptrtoint // CHECK-NEXT: and {{.*}}, -32 // CHECK-NEXT: inttoptr @@ -323,7 +323,7 @@ int f44(int i, ...) { } // Text that vec3 returns the correct LLVM IR type. -// AVX: define i32 @foo(<3 x i64> %X) +// AVX-LABEL: define i32 @foo(<3 x i64> %X) typedef long long3 __attribute((ext_vector_type(3))); int foo(long3 X) { @@ -379,7 +379,7 @@ void test49_helper(double, ...); void test49(double d, double e) { test49_helper(d, e); } -// CHECK: define void @test49( +// CHECK-LABEL: define void @test49( // CHECK: [[T0:%.*]] = load double* // CHECK-NEXT: [[T1:%.*]] = load double* // CHECK-NEXT: call void (double, ...)* @test49_helper(double [[T0]], double [[T1]]) @@ -388,7 +388,49 @@ void test50_helper(); void test50(double d, double e) { test50_helper(d, e); } -// CHECK: define void @test50( +// CHECK-LABEL: define void @test50( // CHECK: [[T0:%.*]] = load double* // CHECK-NEXT: [[T1:%.*]] = load double* // CHECK-NEXT: call void (double, double, ...)* bitcast (void (...)* @test50_helper to void (double, double, ...)*)(double [[T0]], double [[T1]]) + +struct test51_s { __uint128_t intval; }; +void test51(struct test51_s *s, __builtin_va_list argList) { + *s = __builtin_va_arg(argList, struct test51_s); +} + +// CHECK-LABEL: define void @test51 +// CHECK: [[TMP_ADDR:%.*]] = alloca [[STRUCT_TEST51:%.*]], align 16 +// CHECK: br i1 +// CHECK: [[REG_SAVE_AREA_PTR:%.*]] = getelementptr inbounds {{.*}}, i32 0, i32 3 +// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load i8** [[REG_SAVE_AREA_PTR]] +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = getelementptr i8* [[REG_SAVE_AREA]], i32 {{.*}} +// CHECK-NEXT: [[CASTED_VALUE_ADDR:%.*]] = bitcast i8* [[VALUE_ADDR]] to [[STRUCT_TEST51]] +// CHECK-NEXT: [[CASTED_TMP_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[TMP_ADDR]] to i8* +// CHECK-NEXT: [[RECASTED_VALUE_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[CASTED_VALUE_ADDR]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CASTED_TMP_ADDR]], i8* 
[[RECASTED_VALUE_ADDR]], i64 16, i32 8, i1 false) +// CHECK-NEXT: add i32 {{.*}}, 16 +// CHECK-NEXT: store i32 {{.*}}, i32* {{.*}} +// CHECK-NEXT: br label + +void test52_helper(int, ...); +__m256 x52; +void test52() { + test52_helper(0, x52, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); +} +// AVX: @test52_helper(i32 0, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) + +void test53(__m256 *m, __builtin_va_list argList) { + *m = __builtin_va_arg(argList, __m256); +} +// AVX-LABEL: define void @test53 +// AVX-NOT: br i1 +// AVX: ret void + +void test54_helper(__m256, ...); +__m256 x54; +void test54() { + test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); + test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); +} +// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) +// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}}) diff --git a/test/CodeGen/xcore-abi.c b/test/CodeGen/xcore-abi.c new file mode 100644 index 0000000..10881de --- /dev/null +++ b/test/CodeGen/xcore-abi.c @@ -0,0 +1,129 @@ +// RUN: %clang_cc1 -triple xcore -verify %s +_Static_assert(sizeof(long long) == 8, "sizeof long long is wrong"); +_Static_assert(_Alignof(long long) == 4, "alignof long long is wrong"); + +_Static_assert(sizeof(double) == 8, "sizeof double is wrong"); +_Static_assert(_Alignof(double) == 4, "alignof double is wrong"); + +// RUN: %clang_cc1 -triple xcore-unknown-unknown -fno-signed-char -fno-common -emit-llvm -o - %s | FileCheck %s + +// CHECK: target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32" +// CHECK: target triple = "xcore-unknown-unknown" + +// CHECK: @g1 = global i32 0, align 4 +int g1; + +#include <stdarg.h> +struct x { int a[5]; }; +void f(void*); +void testva (int n, ...) 
{ + // CHECK-LABEL: testva + va_list ap; + va_start(ap,n); + // CHECK: [[AP:%[a-z0-9]+]] = alloca i8*, align 4 + // CHECK: [[AP1:%[a-z0-9]+]] = bitcast i8** [[AP]] to i8* + // CHECK: call void @llvm.va_start(i8* [[AP1]]) + + char* v1 = va_arg (ap, char*); + f(v1); + // CHECK: [[I:%[a-z0-9]+]] = load i8** [[AP]] + // CHECK: [[P:%[a-z0-9]+]] = bitcast i8* [[I]] to i8** + // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8* [[I]], i32 4 + // CHECK: store i8* [[IN]], i8** [[AP]] + // CHECK: [[V1:%[a-z0-9]+]] = load i8** [[P]] + // CHECK: store i8* [[V1]], i8** [[V:%[a-z0-9]+]], align 4 + // CHECK: [[V2:%[a-z0-9]+]] = load i8** [[V]], align 4 + // CHECK: call void @f(i8* [[V2]]) + + char v2 = va_arg (ap, char); // expected-warning{{second argument to 'va_arg' is of promotable type 'char'}} + f(&v2); + // CHECK: [[I:%[a-z0-9]+]] = load i8** [[AP]] + // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8* [[I]], i32 4 + // CHECK: store i8* [[IN]], i8** [[AP]] + // CHECK: [[V1:%[a-z0-9]+]] = load i8* [[I]] + // CHECK: store i8 [[V1]], i8* [[V:%[a-z0-9]+]], align 1 + // CHECK: call void @f(i8* [[V]]) + + int v3 = va_arg (ap, int); + f(&v3); + // CHECK: [[I:%[a-z0-9]+]] = load i8** [[AP]] + // CHECK: [[P:%[a-z0-9]+]] = bitcast i8* [[I]] to i32* + // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8* [[I]], i32 4 + // CHECK: store i8* [[IN]], i8** [[AP]] + // CHECK: [[V1:%[a-z0-9]+]] = load i32* [[P]] + // CHECK: store i32 [[V1]], i32* [[V:%[a-z0-9]+]], align 4 + // CHECK: [[V2:%[a-z0-9]+]] = bitcast i32* [[V]] to i8* + // CHECK: call void @f(i8* [[V2]]) + + long long int v4 = va_arg (ap, long long int); + f(&v4); + // CHECK: [[I:%[a-z0-9]+]] = load i8** [[AP]] + // CHECK: [[P:%[a-z0-9]+]] = bitcast i8* [[I]] to i64* + // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8* [[I]], i32 8 + // CHECK: store i8* [[IN]], i8** [[AP]] + // CHECK: [[V1:%[a-z0-9]+]] = load i64* [[P]] + // CHECK: store i64 [[V1]], i64* [[V:%[a-z0-9]+]], align 4 + // CHECK:[[V2:%[a-z0-9]+]] = bitcast i64* [[V]] to i8* + // CHECK: call void @f(i8* [[V2]]) + + struct x v5 = va_arg (ap, struct x); // typical aggregate type + f(&v5); + // CHECK: [[I:%[a-z0-9]+]] = load i8** [[AP]] + // CHECK: [[I2:%[a-z0-9]+]] = bitcast i8* [[I]] to %struct.x** + // CHECK: [[P:%[a-z0-9]+]] = load %struct.x** [[I2]] + // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8* [[I]], i32 4 + // CHECK: store i8* [[IN]], i8** [[AP]] + // CHECK: [[V1:%[a-z0-9]+]] = bitcast %struct.x* [[V:%[a-z0-9]+]] to i8* + // CHECK: [[P1:%[a-z0-9]+]] = bitcast %struct.x* [[P]] to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[V1]], i8* [[P1]], i32 20, i32 4, i1 false) + // CHECK: [[V2:%[a-z0-9]+]] = bitcast %struct.x* [[V]] to i8* + // CHECK: call void @f(i8* [[V2]]) + + int* v6 = va_arg (ap, int[4]); // an unusual aggregate type + f(v6); + // CHECK: [[I:%[a-z0-9]+]] = load i8** [[AP]] + // CHECK: [[I2:%[a-z0-9]+]] = bitcast i8* [[I]] to [4 x i32]** + // CHECK: [[P:%[a-z0-9]+]] = load [4 x i32]** [[I2]] + // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8* [[I]], i32 4 + // CHECK: store i8* [[IN]], i8** [[AP]] + // CHECK: [[V1:%[a-z0-9]+]] = bitcast [4 x i32]* [[V0:%[a-z0-9]+]] to i8* + // CHECK: [[P1:%[a-z0-9]+]] = bitcast [4 x i32]* [[P]] to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[V1]], i8* [[P1]], i32 16, i32 4, i1 false) + // CHECK: [[V2:%[a-z0-9]+]] = getelementptr inbounds [4 x i32]* [[V0]], i32 0, i32 0 + // CHECK: store i32* [[V2]], i32** [[V:%[a-z0-9]+]], align 4 + // CHECK: [[V3:%[a-z0-9]+]] = load i32** [[V]], align 4 + // CHECK: [[V4:%[a-z0-9]+]] = bitcast i32* [[V3]] 
to i8* + // CHECK: call void @f(i8* [[V4]]) + + double v7 = va_arg (ap, double); + f(&v7); + // CHECK: [[I:%[a-z0-9]+]] = load i8** [[AP]] + // CHECK: [[P:%[a-z0-9]+]] = bitcast i8* [[I]] to double* + // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8* [[I]], i32 8 + // CHECK: store i8* [[IN]], i8** [[AP]] + // CHECK: [[V1:%[a-z0-9]+]] = load double* [[P]] + // CHECK: store double [[V1]], double* [[V:%[a-z0-9]+]], align 4 + // CHECK: [[V2:%[a-z0-9]+]] = bitcast double* [[V]] to i8* + // CHECK: call void @f(i8* [[V2]]) +} + +void testbuiltin (void) { + // CHECK-LABEL: testbuiltin + // CHECK: call i32 @llvm.xcore.getid() + // CHECK: call i32 @llvm.xcore.getps(i32 {{%[a-z0-9]+}}) + // CHECK: call i32 @llvm.xcore.bitrev(i32 {{%[a-z0-9]+}}) + // CHECK: call void @llvm.xcore.setps(i32 {{%[a-z0-9]+}}, i32 {{%[a-z0-9]+}}) + int i = __builtin_getid(); + unsigned int ui = __builtin_getps(i); + ui = __builtin_bitrev(ui); + __builtin_setps(i,ui); +} + +// CHECK-LABEL: define zeroext i8 @testchar() +// CHECK: ret i8 -1 +char testchar (void) { + return (char)-1; +} + +// CHECK: "no-frame-pointer-elim"="false" +// CHECK-NOT: "no-frame-pointer-elim-non-leaf" |
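For reference, the FileCheck directive introduced throughout the diffs above works as follows: CHECK-LABEL splits FileCheck's input at each line matching the labeled pattern (here, a function definition), so the plain CHECK lines between two labels can only match IR emitted for that function, and a mismatch is reported against the right block instead of drifting into another function's output. A minimal sketch of the pattern, with a hypothetical file and hypothetical functions (the RUN line and triple mirror the tests above; the exact IR clang emits can vary by version):

// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s

// CHECK-LABEL: define i32 @add_one(
// CHECK: add nsw i32
int add_one(int x) { return x + 1; }

// CHECK-LABEL: define i32 @twice(
// The mul check below can only match inside the @twice block created by the
// preceding CHECK-LABEL, so a failure here points at @twice rather than at
// whatever IR follows @add_one.
// CHECK: mul nsw i32
int twice(int x) { return x * 2; }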